diff --git a/data/anssel/wang/README.md b/data/anssel/wang/README.md
index 52ec165..d11dbc8 100644
--- a/data/anssel/wang/README.md
+++ b/data/anssel/wang/README.md
@@ -40,6 +40,8 @@ intervals (t-distribution) are reported.
 | |±0.019407 |±0.006259 |±0.007169 |±0.011460 |
 | attn1511 | 0.852364 | 0.851368 | 0.708163 | 0.789822 | (defaults)
 | |±0.017280 |±0.005533 |±0.008958 |±0.013308 |
+| skipthoughts | 0.717458 | 0.798090 | 0.651075 | 0.755428 | (defaults)
+| |±0.001086 |±0.002665 |±0.002302 |±0.003628 |
 |--------------------------|-------------|----------|----------|----------|---------
 | Ubu. rnn | 0.895331 | 0.872205 | 0.731038 | 0.814410 | Ubuntu transfer learning (``ptscorer=B.dot_ptscorer`` ``pdim=1`` ``inp_e_dropout=0`` ``dropout=0`` ``balance_class=True`` ``adapt_ubuntu=True`` ``opt='rmsprop'``)
 | |±0.006360 |±0.004435 |±0.007483 |±0.008340 |
diff --git a/data/anssel/yodaqa/README.md b/data/anssel/yodaqa/README.md
index 9472f9e..1afbf7a 100644
--- a/data/anssel/yodaqa/README.md
+++ b/data/anssel/yodaqa/README.md
@@ -65,6 +65,8 @@ curatedv2:
 | |±0.044228 |±0.023533 |±0.007741 |±0.014747 |
 | attn1511 | 0.432403 | 0.475125 | 0.275219 | 0.468555 | (defaults)
 | |±0.016183 |±0.012810 |±0.006562 |±0.014433 |
+| skipthoughts | 0.504828 | 0.359774 | 0.285137 | 0.433982 | (defaults)
+| |±0.002487 |±0.003927 |±0.001038 |±0.002402 |
 |--------------------------|-------------|----------|----------|----------|---------
 | rnn | 0.600532 | 0.493167 | 0.300700 | 0.463808 | Ubuntu transfer learning (``ptscorer=B.dot_ptscorer`` ``pdim=1`` ``inp_e_dropout=0`` ``dropout=0`` ``balance_class=True`` ``adapt_ubuntu=True`` ``vocabt='ubuntu'`` ``opt='rmsprop'``)
 | |±0.045585 |±0.015647 |±0.007871 |±0.011789 |
diff --git a/data/para/msr/README.md b/data/para/msr/README.md
index cf81543..c758309 100644
--- a/data/para/msr/README.md
+++ b/data/para/msr/README.md
@@ -36,6 +36,8 @@ For randomized models, 95% confidence intervals (t-distribution) are reported.
 | |±0.028483 |±0.015017 |±0.006946 |±0.008944 |±0.006232 |±0.009749 |
 | attn1511 | 0.741401 | 0.821830 | 0.702250 | 0.801453 | 0.699891 | 0.791798 | (defaults)
 | |±0.012435 |±0.005271 |±0.004882 |±0.007168 |±0.004946 |±0.008456 |
+| skipthoughts | 0.788783 | 0.860917 | 0.737125 | 0.832970 | 0.731449 | 0.822175 | (defaults)
+| |±0.003688 |±0.001902 |±0.004458 |±0.002330 |±0.001566 |±0.001564 |
 
 These results are obtained like this:
 
diff --git a/data/sts/semeval-sts/README.md b/data/sts/semeval-sts/README.md
index 8cfced3..e33af4a 100644
--- a/data/sts/semeval-sts/README.md
+++ b/data/sts/semeval-sts/README.md
@@ -50,6 +50,8 @@ set.
 | |±0.032854 |±0.005099 |±0.007836 |±0.005869 |±0.010037 |±0.007489 |±0.005823 |±0.094360 |
 | attn1511 | 0.712086 | 0.656483 | 0.429167 | 0.632170 | 0.628803 | 0.657264 | 0.668384 | 0.603158 | (defaults)
 | |±0.033190 |±0.009479 |±0.019904 |±0.016477 |±0.015415 |±0.012070 |±0.023045 |±0.109596
+| skipthoughts | 0.713562 | 0.430110 | 0.359320 | 0.633636 | 0.617385 | 0.561543 | 0.796134 | 0.593603 | ``l2reg=0.001`` ``dropout=0.2`` ``use_eos=1`` ``use_flags=0``
+| |±0.000664 |±0.001639 |±0.001614 |±0.000804 |±0.001273 |±0.000864 |±0.000488 |±0.031334
 
 These results are obtained like this:
 
diff --git a/data/sts/sick2014/README.md b/data/sts/sick2014/README.md
index 0d9ef0a..df5d8bf 100644
--- a/data/sts/sick2014/README.md
+++ b/data/sts/sick2014/README.md
@@ -72,6 +72,8 @@ Reporting accuracy...
 | |±0.084148 |±0.060789 |±0.058780 |
 | attn1511 | 0.857792 | 0.783875 | 0.766757 | ``ptscorer='1'``
 | |±0.010444 |±0.005104 |±0.004373 |
+| skipthoughts | 0.759340 | 0.725806 | 0.728035 | ``l2reg=0.001`` ``dropout=0.2`` ``use_eos=1`` ``use_flags=0``
+| |±0.001594 |±0.001756 |±0.001733 |
 |--------------------------|----------|----------|----------|---------
 | rnn | 0.930833 | 0.829750 | 0.812614 | Ubuntu transfer learning (``pdim=1`` ``ptscorer=B.mlp_ptscorer`` ``dropout=0`` ``inp_e_dropout=0`` ``adapt_ubuntu=True``)
 | |±0.017211 |±0.007164 |±0.004619 |
diff --git a/models/skipthoughts.py b/models/skipthoughts.py
new file mode 100644
index 0000000..6cbeedd
--- /dev/null
+++ b/models/skipthoughts.py
@@ -0,0 +1,194 @@
+"""
+A simple model based on skipthoughts sentence embeddings.
+
+To set up:
+  * Execute the "Getting started" wgets in its README
+  * set config['skipthoughts_datadir'] to the directory with the downloaded files
+  * make skipthoughts.py from https://github.com/ryankiros/skip-thoughts/blob/master/skipthoughts.py
+    available via import skipthoughts
+
+Inner working: First we compute the skipthought embedding of both inputs; then we merge them (by default via element-wise sum and absolute difference), concatenate the merge outputs, and compute the result (one MLP layer).
+"""
+
+from __future__ import print_function
+from __future__ import division
+
+
+from keras.models import Graph
+from keras.layers.core import Activation, Dense, Dropout
+from keras.regularizers import l2
+from keras.optimizers import Adam
+
+import pysts.embedding as emb
+import pysts.loader as loader
+import pysts.kerasts.blocks as B
+from pysts.kerasts.objectives import pearsonobj
+
+import numpy as np
+
+
+def config(c):
+    # XXX: site-specific default path
+    c['skipthoughts_datadir'] = "/storage/ostrava1/home/nadvorj1/skip-thoughts/"
+
+    # disable GloVe
+    c['embdim'] = None
+    # disable Keras training
+    c['ptscorer'] = None
+
+    # Which version of precomputed ST vectors to use
+    c["skipthoughts_uni_bi"] = "combined"
+
+    # loss is set in __init__
+    c["loss"] = None
+
+    # Values from original code (ryankiros/skip-thoughts/eval_sick.py):
+    c['merge_sum'] = True
+    c['merge_mul'] = False
+    c['merge_diff'] = False
+    c['merge_absdiff'] = True
+    # l2reg=0 is what eval_sick.py uses; the paper uses a non-zero value
+    c['l2reg'] = 0.0
+    c['dropout'] = 0.0
+
+    # Add an end-of-sentence mark to inputs. If inputs have correct
+    # punctuation, it tends to be better without EOS.
+    c['use_eos'] = True
+
+    # append boolean flags to the ST vectors
+    c["use_flags"] = False
+
+
+class STModel:
+    """ Quacks (a little) like a Keras model.
""" + + def __init__(self, c, output): + self.weights_to_load = None + self.c = c + self.output = output + + if c.get("clipnorm"): + c["opt"] = Adam(clipnorm=c["clipnorm"]) + + # xxx: this will probably break soon + if output == 'classes': + self.output_width = 6 # xxx: sick only needs 5 + self.output = 'classes' + if not self.c.get("loss"): + # note: this can be overwritten from shell, but not from task config + self.c["loss"] = "categorical_crossentropy" # (used in orig paper) + + if not self.c.get("output_activation"): + self.c["output_activation"] = "softmax" + + c['balance_class'] = False + + else: # output == binary + self.output_width = 1 + self.output = 'score' + + if not self.c.get("loss"): + self.c['loss'] = 'binary_crossentropy' + + if not self.c.get("output_activation"): + c["output_activation"] = "sigmoid" + + c['balance_class'] = True + + if not self.c.get("use_eos"): + self.c["use_eos"] = output == 'classes' + + self.st = emb.SkipThought(c=self.c) + self.N = self.st.N + + def prep_model(self, do_compile=True, load_weights=True): + if hasattr(self, "model"): + return + dropout = self.c["dropout"] + + self.model = Graph() + self.model.add_input(name='e0', input_shape=(self.N,)) + self.model.add_input(name='e1', input_shape=(self.N,)) + self.model.add_node(name="e0_", input="e0", layer=Dropout(dropout)) + self.model.add_node(name="e1_", input="e1", layer=Dropout(dropout)) + + merges = [] + if self.c.get("merge_sum"): + self.model.add_node(name='sum', inputs=['e0_', 'e1_'], layer=Activation('linear'), merge_mode='sum') + self.model.add_node(name="sum_", input="sum", layer=Dropout(dropout)) + merges.append("sum_") + + if self.c.get("merge_mul"): + self.model.add_node(name='mul', inputs=['e0_', 'e1_'], layer=Activation('linear'), merge_mode='mul') + self.model.add_node(name="mul_", input="mul", layer=Dropout(dropout)) + merges.append("mul_") + + if self.c.get("merge_absdiff"): + merge_name = B.absdiff_merge(self.model, ["e0_", "e1_"], pfx="", layer_name="absdiff", ) + self.model.add_node(name="%s_" % merge_name, input=merge_name, layer=Dropout(dropout)) + merges.append("%s_" % merge_name) + + if self.c.get("merge_diff"): + merge_name = B.absdiff_merge(self.model, ["e0_", "e1_"], pfx="", layer_name="diff") + self.model.add_node(name="%s_" % merge_name, input=merge_name, layer=Dropout(dropout)) + merges.append("%s_" % merge_name) + + self.model.add_node(name='hidden', inputs=merges, merge_mode='concat', + layer=Dense(self.output_width, W_regularizer=l2(self.c['l2reg']))) + self.model.add_node(name='out', input='hidden', layer=Activation(self.c['output_activation'])) + self.model.add_output(name=self.output, input='out') + + if do_compile: + self.model.compile(loss={self.output: self.c['loss']}, optimizer=self.c["opt"]) + + if self.weights_to_load and load_weights: + self.model.load_weights(*self.weights_to_load[0], **self.weights_to_load[1]) + + def add_flags(self, e, f): + f = np.asarray(f, dtype="float32") + flags_n = f.shape[1] * f.shape[2] + f = f.reshape(e.shape[0], flags_n) + e = np.concatenate((e, f), axis=1) + return e + + def prepare_data(self, gr, balance=False): + self.precompute_embeddings(gr) + + e0, e1, _, _, y = loader.load_embedded(self.st, gr["s0"], gr["s1"], gr[self.output], balance=False, ndim=1) + + if self.c.get("use_flags"): + e0 = self.add_flags(e0, gr["f0"]) + e1 = self.add_flags(e1, gr["f1"]) + self.N = e0.shape[1] + + if balance: + e0, e1, y = loader.balance_dataset((e0, e1, gr[self.output])) + return np.array(e0), np.array(e1), y + + def fit(self, gr, 
**kwargs): + e0, e1, y = self.prepare_data(gr, balance=self.c["balance_class"]) + self.prep_model() + + self.model.fit({'e0': e0, 'e1': e1, self.output: y}, + batch_size=self.c["batch_size"], nb_epoch=self.c["nb_epoch"], + verbose=2) + + def load_weights(self, *args, **kwargs): + self.weights_to_load = (args, kwargs) + + def save_weights(self, *args, **kwargs): + self.model.save_weights(*args, **kwargs) + + def precompute_embeddings(self, gr): + sentences = [" ".join(words) for words in gr["s0"] + gr["s1"]] + self.st.batch_embedding(sentences) + + def predict(self, gr): + e0, e1, _ = self.prepare_data(gr, balance=False) + self.prep_model() + result = self.model.predict({'e0': e0, 'e1': e1}) + return result + + +def prep_model(vocab, c, output='score'): + return STModel(c, output) diff --git a/pysts/embedding.py b/pysts/embedding.py index 876d14c..234bf98 100644 --- a/pysts/embedding.py +++ b/pysts/embedding.py @@ -10,13 +10,6 @@ from __future__ import print_function import numpy as np -import os - -try: - import skipthoughts - skipthoughts_available = True -except ImportError: - skipthoughts_available = False class Embedder(object): @@ -101,40 +94,61 @@ def __init__(self, N=300, w2vpath='GoogleNews-vectors-negative%d.bin.gz'): class SkipThought(Embedder): - def __init__(self, datadir, uni_bi="combined"): - """ Embed Skip_Thought vectors, using precomputed model in npy format. - - Args: - uni_bi: possible values are "uni", "bi" or "combined" determining what kind of embedding should be used. - - - todo: is argument ndim working properly? - """ + """Embedding of sentences, using precomputed skip-thought model [1506.06726]. + To set up: + * Get skipthoughts.py file from https://github.com/ryankiros/skip-thoughts + * Execute the "Getting started" wgets in its README + * set up config['skipthoughts_datadir'] with path to dir where these files + were downloaded + + Skip-thoughts use embeddings build from the Children Book dataset. + + Config: + * config['skipthoughts_uni_bi'] = 'uni' or 'bi' or 'combined'; Two different + skipthought versions, or their combination (see original paper for details)""" + + def __init__(self, c=None): + """Load precomputed model.""" + if not c: + c = {} + self.c = c import skipthoughts self.encode = skipthoughts.encode - if datadir is None: - datadir = os.path.realpath('__file__') - self.datadir = self.datadir + if self.c.get("skipthoughts_datadir"): + datadir = self.c["skipthoughts_datadir"] + else: + raise KeyError("config['skipthoughts_datadir'] is not set") # table for memoizing embeddings self.cache_table = {} - self.uni_bi = uni_bi - if uni_bi in ("uni", "bi"): + self.uni_bi = self.c["skipthoughts_uni_bi"] + if self.uni_bi in ("uni", "bi"): self.N = 2400 - elif uni_bi == "combined": + elif self.uni_bi == "combined": self.N = 4800 else: - raise ValueError("uni_bi has invalid value. Valid values: 'uni', 'bi', 'combined'") + raise KeyError("config['skipthoughts_uni_bi'] has invalid value. 
 
-        self.skipthoughts.path_to_models = self.datadir
-        self.skipthoughts.path_to_tables = self.datadir
-        self.skipthoughts.path_to_umodel = skipthoughts.path_to_models + 'uni_skip.npz'
-        self.skipthoughts.path_to_bmodel = skipthoughts.path_to_models + 'bi_skip.npz'
+        skipthoughts.path_to_models = datadir
+        skipthoughts.path_to_tables = datadir
+        skipthoughts.path_to_umodel = skipthoughts.path_to_models + 'uni_skip.npz'
+        skipthoughts.path_to_bmodel = skipthoughts.path_to_models + 'bi_skip.npz'
 
         self.st = skipthoughts.load_model()
 
+    def batch_embedding(self, sentences):
+        """Precompute batch embeddings of sentences, and remember them for later
+        use (during this run; i.e. without saving them to a file).
+        sentences is a list of strings."""
+
+        new_sentences = list(set(sentences) - set(self.cache_table.keys()))
+        new_sentences = list(filter(lambda sen: len(sen) > 0, new_sentences))
+        embeddings = self.encode(self.st, new_sentences, verbose=False, use_eos=self.c.get("use_eos"))
+        assert len(new_sentences) == len(embeddings)
+        self.cache_table.update(zip(new_sentences, embeddings))
+
     def map_tokens(self, tokens, ndim=2):
         """
         Args:
@@ -151,4 +165,13 @@ def map_tokens(self, tokens, ndim=2):
         else:
             output_vector, = self.encode(self.st, [sentence, ], verbose=False)
             self.cache_table[sentence] = output_vector
-        return output_vector
+        if self.uni_bi == 'combined':
+            return output_vector
+        elif self.uni_bi == 'uni':
+            return output_vector[:self.N]
+        elif self.uni_bi == 'bi':
+            return output_vector[self.N:]
+        else:
+            raise ValueError("skipthoughts_uni_bi has invalid value")
+
+
diff --git a/pysts/kerasts/blocks.py b/pysts/kerasts/blocks.py
index 7c8fb17..c106bcc 100644
--- a/pysts/kerasts/blocks.py
+++ b/pysts/kerasts/blocks.py
@@ -10,6 +10,7 @@
 from keras.layers.embeddings import Embedding
 from keras.layers.recurrent import GRU
 from keras.regularizers import l2
+from keras import backend as K
 
 import pysts.nlp as nlp
 
@@ -252,9 +253,9 @@ def cat_ptscorer(model, inputs, Ddim, N, l2reg, pfx='out', extra_inp=[]):
     return pfx+'cat'
 
-
-def absdiff_merge(model, inputs, pfx="out", layer_name="absdiff"):
-    """ Merging two layers into one, via element-wise subtraction and then taking absolute value.
+def absdiff_merge(model, inputs, pfx="out", layer_name="absdiff", abs_=True):
+    """ Merging two layers into one, via element-wise subtraction, and then
+    (by default) taking the absolute value.
 
     Example of usage: layer_name = absdiff_merge(model, inputs=["e0_", "e1_"])
 
     """
@@ -263,8 +264,12 @@
     if len(inputs) != 2:
         raise ValueError("absdiff_merge has to got exactly 2 inputs")
 
-    def diff(X):
-        return K.abs(X[0] - X[1])
+    if abs_:
+        def diff(X):
+            return K.abs(X[0] - X[1])
+    else:
+        def diff(X):
+            return X[0] - X[1]
 
     def output_shape(input_shapes):
         return input_shapes[0]
diff --git a/pysts/loader.py b/pysts/loader.py
index 9d68891..a91f1cd 100644
--- a/pysts/loader.py
+++ b/pysts/loader.py
@@ -285,18 +285,24 @@ def balance_dataset(ds):
     with random classifier giving 50%. This makes sense only for datasets
     with crisp 0/1 labels! """
-    # FIXME: we assume 1-labelled < 0-labelled
     y = ds[2]
     class1 = np.where(y == 1)[0]
     n_imbal = np.sum(y == 0) - np.sum(y == 1)
+    if n_imbal < 0:
+        smaller_class = np.where(y == 0)[0]
+        n_imbal = -n_imbal
+    else:
+        smaller_class = class1
+
     s0 = list(ds[0])
     s1 = list(ds[1])
     labels = list(ds[2])
    has_toklabels = len(ds) > 3
     if has_toklabels:
         toklabels = list(ds[3]) if ds[3] is not None else None
-    for i in np.random.choice(class1, size=n_imbal):
+
+    for i in np.random.choice(smaller_class, size=n_imbal):
         s0.append(ds[0][i])
         s1.append(ds[1][i])
         labels.append(ds[2][i])