From 6aa4e2dcf00fb48bb0c183d0f703b1e49d6c6619 Mon Sep 17 00:00:00 2001
From: husein zolkepli <husein.zol05@gmail.com>
Date: Wed, 16 Jan 2019 20:14:29 +0800
Subject: [PATCH] added session for summarization agents

---
 session/summary/residual-freeze.ipynb     | 746 ++++++++++++++++++++++
 session/summary/skip-news.py              | 256 ++++++++
 session/summary/skip-thought-freeze.ipynb | 676 ++++++++++++++++++++
 session/summary/skip-wiki.py              | 319 +++++++++
 session/word2vec/wiki-256.py              | 103 +++
 session/word2vec/word2vec.py              | 173 +++--
 6 files changed, 2204 insertions(+), 69 deletions(-)
 create mode 100644 session/summary/residual-freeze.ipynb
 create mode 100644 session/summary/skip-news.py
 create mode 100644 session/summary/skip-thought-freeze.ipynb
 create mode 100644 session/summary/skip-wiki.py
 create mode 100644 session/word2vec/wiki-256.py

diff --git a/session/summary/residual-freeze.ipynb b/session/summary/residual-freeze.ipynb
new file mode 100644
index 00000000..c103cefe
--- /dev/null
+++ b/session/summary/residual-freeze.ipynb
@@ -0,0 +1,746 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Attention:\n",
+    "    def __init__(self,hidden_size):\n",
+    "        self.hidden_size = hidden_size\n",
+    "        self.dense_layer = tf.layers.Dense(hidden_size)\n",
+    "        self.v = tf.random_normal([hidden_size],mean=0,stddev=1/np.sqrt(hidden_size))\n",
+    "        \n",
+    "    def score(self, hidden_tensor, encoder_outputs):\n",
+    "        energy = tf.nn.tanh(self.dense_layer(tf.concat([hidden_tensor,encoder_outputs],2)))\n",
+    "        energy = tf.transpose(energy,[0,2,1])\n",
+    "        batch_size = tf.shape(encoder_outputs)[0]\n",
+    "        v = tf.expand_dims(tf.tile(tf.expand_dims(self.v,0),[batch_size,1]),1)\n",
+    "        energy = tf.matmul(v,energy)\n",
+    "        return tf.squeeze(energy,1)\n",
+    "    \n",
+    "    def __call__(self, hidden, encoder_outputs):\n",
+    "        seq_len = tf.shape(encoder_outputs)[1]\n",
+    "        batch_size = tf.shape(encoder_outputs)[0]\n",
+    "        H = tf.tile(tf.expand_dims(hidden, 1),[1,seq_len,1])\n",
+    "        attn_energies = self.score(H,encoder_outputs)\n",
+    "        return tf.expand_dims(tf.nn.softmax(attn_energies),1)\n",
+    "\n",
+    "class Model:\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        dict_size,\n",
+    "        size_layers,\n",
+    "        learning_rate,\n",
+    "        maxlen,\n",
+    "        num_blocks = 3,\n",
+    "    ):\n",
+    "        block_size = size_layers\n",
+    "        self.BEFORE = tf.placeholder(tf.int32,[None,maxlen])\n",
+    "        self.INPUT = tf.placeholder(tf.int32,[None,maxlen])\n",
+    "        self.AFTER = tf.placeholder(tf.int32,[None,maxlen])\n",
+    "        self.batch_size = tf.shape(self.INPUT)[0]\n",
+    "        self.output_layer = tf.layers.Dense(dict_size, name=\"output_layer\")\n",
+    "        self.output_layer.build(size_layers)\n",
+    "        self.embeddings = tf.Variable(tf.random_uniform([dict_size, size_layers], -1, 1))\n",
+    "        embedded = tf.nn.embedding_lookup(self.embeddings, self.INPUT)\n",
+    "        self.attention = Attention(size_layers)\n",
+    "\n",
+    "        def residual_block(x, size, rate, block, reuse = False):\n",
+    "            with tf.variable_scope(\n",
+    "                'block_%d_%d' % (block, rate), reuse = reuse\n",
+    "            ):\n",
+    "                attn_weights = self.attention(tf.reduce_sum(x,axis=1), x)\n",
+    "                conv_filter = tf.layers.conv1d(\n",
+    "                    attn_weights,\n",
+    "                    x.shape[2] // 4,\n",
+    "                    kernel_size = size,\n",
+    "                    strides = 1,\n",
+    "                    padding = 'same',\n",
+    "                    dilation_rate = rate,\n",
+    "                    activation = tf.nn.tanh,\n",
+    "                )\n",
+    "                conv_gate = tf.layers.conv1d(\n",
+    "                    x,\n",
+    "                    x.shape[2] // 4,\n",
+    "                    kernel_size = size,\n",
+    "                    strides = 1,\n",
+    "                    padding = 'same',\n",
+    "                    dilation_rate = rate,\n",
+    "                    activation = tf.nn.sigmoid,\n",
+    "                )\n",
+    "                out = tf.multiply(conv_filter, conv_gate)\n",
+    "                out = tf.layers.conv1d(\n",
+    "                    out,\n",
+    "                    block_size,\n",
+    "                    kernel_size = 1,\n",
+    "                    strides = 1,\n",
+    "                    padding = 'same',\n",
+    "                    activation = tf.nn.tanh,\n",
+    "                )\n",
+    "                return tf.add(x, out), out\n",
+    "\n",
+    "        forward = tf.layers.conv1d(\n",
+    "            embedded, block_size, kernel_size = 1, strides = 1, padding = 'SAME'\n",
+    "        )\n",
+    "        zeros = tf.zeros_like(forward)\n",
+    "        for i in range(num_blocks):\n",
+    "            for r in [1, 2, 4, 8, 16]:\n",
+    "                forward, s = residual_block(\n",
+    "                    forward, size = 7, rate = r, block = i\n",
+    "                )\n",
+    "                zeros = tf.add(zeros, s)\n",
+    "        forward = tf.layers.conv1d(\n",
+    "            zeros,\n",
+    "            block_size,\n",
+    "            kernel_size = 1,\n",
+    "            strides = 1,\n",
+    "            padding = 'SAME',\n",
+    "            activation = tf.nn.tanh,\n",
+    "        )\n",
+    "        self.get_thought = tf.reduce_sum(forward,axis=1, name = 'logits')\n",
+    "        \n",
+    "        def decoder(labels, reuse):\n",
+    "            decoder_in = tf.nn.embedding_lookup(self.embeddings, labels)\n",
+    "            forward = tf.layers.conv1d(\n",
+    "                decoder_in, block_size, kernel_size = 1, strides = 1, padding = 'SAME'\n",
+    "            )\n",
+    "            zeros = tf.zeros_like(forward)\n",
+    "            for r in [8, 16, 24]:\n",
+    "                forward, s = residual_block(forward, size = 7, rate = r, block = 10, reuse = reuse)\n",
+    "                zeros = tf.add(zeros, s)\n",
+    "            return tf.layers.conv1d(\n",
+    "                zeros,\n",
+    "                block_size,\n",
+    "                kernel_size = 1,\n",
+    "                strides = 1,\n",
+    "                padding = 'SAME',\n",
+    "                activation = tf.nn.tanh,\n",
+    "            )\n",
+    "        \n",
+    "        fw_logits = decoder(self.AFTER, False)\n",
+    "        bw_logits = decoder(self.BEFORE, True)\n",
+    "        self.attention = tf.matmul(\n",
+    "            self.get_thought, tf.transpose(self.embeddings), name = 'attention'\n",
+    "        )\n",
+    "        self.loss = self.calculate_loss(fw_logits, self.AFTER) + self.calculate_loss(bw_logits, self.BEFORE)\n",
+    "        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)\n",
+    "    \n",
+    "    def calculate_loss(self, outputs, labels):\n",
+    "        mask = tf.cast(tf.sign(labels), tf.float32)\n",
+    "        logits = self.output_layer(outputs)\n",
+    "        return tf.contrib.seq2seq.sequence_loss(logits, labels, mask)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "200004"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import json\n",
+    "with open('skip-wiki-dict.json') as fopen:\n",
+    "    dictionary = json.load(fopen)\n",
+    "len(dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rename(checkpoint_dir, replace_from, replace_to, add_prefix, dry_run=False):\n",
+    "    \"\"\"Re-save a checkpoint's variables under new names in skip-rename/model.ckpt.\n",
+    "\n",
+    "    `replace_from`/`replace_to` substitute a substring of every variable name\n",
+    "    (skipped when either is None); a truthy `add_prefix` is prepended afterwards.\n",
+    "    With `dry_run` the renames are only printed; nothing is loaded or saved.\n",
+    "    Works in a private graph so variables of the default graph are not saved too.\n",
+    "    \"\"\"\n",
+    "    with tf.Graph().as_default(), tf.Session() as sess:\n",
+    "        for var_name, _ in tf.contrib.framework.list_variables(checkpoint_dir):\n",
+    "            new_name = var_name\n",
+    "            if replace_from is not None and replace_to is not None:\n",
+    "                new_name = new_name.replace(replace_from, replace_to)\n",
+    "            if add_prefix:\n",
+    "                new_name = add_prefix + new_name\n",
+    "            if dry_run:\n",
+    "                print('%s would be renamed to %s.' % (var_name, new_name))\n",
+    "                continue\n",
+    "            print('Renaming %s to %s.' % (var_name, new_name))\n",
+    "            value = tf.contrib.framework.load_variable(checkpoint_dir, var_name)\n",
+    "            tf.Variable(value, name=new_name)\n",
+    "        if not dry_run:\n",
+    "            sess.run(tf.global_variables_initializer())\n",
+    "            tf.train.Saver().save(sess, 'skip-rename/model.ckpt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# rename('skip/model.ckpt','thought_scope_e1d42da4-5ae4-4898-b0f1-f52f687a4e28',\n",
+    "#       'thought_scope',None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\n",
+      "  warnings.warn('An interactive session is already active. This can '\n"
+     ]
+    }
+   ],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "sess = tf.InteractiveSession()\n",
+    "model = Model(len(dictionary), 64, 1e-3, 50)\n",
+    "sess.run(tf.global_variables_initializer())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Restoring parameters from skip-wiki/model.ckpt\n"
+     ]
+    }
+   ],
+   "source": [
+    "saver=tf.train.Saver(tf.global_variables())\n",
+    "saver.restore(sess, 'skip-wiki/model.ckpt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "def sequence(s, w2v_model, maxlen, vocabulary_size):\n",
+    "    \"\"\"Encode sentence `s` as a zero-padded int32 id vector of length `maxlen`.\n",
+    "\n",
+    "    Words missing from `w2v_model`, or with an id >= `vocabulary_size`, become\n",
+    "    id 1. At most `maxlen - 2` words are kept and id 3 is written directly\n",
+    "    after the last kept word (index 0 when `s` contains no words).\n",
+    "    \"\"\"\n",
+    "    np_array = np.zeros((maxlen),dtype=np.int32)\n",
+    "    words = s.split()[:maxlen - 2]\n",
+    "    for no, word in enumerate(words):\n",
+    "        word_id = w2v_model[word] if word in w2v_model else 1\n",
+    "        np_array[no] = word_id if word_id < vocabulary_size else 1\n",
+    "    np_array[len(words)] = 3\n",
+    "    return np_array\n",
+    "\n",
+    "def generate_batch(sentences,batch_size,w2v_model,maxlen,vocabulary_size):\n",
+    "    \"\"\"Return (before, current, after) id arrays for `batch_size` sentence triples.\n",
+    "\n",
+    "    Demo version: always reads the sentences starting at index 1000 and prints them.\n",
+    "    \"\"\"\n",
+    "    start = 1000\n",
+    "    picked = sentences[start:start + batch_size + 2]\n",
+    "    print(picked)\n",
+    "    encoded = np.array([sequence(text,w2v_model,maxlen,vocabulary_size) for text in picked])\n",
+    "    triples = np.array([encoded[i:i+3] for i in range(batch_size)])\n",
+    "    return triples[:,0], triples[:,1], triples[:,2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "with open('news-bm.json','r') as fopen:\n",
+    "    sentences = json.loads(fopen.read())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['pahang diwakili pemangku raja pahang tengku abdullah sultan ahmad shah manakala kelantan diwakili pemangku raja kelantan dr', 'tengku muhammad faiz petra', 'pada hari kedua mesyuarat yang bermula kira pukul pagi itu raja-raja melayu diiringi menteri besar masing-masing manakala yang dipertua negeri pulau pinang sabah dan melaka diiringi ketua menteri masing-masing']\n"
+     ]
+    }
+   ],
+   "source": [
+    "bw_input, current_input, fw_input = generate_batch(sentences,1,dictionary,50,len(dictionary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "encoded = sess.run(model.get_thought,feed_dict={model.INPUT:fw_input})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 0.07066324,  0.13310698, -0.62426007, -0.4613824 , -0.17707539,\n",
+       "        -0.3925364 ,  1.1155262 ,  1.1873002 ,  0.48969495,  0.81452906,\n",
+       "        -0.1577659 , -0.17734857, -0.37914753, -0.7942437 ,  0.56107384,\n",
+       "         0.29675886, -0.7340232 , -0.07755096,  0.29897642, -0.0737358 ,\n",
+       "         0.6024291 ,  0.95485014, -0.95064414, -0.63884234,  0.03552189,\n",
+       "        -0.40762448, -0.25227717, -0.24423571,  0.37850273, -0.11428429,\n",
+       "        -0.8386208 , -0.2072649 , -0.9640392 , -0.63121736, -0.5339436 ,\n",
+       "         0.96501446, -0.12163527,  0.31738836,  0.9421329 , -0.51436657,\n",
+       "         0.6444553 , -0.2436821 , -0.4731561 , -0.00128211, -0.05046922,\n",
+       "         0.5482205 ,  0.85903156,  0.681826  ,  0.02734087,  0.5048841 ,\n",
+       "         0.08036114,  0.00166782,  0.5863657 ,  0.37902188, -0.14853519,\n",
+       "         0.11486635,  0.03344561,  1.1854374 , -0.07733421, -0.8486209 ,\n",
+       "         0.9942196 ,  0.9136265 , -0.10116772, -0.21602613]],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "encoded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "strings = ','.join(\n",
+    "    [\n",
+    "        n.name\n",
+    "        for n in tf.get_default_graph().as_graph_def().node\n",
+    "        if (\n",
+    "            'Variable' in n.op\n",
+    "            or n.name.find('Placeholder') >= 0\n",
+    "            or 'add_1' in n.name\n",
+    "            or 'attention' in n.name\n",
+    "            or 'logits' in n.name\n",
+    "        )\n",
+    "        and 'Adam' not in n.name\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Placeholder',\n",
+       " 'Placeholder_1',\n",
+       " 'Placeholder_2',\n",
+       " 'output_layer/kernel',\n",
+       " 'output_layer/bias',\n",
+       " 'Variable',\n",
+       " 'conv1d/kernel',\n",
+       " 'conv1d/bias',\n",
+       " 'block_0_1/dense/kernel',\n",
+       " 'block_0_1/dense/bias',\n",
+       " 'block_0_1/dense/Tensordot/add_1',\n",
+       " 'block_0_1/conv1d/kernel',\n",
+       " 'block_0_1/conv1d/bias',\n",
+       " 'block_0_1/conv1d_1/kernel',\n",
+       " 'block_0_1/conv1d_1/bias',\n",
+       " 'block_0_1/conv1d_2/kernel',\n",
+       " 'block_0_1/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_1/add_1',\n",
+       " 'block_0_2/conv1d/kernel',\n",
+       " 'block_0_2/conv1d/bias',\n",
+       " 'block_0_2/conv1d_1/kernel',\n",
+       " 'block_0_2/conv1d_1/bias',\n",
+       " 'block_0_2/conv1d_2/kernel',\n",
+       " 'block_0_2/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_2/add_1',\n",
+       " 'block_0_4/conv1d/kernel',\n",
+       " 'block_0_4/conv1d/bias',\n",
+       " 'block_0_4/conv1d_1/kernel',\n",
+       " 'block_0_4/conv1d_1/bias',\n",
+       " 'block_0_4/conv1d_2/kernel',\n",
+       " 'block_0_4/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_3/add_1',\n",
+       " 'block_0_8/conv1d/kernel',\n",
+       " 'block_0_8/conv1d/bias',\n",
+       " 'block_0_8/conv1d_1/kernel',\n",
+       " 'block_0_8/conv1d_1/bias',\n",
+       " 'block_0_8/conv1d_2/kernel',\n",
+       " 'block_0_8/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_4/add_1',\n",
+       " 'block_0_16/conv1d/kernel',\n",
+       " 'block_0_16/conv1d/bias',\n",
+       " 'block_0_16/conv1d_1/kernel',\n",
+       " 'block_0_16/conv1d_1/bias',\n",
+       " 'block_0_16/conv1d_2/kernel',\n",
+       " 'block_0_16/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_5/add_1',\n",
+       " 'block_1_1/conv1d/kernel',\n",
+       " 'block_1_1/conv1d/bias',\n",
+       " 'block_1_1/conv1d_1/kernel',\n",
+       " 'block_1_1/conv1d_1/bias',\n",
+       " 'block_1_1/conv1d_2/kernel',\n",
+       " 'block_1_1/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_6/add_1',\n",
+       " 'block_1_2/conv1d/kernel',\n",
+       " 'block_1_2/conv1d/bias',\n",
+       " 'block_1_2/conv1d_1/kernel',\n",
+       " 'block_1_2/conv1d_1/bias',\n",
+       " 'block_1_2/conv1d_2/kernel',\n",
+       " 'block_1_2/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_7/add_1',\n",
+       " 'block_1_4/conv1d/kernel',\n",
+       " 'block_1_4/conv1d/bias',\n",
+       " 'block_1_4/conv1d_1/kernel',\n",
+       " 'block_1_4/conv1d_1/bias',\n",
+       " 'block_1_4/conv1d_2/kernel',\n",
+       " 'block_1_4/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_8/add_1',\n",
+       " 'block_1_8/conv1d/kernel',\n",
+       " 'block_1_8/conv1d/bias',\n",
+       " 'block_1_8/conv1d_1/kernel',\n",
+       " 'block_1_8/conv1d_1/bias',\n",
+       " 'block_1_8/conv1d_2/kernel',\n",
+       " 'block_1_8/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_9/add_1',\n",
+       " 'block_1_16/conv1d/kernel',\n",
+       " 'block_1_16/conv1d/bias',\n",
+       " 'block_1_16/conv1d_1/kernel',\n",
+       " 'block_1_16/conv1d_1/bias',\n",
+       " 'block_1_16/conv1d_2/kernel',\n",
+       " 'block_1_16/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_10/add_1',\n",
+       " 'block_2_1/conv1d/kernel',\n",
+       " 'block_2_1/conv1d/bias',\n",
+       " 'block_2_1/conv1d_1/kernel',\n",
+       " 'block_2_1/conv1d_1/bias',\n",
+       " 'block_2_1/conv1d_2/kernel',\n",
+       " 'block_2_1/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_11/add_1',\n",
+       " 'block_2_2/conv1d/kernel',\n",
+       " 'block_2_2/conv1d/bias',\n",
+       " 'block_2_2/conv1d_1/kernel',\n",
+       " 'block_2_2/conv1d_1/bias',\n",
+       " 'block_2_2/conv1d_2/kernel',\n",
+       " 'block_2_2/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_12/add_1',\n",
+       " 'block_2_4/conv1d/kernel',\n",
+       " 'block_2_4/conv1d/bias',\n",
+       " 'block_2_4/conv1d_1/kernel',\n",
+       " 'block_2_4/conv1d_1/bias',\n",
+       " 'block_2_4/conv1d_2/kernel',\n",
+       " 'block_2_4/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_13/add_1',\n",
+       " 'block_2_8/conv1d/kernel',\n",
+       " 'block_2_8/conv1d/bias',\n",
+       " 'block_2_8/conv1d_1/kernel',\n",
+       " 'block_2_8/conv1d_1/bias',\n",
+       " 'block_2_8/conv1d_2/kernel',\n",
+       " 'block_2_8/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_14/add_1',\n",
+       " 'block_2_16/conv1d/kernel',\n",
+       " 'block_2_16/conv1d/bias',\n",
+       " 'block_2_16/conv1d_1/kernel',\n",
+       " 'block_2_16/conv1d_1/bias',\n",
+       " 'block_2_16/conv1d_2/kernel',\n",
+       " 'block_2_16/conv1d_2/bias',\n",
+       " 'conv1d_1/kernel',\n",
+       " 'conv1d_1/bias',\n",
+       " 'logits/reduction_indices',\n",
+       " 'logits',\n",
+       " 'conv1d_2/kernel',\n",
+       " 'conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_15/add_1',\n",
+       " 'block_10_8/conv1d/kernel',\n",
+       " 'block_10_8/conv1d/bias',\n",
+       " 'block_10_8/conv1d_1/kernel',\n",
+       " 'block_10_8/conv1d_1/bias',\n",
+       " 'block_10_8/conv1d_2/kernel',\n",
+       " 'block_10_8/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_16/add_1',\n",
+       " 'block_10_16/conv1d/kernel',\n",
+       " 'block_10_16/conv1d/bias',\n",
+       " 'block_10_16/conv1d_1/kernel',\n",
+       " 'block_10_16/conv1d_1/bias',\n",
+       " 'block_10_16/conv1d_2/kernel',\n",
+       " 'block_10_16/conv1d_2/bias',\n",
+       " 'block_0_1/dense/Tensordot_17/add_1',\n",
+       " 'block_10_24/conv1d/kernel',\n",
+       " 'block_10_24/conv1d/bias',\n",
+       " 'block_10_24/conv1d_1/kernel',\n",
+       " 'block_10_24/conv1d_1/bias',\n",
+       " 'block_10_24/conv1d_2/kernel',\n",
+       " 'block_10_24/conv1d_2/bias',\n",
+       " 'conv1d_3/kernel',\n",
+       " 'conv1d_3/bias',\n",
+       " 'conv1d_4/kernel',\n",
+       " 'conv1d_4/bias',\n",
+       " 'block_0_1/dense/Tensordot_18/add_1',\n",
+       " 'block_0_1/dense/Tensordot_19/add_1',\n",
+       " 'block_0_1/dense/Tensordot_20/add_1',\n",
+       " 'conv1d_5/kernel',\n",
+       " 'conv1d_5/bias',\n",
+       " 'attention',\n",
+       " 'output_layer/Tensordot/add_1',\n",
+       " 'output_layer/Tensordot_1/add_1',\n",
+       " 'beta1_power',\n",
+       " 'beta2_power']"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "strings.split(',')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def freeze_graph(model_dir, output_node_names):\n",
+    "    \"\"\"Freeze the latest checkpoint in `model_dir` into `frozen_model.pb` beside it.\n",
+    "\n",
+    "    `output_node_names` is a comma-separated string of graph node names to keep.\n",
+    "    \"\"\"\n",
+    "    import os\n",
+    "    if not tf.gfile.Exists(model_dir):\n",
+    "        raise AssertionError(\n",
+    "            \"Export directory doesn't exist. Please specify an export \"\n",
+    "            \"directory: %s\" % model_dir)\n",
+    "    checkpoint = tf.train.get_checkpoint_state(model_dir)\n",
+    "    if checkpoint is None or not checkpoint.model_checkpoint_path:\n",
+    "        raise AssertionError('No checkpoint found in directory: %s' % model_dir)\n",
+    "    input_checkpoint = checkpoint.model_checkpoint_path\n",
+    "    # dirname + join keeps a checkpoint path without any '/' from resolving to the root.\n",
+    "    output_graph = os.path.join(os.path.dirname(input_checkpoint), 'frozen_model.pb')\n",
+    "    with tf.Session(graph=tf.Graph()) as sess:\n",
+    "        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=True)\n",
+    "        saver.restore(sess, input_checkpoint)\n",
+    "        output_graph_def = tf.graph_util.convert_variables_to_constants(\n",
+    "            sess, tf.get_default_graph().as_graph_def(), output_node_names.split(\",\"))\n",
+    "        with tf.gfile.GFile(output_graph, \"wb\") as f:\n",
+    "            f.write(output_graph_def.SerializeToString())\n",
+    "        print(\"%d ops in the final graph.\" % len(output_graph_def.node))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Restoring parameters from skip-wiki/model.ckpt\n",
+      "INFO:tensorflow:Froze 127 variables.\n",
+      "Converted 127 variables to const ops.\n",
+      "2031 ops in the final graph.\n"
+     ]
+    }
+   ],
+   "source": [
+    "freeze_graph('skip-wiki', strings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_graph(frozen_graph_filename):\n",
+    "    \"\"\"Import a frozen GraphDef file into a fresh tf.Graph and return that graph.\"\"\"\n",
+    "    with tf.gfile.GFile(frozen_graph_filename, \"rb\") as handle:\n",
+    "        graph_def = tf.GraphDef.FromString(handle.read())\n",
+    "    with tf.Graph().as_default() as imported:\n",
+    "        tf.import_graph_def(graph_def)\n",
+    "    return imported"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "g=load_graph('skip-wiki/frozen_model.pb')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py:1711: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\n",
+      "  warnings.warn('An interactive session is already active. This can '\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = g.get_tensor_by_name('import/Placeholder_1:0')\n",
+    "logits = g.get_tensor_by_name('import/logits:0')\n",
+    "attention = g.get_tensor_by_name('import/attention:0')\n",
+    "test_sess = tf.InteractiveSession(graph=g)\n",
+    "out, att = test_sess.run([logits,attention], feed_dict={x:fw_input})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1, 200004)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "att.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rev_dict = {v: k for k, v in dictionary.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "38799\n",
+      "jagaannya\n",
+      "4035\n",
+      "zulkifli\n",
+      "101993\n",
+      "ferdy\n",
+      "11445\n",
+      "hoe\n",
+      "165827\n",
+      "sharidake\n",
+      "325\n",
+      "televisyen\n",
+      "1681\n",
+      "kawan\n",
+      "124186\n",
+      "diimbau\n",
+      "34683\n",
+      "luteum\n",
+      "636\n",
+      "brunei\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in att[0].argsort()[-10:][::-1]:\n",
+    "    print(i)\n",
+    "    print(rev_dict[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/session/summary/skip-news.py b/session/summary/skip-news.py
new file mode 100644
index 00000000..8fb22ade
--- /dev/null
+++ b/session/summary/skip-news.py
@@ -0,0 +1,256 @@
+
+# coding: utf-8
+
+# In[1]:
+
+
+import sys
+import warnings
+
+if not sys.warnoptions:
+    warnings.simplefilter('ignore')
+
+import tensorflow as tf
+import numpy as np
+from tqdm import tqdm
+import re
+import collections
+import json
+import os
+from tensorflow.contrib import seq2seq
+
+
+def sequence(s, w2v_model, maxlen = 50, vocabulary_size = 500000):
+    words = s.split()
+    np_array = np.zeros((maxlen), dtype = np.int32)
+    current_no = 0
+    for no, word in enumerate(words[: maxlen - 2]):
+        id_to_append = 1
+        if word in w2v_model:
+            word_id = w2v_model[word]
+            if word_id < vocabulary_size:
+                id_to_append = word_id
+        np_array[no] = id_to_append
+        current_no = no
+    np_array[current_no + 1] = 3
+    return np_array
+
+
+def batch_sequence(sentences, dictionary, maxlen = 50):
+    np_array = np.zeros((len(sentences), maxlen), dtype = np.int32)
+    for no_sentence, sentence in enumerate(sentences):
+        current_no = 0
+        for no, word in enumerate(sentence.split()[: maxlen - 2]):
+            np_array[no_sentence, no] = dictionary.get(word, 1)
+            current_no = no
+        np_array[no_sentence, current_no + 1] = 3
+    return np_array
+
+
+class Model:
+    def __init__(
+        self,
+        vocabulary_size,
+        maxlen = 50,
+        output_size = 512,
+        learning_rate = 1e-3,
+        embedding_size = 256,
+        batch_size = 16,
+        max_grad_norm = 10,
+        **kwargs
+    ):
+        word_embeddings = tf.Variable(
+            tf.random_uniform(
+                [vocabulary_size, embedding_size], -np.sqrt(3), np.sqrt(3)
+            )
+        )
+        self.output_size = output_size
+        self.maxlen = maxlen
+        self.embeddings = word_embeddings
+        self.output_layer = tf.layers.Dense(vocabulary_size)
+        self.output_layer.build(output_size)
+
+        self.BEFORE = tf.placeholder(tf.int32, [None, maxlen])
+        self.INPUT = tf.placeholder(tf.int32, [None, maxlen])
+        self.AFTER = tf.placeholder(tf.int32, [None, maxlen])
+        self.batch_size = tf.shape(self.INPUT)[0]
+
+        self.get_thought = self.thought(self.INPUT)
+        self.attention = tf.matmul(
+            self.get_thought, tf.transpose(self.embeddings), name = 'attention'
+        )
+        self.fw_logits = self.decoder(self.get_thought, self.AFTER)
+        self.bw_logits = self.decoder(self.get_thought, self.BEFORE)
+        self.loss = self.calculate_loss(
+            self.fw_logits, self.AFTER
+        ) + self.calculate_loss(self.bw_logits, self.BEFORE)
+        tvars = tf.trainable_variables()
+        grads, _ = tf.clip_by_global_norm(
+            tf.gradients(self.loss, tvars), max_grad_norm
+        )
+        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
+
+    def get_embedding(self, inputs):
+        return tf.nn.embedding_lookup(self.embeddings, inputs)
+
+    def thought(self, inputs):
+        encoder_in = self.get_embedding(inputs)
+        fw_cell = tf.nn.rnn_cell.GRUCell(self.output_size)
+        bw_cell = tf.nn.rnn_cell.GRUCell(self.output_size)
+        sequence_length = tf.reduce_sum(tf.sign(inputs), axis = 1)
+        with tf.variable_scope(
+            'thought_scope', reuse = False
+        ):
+            rnn_output = tf.nn.bidirectional_dynamic_rnn(
+                fw_cell,
+                bw_cell,
+                encoder_in,
+                sequence_length = sequence_length,
+                dtype = tf.float32,
+            )[1]
+            return sum(rnn_output)
+
+    def decoder(self, thought, labels):
+        main = tf.strided_slice(labels, [0, 0], [self.batch_size, -1], [1, 1])
+        shifted_labels = tf.concat([tf.fill([self.batch_size, 1], 2), main], 1)
+        decoder_in = self.get_embedding(shifted_labels)
+        cell = tf.nn.rnn_cell.GRUCell(self.output_size)
+        max_seq_lengths = tf.fill([self.batch_size], self.maxlen)
+        helper = seq2seq.TrainingHelper(
+            decoder_in, max_seq_lengths, time_major = False
+        )
+        decoder = seq2seq.BasicDecoder(cell, helper, thought)
+        decoder_out = seq2seq.dynamic_decode(decoder)[0].rnn_output
+        return decoder_out
+
+    def calculate_loss(self, outputs, labels):
+        mask = tf.cast(tf.sign(labels), tf.float32)
+        logits = self.output_layer(outputs)
+        return seq2seq.sequence_loss(logits, labels, mask)
+
+
+def counter_words(sentences):
+    word_counter = collections.Counter()
+    word_list = []
+    num_lines, num_words = (0, 0)
+    for i in sentences:
+        words = re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', i)
+        word_counter.update(words)
+        word_list.extend(words)
+        num_lines += 1
+        num_words += len(words)
+    return word_counter, word_list, num_lines, num_words
+
+
+def build_dict(word_counter, vocab_size = 500000):
+    count = [['PAD', 0], ['UNK', 1], ['START', 2], ['END', 3]]
+    count.extend(word_counter.most_common(vocab_size))
+    dictionary = dict()
+    for word, _ in count:
+        dictionary[word] = len(dictionary)
+    return dictionary, {word: idx for idx, word in dictionary.items()}
+
+
+def train_model(
+    train_X,
+    train_Y_before,
+    train_Y_after,
+    epoch = 10,
+    batch_size = 16,
+    embedding_size = 128,
+    maxlen = 100,
+    **kwargs
+):
+    word_counter, _, _, _ = counter_words(train_X)
+    dictionary, _ = build_dict(word_counter)
+    print(len(dictionary))
+    _graph = tf.Graph()
+    with _graph.as_default():
+        model = Model(
+            len(dictionary),
+            embedding_size = embedding_size,
+            output_size = embedding_size,
+            batch_size = batch_size,
+            maxlen = maxlen,
+            **kwargs
+        )
+        sess = tf.InteractiveSession()
+        saver = tf.train.Saver(tf.global_variables())
+        sess.run(tf.global_variables_initializer())
+    saver.save(sess, 'skip/model.ckpt')
+
+    for e in range(epoch):
+        pbar = tqdm(range(0, len(train_X), batch_size), desc = 'minibatch loop')
+        for i in pbar:
+            batch_x = batch_sequence(
+                train_X[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            batch_y_before = batch_sequence(
+                train_Y_before[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            batch_y_after = batch_sequence(
+                train_Y_after[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            loss, _ = sess.run(
+                [model.loss, model.optimizer],
+                feed_dict = {
+                    model.BEFORE: batch_y_before,
+                    model.INPUT: batch_x,
+                    model.AFTER: batch_y_after,
+                },
+            )
+            pbar.set_postfix(cost = loss)
+    saver.save(sess, 'skip/model.ckpt')
+    return sess, model, dictionary
+
+
+# In[2]:
+
+
+import json
+with open('news-bm.json','r') as fopen:
+    corpus = json.loads(fopen.read())
+# Print the size, drop entries whose len() is 10 or less, print the new size.
+print(len(corpus))
+corpus = [sentence for sentence in corpus if len(sentence) > 10]
+print(len(corpus))
+
+
+# In[3]:
+
+# Slide a 3-entry window over the corpus; each window adds one entry to left, middle and right.
+stride = 1
+t_range = int((len(corpus) - 3) / stride + 1)
+left, middle, right = [], [], []
+for i in range(t_range):
+    slices = corpus[i * stride : i * stride + 3]
+    left.append(slices[0])
+    middle.append(slices[1])
+    right.append(slices[2])
+
+
+# In[5]:
+
+# Leftover notebook check: as a script the result of this comparison is discarded.
+len(left) == len(middle) == len(right)
+
+
+# In[6]:
+
+# Shuffle the three lists with one call so the triples stay aligned.
+from sklearn.utils import shuffle
+left, middle, right = shuffle(left, middle, right)
+
+
+# In[ ]:
+
+# Train with `middle` as input and left/right as before/after, then dump the vocabulary.
+_,_,dictionary = train_model(middle,left,right)
+with open('skip-news-dict.json', 'w') as fopen:
+    fopen.write(json.dumps(dictionary))
\ No newline at end of file
diff --git a/session/summary/skip-thought-freeze.ipynb b/session/summary/skip-thought-freeze.ipynb
new file mode 100644
index 00000000..debe7454
--- /dev/null
+++ b/session/summary/skip-thought-freeze.ipynb
@@ -0,0 +1,676 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "from tensorflow.contrib import seq2seq\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Model:\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        vocabulary_size,\n",
+    "        maxlen = 50,\n",
+    "        output_size = 512,\n",
+    "        learning_rate = 1e-3,\n",
+    "        embedding_size = 256,\n",
+    "        batch_size = 16,\n",
+    "        max_grad_norm = 10,\n",
+    "        **kwargs\n",
+    "    ):\n",
+    "        word_embeddings = tf.Variable(\n",
+    "            tf.random_uniform(\n",
+    "                [vocabulary_size, embedding_size], -np.sqrt(3), np.sqrt(3)\n",
+    "            )\n",
+    "        )\n",
+    "        self.output_size = output_size\n",
+    "        self.maxlen = maxlen\n",
+    "        self.embeddings = word_embeddings\n",
+    "        self.output_layer = tf.layers.Dense(vocabulary_size)\n",
+    "        self.output_layer.build(output_size)\n",
+    "\n",
+    "        self.BEFORE = tf.placeholder(tf.int32, [None, maxlen])\n",
+    "        self.INPUT = tf.placeholder(tf.int32, [None, maxlen])\n",
+    "        self.AFTER = tf.placeholder(tf.int32, [None, maxlen])\n",
+    "        self.batch_size = tf.shape(self.INPUT)[0]\n",
+    "\n",
+    "        self.get_thought = self.thought(self.INPUT)\n",
+    "        self.attention = tf.matmul(\n",
+    "            self.get_thought, tf.transpose(self.embeddings), name = 'attention'\n",
+    "        )\n",
+    "        self.fw_logits = self.decoder(self.get_thought, self.AFTER)\n",
+    "        self.bw_logits = self.decoder(self.get_thought, self.BEFORE)\n",
+    "        self.loss = self.calculate_loss(\n",
+    "            self.fw_logits, self.AFTER\n",
+    "        ) + self.calculate_loss(self.bw_logits, self.BEFORE)\n",
+    "        tvars = tf.trainable_variables()\n",
+    "        grads, _ = tf.clip_by_global_norm(\n",
+    "            tf.gradients(self.loss, tvars), max_grad_norm\n",
+    "        )\n",
+    "        self.optimizer = tf.train.AdamOptimizer(learning_rate).apply_gradients(\n",
+    "            zip(grads, tvars)\n",
+    "        )\n",
+    "\n",
+    "    def get_embedding(self, inputs):\n",
+    "        return tf.nn.embedding_lookup(self.embeddings, inputs)\n",
+    "\n",
+    "    def thought(self, inputs):\n",
+    "        encoder_in = self.get_embedding(inputs)\n",
+    "        fw_cell = tf.nn.rnn_cell.GRUCell(self.output_size)\n",
+    "        bw_cell = tf.nn.rnn_cell.GRUCell(self.output_size)\n",
+    "        sequence_length = tf.reduce_sum(tf.sign(inputs), axis = 1)\n",
+    "        with tf.variable_scope(\n",
+    "            'thought_scope', reuse = False\n",
+    "        ):\n",
+    "            rnn_output = tf.nn.bidirectional_dynamic_rnn(\n",
+    "                fw_cell,\n",
+    "                bw_cell,\n",
+    "                encoder_in,\n",
+    "                sequence_length = sequence_length,\n",
+    "                dtype = tf.float32,\n",
+    "            )[1]\n",
+    "            return sum(rnn_output)\n",
+    "\n",
+    "    def decoder(self, thought, labels):\n",
+    "        main = tf.strided_slice(labels, [0, 0], [self.batch_size, -1], [1, 1])\n",
+    "        shifted_labels = tf.concat([tf.fill([self.batch_size, 1], 2), main], 1)\n",
+    "        decoder_in = self.get_embedding(shifted_labels)\n",
+    "        cell = tf.nn.rnn_cell.GRUCell(self.output_size)\n",
+    "        max_seq_lengths = tf.fill([self.batch_size], self.maxlen)\n",
+    "        helper = seq2seq.TrainingHelper(\n",
+    "            decoder_in, max_seq_lengths, time_major = False\n",
+    "        )\n",
+    "        decoder = seq2seq.BasicDecoder(cell, helper, thought)\n",
+    "        decoder_out = seq2seq.dynamic_decode(decoder)[0].rnn_output\n",
+    "        return decoder_out\n",
+    "\n",
+    "    def calculate_loss(self, outputs, labels):\n",
+    "        mask = tf.cast(tf.sign(labels), tf.float32)\n",
+    "        logits = self.output_layer(outputs)\n",
+    "        return seq2seq.sequence_loss(logits, labels, mask)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "54718"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import json\n",
+    "with open('skip-news-dict.json') as fopen:\n",
+    "    dictionary = json.load(fopen)\n",
+    "len(dictionary)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rename(checkpoint_dir, replace_from, replace_to, add_prefix, dry_run=False):\n",
+    "    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)\n",
+    "    with tf.Session() as sess:\n",
+    "        for var_name, _ in tf.contrib.framework.list_variables(checkpoint_dir):\n",
+    "            var = tf.contrib.framework.load_variable(checkpoint_dir, var_name)\n",
+    "            new_name = var_name\n",
+    "            if None not in [replace_from, replace_to]:\n",
+    "                new_name = new_name.replace(replace_from, replace_to)\n",
+    "            if add_prefix:\n",
+    "                new_name = add_prefix + new_name\n",
+    "\n",
+    "            if dry_run:\n",
+    "                print('%s would be renamed to %s.' % (var_name, new_name))\n",
+    "            else:\n",
+    "                print('Renaming %s to %s.' % (var_name, new_name))\n",
+    "                # Rename the variable\n",
+    "                var = tf.Variable(var, name=new_name)\n",
+    "\n",
+    "        if not dry_run:\n",
+    "            # Save the variables\n",
+    "            saver = tf.train.Saver()\n",
+    "            sess.run(tf.global_variables_initializer())\n",
+    "            saver.save(sess, 'skip-rename/model.ckpt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# rename('skip/model.ckpt','thought_scope_e1d42da4-5ae4-4898-b0f1-f52f687a4e28',\n",
+    "#       'thought_scope',None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tf.reset_default_graph()\n",
+    "sess = tf.InteractiveSession()\n",
+    "model = Model(len(dictionary), embedding_size = 128, output_size = 128, batch_size=16,maxlen=100)\n",
+    "sess.run(tf.global_variables_initializer())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Variable:0',\n",
+       " 'dense/kernel:0',\n",
+       " 'dense/bias:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/kernel:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/bias:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/kernel:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/bias:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/kernel:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/bias:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/kernel:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/bias:0',\n",
+       " 'decoder/gru_cell/gates/kernel:0',\n",
+       " 'decoder/gru_cell/gates/bias:0',\n",
+       " 'decoder/gru_cell/candidate/kernel:0',\n",
+       " 'decoder/gru_cell/candidate/bias:0',\n",
+       " 'decoder_1/gru_cell/gates/kernel:0',\n",
+       " 'decoder_1/gru_cell/gates/bias:0',\n",
+       " 'decoder_1/gru_cell/candidate/kernel:0',\n",
+       " 'decoder_1/gru_cell/candidate/bias:0',\n",
+       " 'beta1_power:0',\n",
+       " 'beta2_power:0',\n",
+       " 'Variable/Adam:0',\n",
+       " 'Variable/Adam_1:0',\n",
+       " 'dense/kernel/Adam:0',\n",
+       " 'dense/kernel/Adam_1:0',\n",
+       " 'dense/bias/Adam:0',\n",
+       " 'dense/bias/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/kernel/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/kernel/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/bias/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/bias/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/kernel/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/kernel/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/bias/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/bias/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/kernel/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/kernel/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/bias/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/bias/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/kernel/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/kernel/Adam_1:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/bias/Adam:0',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/bias/Adam_1:0',\n",
+       " 'decoder/gru_cell/gates/kernel/Adam:0',\n",
+       " 'decoder/gru_cell/gates/kernel/Adam_1:0',\n",
+       " 'decoder/gru_cell/gates/bias/Adam:0',\n",
+       " 'decoder/gru_cell/gates/bias/Adam_1:0',\n",
+       " 'decoder/gru_cell/candidate/kernel/Adam:0',\n",
+       " 'decoder/gru_cell/candidate/kernel/Adam_1:0',\n",
+       " 'decoder/gru_cell/candidate/bias/Adam:0',\n",
+       " 'decoder/gru_cell/candidate/bias/Adam_1:0',\n",
+       " 'decoder_1/gru_cell/gates/kernel/Adam:0',\n",
+       " 'decoder_1/gru_cell/gates/kernel/Adam_1:0',\n",
+       " 'decoder_1/gru_cell/gates/bias/Adam:0',\n",
+       " 'decoder_1/gru_cell/gates/bias/Adam_1:0',\n",
+       " 'decoder_1/gru_cell/candidate/kernel/Adam:0',\n",
+       " 'decoder_1/gru_cell/candidate/kernel/Adam_1:0',\n",
+       " 'decoder_1/gru_cell/candidate/bias/Adam:0',\n",
+       " 'decoder_1/gru_cell/candidate/bias/Adam_1:0']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[i.name for i in tf.global_variables()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Restoring parameters from skip/model.ckpt\n"
+     ]
+    }
+   ],
+   "source": [
+    "saver=tf.train.Saver(tf.global_variables())\n",
+    "saver.restore(sess, 'skip/model.ckpt')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import random\n",
+    "\n",
+    "def sequence(s, w2v_model, maxlen, vocabulary_size):\n",
+    "    words = s.split()\n",
+    "    np_array = np.zeros((maxlen),dtype=np.int32)\n",
+    "    current_no = 0\n",
+    "    for no, word in enumerate(words[:maxlen - 2]):\n",
+    "        id_to_append = 1\n",
+    "        if word in w2v_model:\n",
+    "            word_id = w2v_model[word]\n",
+    "            if word_id < vocabulary_size:\n",
+    "                id_to_append = word_id\n",
+    "        np_array[no] = id_to_append\n",
+    "        current_no = no\n",
+    "    np_array[current_no + 1] = 3\n",
+    "    return np_array\n",
+    "\n",
+    "def generate_batch(sentences,batch_size,w2v_model,maxlen,vocabulary_size):\n",
+    "    window_size = batch_size + 2\n",
+    "    first_index = 1000\n",
+    "    batch_sentences = sentences[first_index:first_index+window_size]\n",
+    "    print(batch_sentences)\n",
+    "    batch_sequences = np.array([sequence(sentence,w2v_model,maxlen,vocabulary_size) for sentence in batch_sentences])\n",
+    "    window_shape = []\n",
+    "    for i in range(batch_size):\n",
+    "        window_shape.append(batch_sequences[i:i+3])\n",
+    "    window_shape = np.array(window_shape)\n",
+    "    return window_shape[:,0], window_shape[:,1], window_shape[:,2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "with open('news-bm.json','r') as fopen:\n",
+    "    sentences = json.loads(fopen.read())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['pahang diwakili pemangku raja pahang tengku abdullah sultan ahmad shah manakala kelantan diwakili pemangku raja kelantan dr', 'tengku muhammad faiz petra', 'pada hari kedua mesyuarat yang bermula kira pukul pagi itu raja-raja melayu diiringi menteri besar masing-masing manakala yang dipertua negeri pulau pinang sabah dan melaka diiringi ketua menteri masing-masing']\n"
+     ]
+    }
+   ],
+   "source": [
+    "bw_input, current_input, fw_input = generate_batch(sentences,1,dictionary,100,len(dictionary))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "encoded = sess.run(model.get_thought,feed_dict={model.INPUT:fw_input})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 1.0416520e-01, -5.5048943e-01, -8.6489022e-01, -4.7374249e-02,\n",
+       "         1.1276997e+00,  1.8109307e+00,  6.9022512e-01,  1.3390839e-02,\n",
+       "         2.2568166e-01, -1.2908951e+00,  1.8937750e+00, -6.6073686e-01,\n",
+       "         8.8402975e-01, -1.9575896e+00, -1.3369490e+00,  8.7181759e-01,\n",
+       "         6.0808134e-01, -1.3946321e+00,  1.2038462e-01,  1.2153907e+00,\n",
+       "         5.5231041e-01, -1.6721604e+00, -1.9526482e-04, -6.4797735e-01,\n",
+       "         1.9013047e-02,  1.6876624e+00, -1.7706637e+00,  3.0935839e-01,\n",
+       "         2.3643266e-01, -7.0005804e-01, -7.6473856e-01, -6.4990938e-01,\n",
+       "         8.5101128e-02,  1.9995425e+00, -1.3742411e+00,  1.4046657e+00,\n",
+       "         1.2373401e+00,  1.3037590e+00,  5.5078387e-01, -1.6784103e+00,\n",
+       "        -1.5637214e+00,  1.4834172e-01, -1.0372441e+00, -2.6549307e-01,\n",
+       "        -1.8813536e+00,  1.2753011e-01,  1.6532394e+00, -5.8884758e-01,\n",
+       "        -2.4680305e-01, -1.9865925e+00,  7.4487889e-01, -2.9214048e-01,\n",
+       "         7.9541242e-01, -7.1536422e-01,  9.7346407e-01, -2.9780412e-01,\n",
+       "        -1.4487034e+00,  1.0695006e+00,  7.1344101e-01, -1.7302066e-01,\n",
+       "         1.3620573e-01,  1.3157678e-01,  4.6292901e-02, -6.6628301e-01,\n",
+       "        -9.3853849e-01, -2.3844108e-02, -2.4575531e-02,  1.0214790e+00,\n",
+       "        -1.6275005e+00,  1.0081427e+00,  1.0262668e-02,  1.8486687e+00,\n",
+       "         1.1360471e+00, -8.4355950e-02, -2.7205276e-01, -3.5243776e-01,\n",
+       "        -8.7074924e-01,  9.2197478e-01, -1.6891556e+00, -1.2980952e+00,\n",
+       "        -5.3385198e-02, -6.4494354e-01,  6.6960633e-02,  4.6848938e-01,\n",
+       "        -6.9672108e-01, -1.6785400e+00,  7.6200837e-01, -5.0406647e-01,\n",
+       "        -1.4501936e+00,  1.3387250e+00, -5.6099737e-01, -2.6650232e-01,\n",
+       "        -3.4384909e-01,  1.5968245e+00, -1.7252556e+00, -2.8877589e-01,\n",
+       "         2.3671919e-01, -1.7661674e+00,  1.1558040e+00,  8.8561887e-01,\n",
+       "         5.6536603e-01,  1.6616430e+00,  1.5410352e-01, -1.9581079e-02,\n",
+       "        -1.4912158e+00,  1.4021204e+00,  9.7034663e-01,  1.5269648e+00,\n",
+       "        -6.9160253e-02, -1.2739227e+00, -2.5241894e-01, -1.5882177e+00,\n",
+       "        -1.1387055e+00, -1.7391834e+00,  1.9862680e+00,  8.7520087e-01,\n",
+       "        -1.0236690e+00,  9.9145275e-01,  1.8478736e-01, -5.5831087e-01,\n",
+       "        -8.1992823e-01,  6.1038101e-01,  4.4993043e-02,  1.4730409e+00,\n",
+       "         3.2682568e-01,  1.8637949e-01,  1.8340302e-01, -4.0022135e-01]],\n",
+       "      dtype=float32)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "encoded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "strings = ','.join(\n",
+    "    [\n",
+    "        n.name\n",
+    "        for n in tf.get_default_graph().as_graph_def().node\n",
+    "        if (\n",
+    "            'Variable' in n.op\n",
+    "            or n.name.find('Placeholder') >= 0\n",
+    "            or 'add_1' in n.name\n",
+    "            or 'attention' in n.name\n",
+    "        )\n",
+    "        and 'Adam' not in n.name\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Variable',\n",
+       " 'dense/kernel',\n",
+       " 'dense/bias',\n",
+       " 'Placeholder',\n",
+       " 'Placeholder_1',\n",
+       " 'Placeholder_2',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/kernel',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/gates/bias',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/kernel',\n",
+       " 'thought_scope/bidirectional_rnn/fw/gru_cell/candidate/bias',\n",
+       " 'thought_scope/bidirectional_rnn/fw/fw/while/add_1/y',\n",
+       " 'thought_scope/bidirectional_rnn/fw/fw/while/add_1',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/kernel',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/gates/bias',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/kernel',\n",
+       " 'thought_scope/bidirectional_rnn/bw/gru_cell/candidate/bias',\n",
+       " 'thought_scope/bidirectional_rnn/bw/bw/while/add_1/y',\n",
+       " 'thought_scope/bidirectional_rnn/bw/bw/while/add_1',\n",
+       " 'thought_scope/add_1',\n",
+       " 'attention',\n",
+       " 'decoder/gru_cell/gates/kernel',\n",
+       " 'decoder/gru_cell/gates/bias',\n",
+       " 'decoder/gru_cell/candidate/kernel',\n",
+       " 'decoder/gru_cell/candidate/bias',\n",
+       " 'decoder/while/add_1/y',\n",
+       " 'decoder/while/add_1',\n",
+       " 'decoder_1/gru_cell/gates/kernel',\n",
+       " 'decoder_1/gru_cell/gates/bias',\n",
+       " 'decoder_1/gru_cell/candidate/kernel',\n",
+       " 'decoder_1/gru_cell/candidate/bias',\n",
+       " 'decoder_1/while/add_1/y',\n",
+       " 'decoder_1/while/add_1',\n",
+       " 'gradients/thought_scope/add_1_grad/Shape',\n",
+       " 'gradients/thought_scope/add_1_grad/Shape_1',\n",
+       " 'gradients/thought_scope/add_1_grad/BroadcastGradientArgs',\n",
+       " 'gradients/thought_scope/add_1_grad/Sum',\n",
+       " 'gradients/thought_scope/add_1_grad/Reshape',\n",
+       " 'gradients/thought_scope/add_1_grad/Sum_1',\n",
+       " 'gradients/thought_scope/add_1_grad/Reshape_1',\n",
+       " 'beta1_power',\n",
+       " 'beta2_power']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "strings.split(',')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def freeze_graph(model_dir, output_node_names):\n",
+    "\n",
+    "    if not tf.gfile.Exists(model_dir):\n",
+    "        raise AssertionError(\n",
+    "            \"Export directory doesn't exists. Please specify an export \"\n",
+    "            \"directory: %s\" % model_dir)\n",
+    "\n",
+    "    checkpoint = tf.train.get_checkpoint_state(model_dir)\n",
+    "    input_checkpoint = checkpoint.model_checkpoint_path\n",
+    "    \n",
+    "    absolute_model_dir = \"/\".join(input_checkpoint.split('/')[:-1])\n",
+    "    output_graph = absolute_model_dir + \"/frozen_model.pb\"\n",
+    "    clear_devices = True\n",
+    "    with tf.Session(graph=tf.Graph()) as sess:\n",
+    "        saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices)\n",
+    "        saver.restore(sess, input_checkpoint)\n",
+    "        output_graph_def = tf.graph_util.convert_variables_to_constants(\n",
+    "            sess,\n",
+    "            tf.get_default_graph().as_graph_def(),\n",
+    "            output_node_names.split(\",\")\n",
+    "        ) \n",
+    "        with tf.gfile.GFile(output_graph, \"wb\") as f:\n",
+    "            f.write(output_graph_def.SerializeToString())\n",
+    "        print(\"%d ops in the final graph.\" % len(output_graph_def.node))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO:tensorflow:Restoring parameters from skip/model.ckpt\n",
+      "INFO:tensorflow:Froze 21 variables.\n",
+      "INFO:tensorflow:Converted 21 variables to const ops.\n",
+      "1224 ops in the final graph.\n"
+     ]
+    }
+   ],
+   "source": [
+    "freeze_graph('skip', strings)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_graph(frozen_graph_filename):\n",
+    "    with tf.gfile.GFile(frozen_graph_filename, \"rb\") as f:\n",
+    "        graph_def = tf.GraphDef()\n",
+    "        graph_def.ParseFromString(f.read())\n",
+    "    with tf.Graph().as_default() as graph:\n",
+    "        tf.import_graph_def(graph_def)\n",
+    "    return graph"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "g=load_graph('skip/frozen_model.pb')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py:1702: UserWarning: An interactive session is already active. This can cause out-of-memory errors in some cases. You must explicitly call `InteractiveSession.close()` to release resources held by the other session(s).\n",
+      "  warnings.warn('An interactive session is already active. This can '\n"
+     ]
+    }
+   ],
+   "source": [
+    "x = g.get_tensor_by_name('import/Placeholder_1:0')\n",
+    "logits = g.get_tensor_by_name('import/thought_scope/add_1:0')\n",
+    "attention = g.get_tensor_by_name('import/attention:0')\n",
+    "test_sess = tf.InteractiveSession(graph=g)\n",
+    "out, att = test_sess.run([logits,attention], feed_dict={x:fw_input})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1, 54718)"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "att.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rev_dict = {v: k for k, v in dictionary.items()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "49104\n",
+      "menjebaknya\n",
+      "54\n",
+      "seperti\n",
+      "5951\n",
+      "gunanya\n",
+      "41221\n",
+      "hawar\n",
+      "6333\n",
+      "ganjaran\n",
+      "27612\n",
+      "dayangku\n",
+      "33504\n",
+      "pijak\n",
+      "44119\n",
+      "parol\n",
+      "43996\n",
+      "poupart\n",
+      "22753\n",
+      "scb\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i in att[0].argsort()[-10:][::-1]:\n",
+    "    print(i)\n",
+    "    print(rev_dict[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/session/summary/skip-wiki.py b/session/summary/skip-wiki.py
new file mode 100644
index 00000000..50cff96d
--- /dev/null
+++ b/session/summary/skip-wiki.py
@@ -0,0 +1,319 @@
+
+# coding: utf-8
+
+# In[1]:
+
+
+import sys
+import warnings
+
+if not sys.warnoptions:
+    warnings.simplefilter('ignore')
+
+import tensorflow as tf
+import numpy as np
+from tqdm import tqdm
+import re
+import collections
+import json
+import os
+from unidecode import unidecode
+
+
+def batch_sequence(sentences, dictionary, maxlen = 50):
+    """Encode sentences as a zero-padded (len(sentences), maxlen) int32 id matrix."""
+    np_array = np.zeros((len(sentences), maxlen), dtype = np.int32)
+    for no_sentence, sentence in enumerate(sentences):
+        words = sentence.split()[: maxlen - 2]
+        for no, word in enumerate(words):
+            np_array[no_sentence, no] = dictionary.get(word, 1)  # 1 is the fallback id for unseen words
+        np_array[no_sentence, len(words)] = 3  # id 3 directly follows the last word (slot 0 if there is none)
+    return np_array
+
+
+class Attention:
+    """Scores every encoder step against one summary vector per example and softmaxes the scores."""
+    def __init__(self,hidden_size):
+        self.hidden_size = hidden_size
+        self.dense_layer = tf.layers.Dense(hidden_size)
+        # Wrapped in tf.Variable: a bare tf.random_normal op is re-sampled on every run and is never trained or saved.
+        self.v = tf.Variable(tf.random_normal([hidden_size],mean=0,stddev=1/np.sqrt(hidden_size)))
+        
+    def score(self, hidden_tensor, encoder_outputs):
+        """Return one unnormalised energy per encoder step: v dot tanh(dense([hidden; encoder]))."""
+        energy = tf.nn.tanh(self.dense_layer(tf.concat([hidden_tensor,encoder_outputs],2)))
+        energy = tf.transpose(energy,[0,2,1])
+        batch_size = tf.shape(encoder_outputs)[0]
+        v = tf.expand_dims(tf.tile(tf.expand_dims(self.v,0),[batch_size,1]),1)
+        return tf.squeeze(tf.matmul(v,energy),1)
+    
+    def __call__(self, hidden, encoder_outputs):
+        """Tile `hidden` along axis 1 of `encoder_outputs`, score it, and return the softmax with an extra axis 1."""
+        H = tf.tile(tf.expand_dims(hidden, 1),[1,tf.shape(encoder_outputs)[1],1])
+        return tf.expand_dims(tf.nn.softmax(self.score(H,encoder_outputs)),1)
+
+class Model:
+    def __init__(
+        self,
+        dict_size,
+        size_layers,
+        learning_rate,
+        maxlen,
+        num_blocks = 3,
+    ):
+        block_size = size_layers
+        self.BEFORE = tf.placeholder(tf.int32,[None,maxlen])
+        self.INPUT = tf.placeholder(tf.int32,[None,maxlen])
+        self.AFTER = tf.placeholder(tf.int32,[None,maxlen])
+        self.batch_size = tf.shape(self.INPUT)[0]
+        self.output_layer = tf.layers.Dense(dict_size, name="output_layer")
+        self.output_layer.build(size_layers)
+        self.embeddings = tf.Variable(tf.random_uniform([dict_size, size_layers], -1, 1))
+        embedded = tf.nn.embedding_lookup(self.embeddings, self.INPUT)
+        self.attention = Attention(size_layers)
+
+        def residual_block(x, size, rate, block, reuse = False):
+            with tf.variable_scope(
+                'block_%d_%d' % (block, rate), reuse = reuse
+            ):
+                attn_weights = self.attention(tf.reduce_sum(x,axis=1), x)
+                conv_filter = tf.layers.conv1d(
+                    attn_weights,
+                    x.shape[2] // 4,
+                    kernel_size = size,
+                    strides = 1,
+                    padding = 'same',
+                    dilation_rate = rate,
+                    activation = tf.nn.tanh,
+                )
+                conv_gate = tf.layers.conv1d(
+                    x,
+                    x.shape[2] // 4,
+                    kernel_size = size,
+                    strides = 1,
+                    padding = 'same',
+                    dilation_rate = rate,
+                    activation = tf.nn.sigmoid,
+                )
+                out = tf.multiply(conv_filter, conv_gate)
+                out = tf.layers.conv1d(
+                    out,
+                    block_size,
+                    kernel_size = 1,
+                    strides = 1,
+                    padding = 'same',
+                    activation = tf.nn.tanh,
+                )
+                return tf.add(x, out), out
+
+        forward = tf.layers.conv1d(
+            embedded, block_size, kernel_size = 1, strides = 1, padding = 'SAME'
+        )
+        zeros = tf.zeros_like(forward)
+        for i in range(num_blocks):
+            for r in [1, 2, 4, 8, 16]:
+                forward, s = residual_block(
+                    forward, size = 7, rate = r, block = i
+                )
+                zeros = tf.add(zeros, s)
+        forward = tf.layers.conv1d(
+            zeros,
+            block_size,
+            kernel_size = 1,
+            strides = 1,
+            padding = 'SAME',
+            activation = tf.nn.tanh,
+        )
+        self.get_thought = tf.reduce_sum(forward,axis=1, name = 'logits')
+        
+        def decoder(labels, reuse):
+            decoder_in = tf.nn.embedding_lookup(self.embeddings, labels)
+            forward = tf.layers.conv1d(
+                decoder_in, block_size, kernel_size = 1, strides = 1, padding = 'SAME'
+            )
+            zeros = tf.zeros_like(forward)
+            for r in [8, 16, 24]:
+                forward, s = residual_block(forward, size = 7, rate = r, block = 10, reuse = reuse)
+                zeros = tf.add(zeros, s)
+            return tf.layers.conv1d(
+                zeros,
+                block_size,
+                kernel_size = 1,
+                strides = 1,
+                padding = 'SAME',
+                activation = tf.nn.tanh,
+            )
+        
+        fw_logits = decoder(self.AFTER, False)
+        bw_logits = decoder(self.BEFORE, True)
+        self.attention = tf.matmul(
+            self.get_thought, tf.transpose(self.embeddings), name = 'attention'
+        )
+        self.loss = self.calculate_loss(fw_logits, self.AFTER) + self.calculate_loss(bw_logits, self.BEFORE)
+        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
+    
+    def calculate_loss(self, outputs, labels):
+        mask = tf.cast(tf.sign(labels), tf.float32)
+        logits = self.output_layer(outputs)
+        return tf.contrib.seq2seq.sequence_loss(logits, labels, mask)
+
+
+def counter_words(sentences):
+    """Tokenise every sentence; return (Counter, flat token list, sentence count, token count)."""
+    word_counter = collections.Counter()
+    word_list = []
+    num_lines = 0
+    for num_lines, sentence in enumerate(sentences, 1):
+        # Backslashes are doubled so the literal has no invalid escapes; the regex itself is unchanged.
+        words = re.findall('[\\w\']+|[;:\\-\\(\\)&.,!?"]', sentence)
+        word_counter.update(words)
+        word_list.extend(words)
+    return word_counter, word_list, num_lines, len(word_list)
+
+
+def build_dict(word_counter, vocab_size = 200000):
+    """Return (word -> id, id -> word); PAD, UNK, START, END get ids 0-3, then words by frequency."""
+    dictionary = {}
+    # setdefault keeps a reserved id when the corpus contains the same token, so no two words share an id.
+    for word in ['PAD', 'UNK', 'START', 'END'] + [w for w, _ in word_counter.most_common(vocab_size)]:
+        dictionary.setdefault(word, len(dictionary))
+    return dictionary, {idx: word for word, idx in dictionary.items()}
+
+
+def train_model(
+    train_X,
+    train_Y_before,
+    train_Y_after,
+    epoch = 10,
+    batch_size = 16,
+    embedding_size = 64,
+    maxlen = 50,
+    **kwargs
+):
+    word_counter, _, _, _ = counter_words(train_X)
+    dictionary, _ = build_dict(word_counter)
+    print(len(dictionary))
+    _graph = tf.Graph()
+    with _graph.as_default():
+        model = Model(
+            len(dictionary),
+            embedding_size,
+            1e-3,
+            maxlen,
+        )
+        sess = tf.InteractiveSession()
+        saver = tf.train.Saver(tf.global_variables())
+        sess.run(tf.global_variables_initializer())
+    saver.save(sess, 'skip-wiki/model.ckpt')
+
+    for e in range(epoch):
+        pbar = tqdm(range(0, len(train_X), batch_size), desc = 'minibatch loop')
+        for i in pbar:
+            batch_x = batch_sequence(
+                train_X[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            batch_y_before = batch_sequence(
+                train_Y_before[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            batch_y_after = batch_sequence(
+                train_Y_after[i : min(i + batch_size, len(train_X))],
+                dictionary,
+                maxlen = maxlen,
+            )
+            loss, _ = sess.run(
+                [model.loss, model.optimizer],
+                feed_dict = {
+                    model.BEFORE: batch_y_before,
+                    model.INPUT: batch_x,
+                    model.AFTER: batch_y_after,
+                },
+            )
+            pbar.set_postfix(cost = loss)
+    saver.save(sess, 'skip-wiki/model.ckpt')
+    return sess, model, dictionary
+
+
+# In[2]:
+
+
+def cleaning(string):
+    """Drop tokens containing '#' or '@' and URLs, keep only letters; return lower-cased, single-spaced text."""
+    string = re.sub(
+        # Raw string avoids invalid escapes; the dot is escaped so only a literal 'www.' prefix is matched.
+        r'http\S+|www\.\S+',
+        '',
+        ' '.join(
+            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
+        ),
+    )
+    # Transliterate to ASCII, then replace every run of non-letters with a single space.
+    string = unidecode(string).replace('.', '. ').replace(',', ', ')
+    string = re.sub('[^A-Za-z ]+', ' ', string)
+    string = re.sub(r'[ ]+', ' ', string).strip()
+    # Backslashes are doubled so the literal has no invalid escapes; the regex itself is unchanged.
+    string = ' '.join(
+        [i for i in re.findall('[\\w\']+|[;:\\-\\(\\)&.,!?"]', string) if len(i)]
+    )
+    return string.lower()
+
+def split_by_dot(string):
+    string = re.sub(
+        r'(?<!\d)\.(?!\d)',
+        'SPLITTT',
+        string.replace('\n', '').replace('/', ' '),
+    )
+    string = string.split('SPLITTT')
+    return [re.sub(r'[ ]+', ' ', sentence).strip() for sentence in string]
+
+with open('wiki-ms.txt') as fopen:
+    corpus = fopen.read()
+
+print(corpus[:1000])
+splitted = corpus.split()
+corpus = []
+for i in range(0, len(splitted), 50):
+    corpus.append(' '.join(splitted[i:i+50]))
+print(len(corpus))
+corpus = corpus[100000:300000]
+corpus = [cleaning(sentence) for sentence in corpus]
+
+print(len(corpus))
+corpus = [sentence for sentence in corpus if len(sentence) > 10]
+print(len(corpus))
+
+# In[3]:
+
+
+stride = 1
+t_range = int((len(corpus) - 3) / stride + 1)
+left, middle, right = [], [], []
+for i in range(t_range):
+    slices = corpus[i * stride : i * stride + 3]
+    left.append(slices[0])
+    middle.append(slices[1])
+    right.append(slices[2])
+
+
+# In[5]:
+
+
+len(left) == len(middle) == len(right)
+
+
+# In[6]:
+
+
+from sklearn.utils import shuffle
+left, middle, right = shuffle(left, middle, right)
+
+
+# In[ ]:
+
+
+_,_,dictionary = train_model(middle,left,right)
+with open('skip-wiki-dict.json', 'w') as fopen:
+    fopen.write(json.dumps(dictionary))
\ No newline at end of file
diff --git a/session/word2vec/wiki-256.py b/session/word2vec/wiki-256.py
new file mode 100644
index 00000000..078f505a
--- /dev/null
+++ b/session/word2vec/wiki-256.py
@@ -0,0 +1,103 @@
+
+# coding: utf-8
+
+# In[1]:
+
+
+import word2vec
+import numpy as np
+import tensorflow as tf
+import json
+import os
+import re
+from unidecode import unidecode
+os.environ['CUDA_VISIBLE_DEVICES'] = ''
+
+
+# In[2]:
+
+
+with open('wiki-ms.txt') as fopen:
+    sentences = fopen.read()
+
+
+def cleaning(string):
+    """Drop tokens containing '#' or '@' and URLs, keep only letters; return lower-cased, single-spaced text."""
+    string = re.sub(
+        # Raw string avoids invalid escapes; the dot is escaped so only a literal 'www.' prefix is matched.
+        r'http\S+|www\.\S+',
+        '',
+        ' '.join(
+            [i for i in string.split() if i.find('#') < 0 and i.find('@') < 0]
+        ),
+    )
+    # Transliterate to ASCII, then replace every run of non-letters with a single space.
+    string = unidecode(string).replace('.', '. ').replace(',', ', ')
+    string = re.sub('[^A-Za-z ]+', ' ', string)
+    string = re.sub(r'[ ]+', ' ', string).strip()
+    # Backslashes are doubled so the literal has no invalid escapes; the regex itself is unchanged.
+    string = ' '.join(
+        [i for i in re.findall('[\\w\']+|[;:\\-\\(\\)&.,!?"]', string) if len(i)]
+    )
+    return string.lower()
+# In[3]:
+
+sentences = cleaning(sentences).split()
+
+word_array, dictionary, rev_dictionary, num_lines, num_words = word2vec.build_word_array(sentences,vocab_size=1000000)
+
+
+# In[4]:
+
+
+len(dictionary)
+
+
+# In[5]:
+
+
+X, Y = word2vec.build_training_set(word_array)
+graph_params = {'batch_size': 32,
+                'vocab_size': np.max(X)+1,
+                'embed_size': 256,
+                'hid_size': 256,
+                'neg_samples': 128,
+                'learn_rate': 0.01,
+                'momentum': 0.9,
+                'embed_noise': 0.1,
+                'hid_noise': 0.3,
+                'epoch':10,
+                'optimizer': 'Momentum'}
+
+
+# In[6]:
+
+
+split = round(X.shape[0]*0.9)
+train_X, train_Y = X[:split, :], Y[:split, :]
+test_X, test_Y = X[split:, :], Y[split:, :]
+
+
+# In[7]:
+
+
+model = word2vec.Model(graph_params)
+print('model built, vocab size %d, document length %d'%(np.max(X)+1, len(word_array)))
+
+
+# In[ ]:
+
+
+embed_weights, nce_weights = model.train(train_X, train_Y, test_X, test_Y,
+                                         graph_params['epoch'],
+                                         graph_params['batch_size'])
+
+
+# In[ ]:
+
+
+import pickle
+with open('word2vec-wiki-256.p', 'wb') as fopen:
+    pickle.dump({'dictionary':dictionary,'rev_dictionary':rev_dictionary,
+                 'embed_weights':embed_weights,'nce_weights':nce_weights}, fopen)
+
diff --git a/session/word2vec/word2vec.py b/session/word2vec/word2vec.py
index 7e0c057b..1d1c5154 100644
--- a/session/word2vec/word2vec.py
+++ b/session/word2vec/word2vec.py
@@ -5,33 +5,38 @@
 from sklearn.utils import shuffle
 from sklearn.manifold import TSNE
 from scipy.spatial.distance import cdist
+from tqdm import tqdm
+
 
 def counter_words(sentences):
     word_counter = collections.Counter()
     word_list = []
     num_lines, num_words = (0, 0)
     for i in sentences:
-        words = re.findall("[\\w']+|[;:\-\(\)&.,!?\"]", i)
+        words = re.findall('[\\w\']+|[;:\-\(\)&.,!?"]', i)
         word_counter.update(words)
         word_list.extend(words)
         num_lines += 1
         num_words += len(words)
     return word_counter, word_list, num_lines, num_words
 
-def build_dict(word_counter, vocab_size=50000):
-    count = [['UNK', 0]]
+
+def build_dict(word_counter, vocab_size = 50000):
+    count = [['PAD', 0], ['UNK', 1], ['START', 2], ['END', 3]]
     count.extend(word_counter.most_common(vocab_size))
     dictionary = dict()
     for word, _ in count:
         dictionary[word] = len(dictionary)
     return dictionary, {word: idx for idx, word in dictionary.items()}
 
+
 def doc2num(word_list, dictionary):
     word_array = []
     unknown_val = len(dictionary)
     for word in word_list:
         word_array.append(dictionary.get(word, unknown_val))
-    return np.array(word_array, dtype=np.int32)
+    return np.array(word_array, dtype = np.int32)
+
 
 def build_word_array(sentences, vocab_size):
     word_counter, word_list, num_lines, num_words = counter_words(sentences)
@@ -39,97 +44,125 @@ def build_word_array(sentences, vocab_size):
     word_array = doc2num(word_list, dictionary)
     return word_array, dictionary, rev_dictionary, num_lines, num_words
 
+
 def build_training_set(word_array):
     num_words = len(word_array)
-    x = np.zeros((num_words-4, 4), dtype=np.int32)
-    y = np.zeros((num_words-4, 1), dtype=np.int32)
-    shift = np.array([-2, -1, 1, 2], dtype=np.int32)
-    for idx in range(2, num_words-2):
-        y[idx-2, 0] = word_array[idx]
-        x[idx-2, :] = word_array[idx+shift]
+    x = np.zeros((num_words - 4, 4), dtype = np.int32)
+    y = np.zeros((num_words - 4, 1), dtype = np.int32)
+    shift = np.array([-2, -1, 1, 2], dtype = np.int32)
+    for idx in range(2, num_words - 2):
+        y[idx - 2, 0] = word_array[idx]
+        x[idx - 2, :] = word_array[idx + shift]
     return x, y
 
+
 class Model:
     def __init__(self, graph_params):
         g_params = graph_params
         tf.reset_default_graph()
         self.sess = tf.InteractiveSession()
-        self.X = tf.placeholder(tf.int64, shape=[None, 4])
-        self.Y = tf.placeholder(tf.int64, shape=[None, 1])
-        w_m2, w_m1, w_p1, w_p2 = tf.unstack(self.X, axis=1)
-        self.embed_weights = tf.Variable(tf.random_uniform([g_params['vocab_size'],g_params['embed_size']],
-                                                            -g_params['embed_noise'],g_params['embed_noise']))
+        self.X = tf.placeholder(tf.int64, shape = [None, 4])
+        self.Y = tf.placeholder(tf.int64, shape = [None, 1])
+        w_m2, w_m1, w_p1, w_p2 = tf.unstack(self.X, axis = 1)
+        self.embed_weights = tf.Variable(
+            tf.random_uniform(
+                [g_params['vocab_size'], g_params['embed_size']],
+                -g_params['embed_noise'],
+                g_params['embed_noise'],
+            )
+        )
         embed_m2 = tf.nn.embedding_lookup(self.embed_weights, w_m2)
         embed_m1 = tf.nn.embedding_lookup(self.embed_weights, w_m1)
         embed_p1 = tf.nn.embedding_lookup(self.embed_weights, w_p1)
         embed_p2 = tf.nn.embedding_lookup(self.embed_weights, w_p2)
-        embed_stack = tf.concat([embed_m2, embed_m1, embed_p1, embed_p2],1)
-        hid_weights = tf.Variable(tf.random_normal([g_params['embed_size'] * 4,
-                                                    g_params['hid_size']],
-                                                   stddev=g_params['hid_noise']/(g_params['embed_size'] * 4)**0.5))
+        embed_stack = tf.concat([embed_m2, embed_m1, embed_p1, embed_p2], 1)
+        hid_weights = tf.Variable(
+            tf.random_normal(
+                [g_params['embed_size'] * 4, g_params['hid_size']],
+                stddev = g_params['hid_noise']
+                / (g_params['embed_size'] * 4) ** 0.5,
+            )
+        )
         hid_bias = tf.Variable(tf.zeros([g_params['hid_size']]))
         hid_out = tf.nn.tanh(tf.matmul(embed_stack, hid_weights) + hid_bias)
-        self.nce_weights = tf.Variable(tf.random_normal([g_params['vocab_size'],
-                                                         g_params['hid_size']],
-                                                        stddev=1.0 / g_params['hid_size'] ** 0.5))
+        self.nce_weights = tf.Variable(
+            tf.random_normal(
+                [g_params['vocab_size'], g_params['hid_size']],
+                stddev = 1.0 / g_params['hid_size'] ** 0.5,
+            )
+        )
         nce_bias = tf.Variable(tf.zeros([g_params['vocab_size']]))
-        self.cost = tf.reduce_mean(tf.nn.nce_loss(self.nce_weights, nce_bias,
-                                                  inputs=hid_out, labels=self.Y,
-                                                  num_sampled=g_params['neg_samples'],
-                                                  num_classes=g_params['vocab_size'],
-                                                  num_true=1, remove_accidental_hits=True))
-        self.logits = tf.argmax(tf.matmul(hid_out,self.nce_weights, transpose_b=True) + nce_bias, axis=1)
+        self.cost = tf.reduce_mean(
+            tf.nn.nce_loss(
+                self.nce_weights,
+                nce_bias,
+                inputs = hid_out,
+                labels = self.Y,
+                num_sampled = g_params['neg_samples'],
+                num_classes = g_params['vocab_size'],
+                num_true = 1,
+                remove_accidental_hits = True,
+            )
+        )
+        self.logits = tf.argmax(
+            tf.matmul(hid_out, self.nce_weights, transpose_b = True) + nce_bias,
+            axis = 1,
+        )
         if g_params['optimizer'] == 'RMSProp':
-            self.optimizer = tf.train.RMSPropOptimizer(g_params['learn_rate']).minimize(self.cost)
+            self.optimizer = tf.train.RMSPropOptimizer(
+                g_params['learn_rate']
+            ).minimize(self.cost)
         elif g_params['optimizer'] == 'Momentum':
-            self.optimizer = tf.train.MomentumOptimizer(g_params['learn_rate'],
-                                                        g_params['momentum']).minimize(self.cost)
+            self.optimizer = tf.train.MomentumOptimizer(
+                g_params['learn_rate'], g_params['momentum']
+            ).minimize(self.cost)
         elif g_params['optimizer'] == 'Adam':
-            self.optimizer = tf.train.AdamOptimizer(g_params['learn_rate']).minimize(self.cost)
+            self.optimizer = tf.train.AdamOptimizer(
+                g_params['learn_rate']
+            ).minimize(self.cost)
         else:
             print('Optimizer not supported,exit.')
         self.sess.run(tf.global_variables_initializer())
-            
-    def train(self,X, Y, X_val, Y_val,epoch,batch_size):
-        num_batches = len(X) // batch_size
-        avg_loss, avg_loss_count, batch_count = (0, 0, 0)
-        e_train, e_val = ([], [])
-        for i in range(1, epoch+1):
-            avg_loss, avg_loss_count = (0, 0)
+
+    def train(self, X, Y, X_val, Y_val, epoch, batch_size):
+        for i in range(epoch):
             X, Y = shuffle(X, Y)
-            for batch in range(num_batches):
-                bot_idx = batch * batch_size
-                top_idx = bot_idx + batch_size
-                feed_dict = {self.X: X[bot_idx:top_idx, :],self.Y: Y[bot_idx:top_idx, :]}
-                _, loss = self.sess.run([self.optimizer,self.cost],feed_dict=feed_dict)
-                avg_loss += loss
-                avg_loss_count += 1
-                batch_count += 1
-            num_batches = X_val.shape[0] // batch_size
-            avg_loss = avg_loss / avg_loss_count
-            e_train.append(avg_loss)
-            val_loss = 0
-            for batch in range(num_batches):
-                bot_idx = batch * batch_size
-                top_idx = bot_idx + batch_size
-                feed_dict = {self.X: X_val[bot_idx:top_idx, :],
-                             self.Y: Y_val[bot_idx:top_idx, :]}
-                val_loss += self.sess.run(self.cost, feed_dict=feed_dict)
-            val_loss = val_loss / num_batches 
-            e_val.append(val_loss)
-            print('epoch %d, total batch %d, train loss %f, val loss %f'%(i,batch_count,avg_loss, val_loss))
+            pbar = tqdm(
+                range(0, len(X), batch_size), desc = 'train minibatch loop'
+            )
+            for batch in pbar:
+                feed_dict = {
+                    self.X: X[batch : min(batch + batch_size, len(X))],
+                    self.Y: Y[batch : min(batch + batch_size, len(X))],
+                }
+                _, loss = self.sess.run(
+                    [self.optimizer, self.cost], feed_dict = feed_dict
+                )
+                pbar.set_postfix(cost = loss)
+
+            pbar = tqdm(
+                range(0, len(X_val), batch_size), desc = 'test minibatch loop'
+            )
+            for batch in pbar:
+                feed_dict = {
+                    self.X: X_val[batch : min(batch + batch_size, len(X_val))],
+                    self.Y: Y_val[batch : min(batch + batch_size, len(X_val))],
+                }
+                loss = self.sess.run(self.cost, feed_dict = feed_dict)
+                pbar.set_postfix(cost = loss)
         return self.embed_weights.eval(), self.nce_weights.eval()
 
+
 class Word2Vec:
-    def __init__(self,embed_matrix, dictionary):
+    def __init__(self, embed_matrix, dictionary):
         self._embed_matrix = embed_matrix
         self._dictionary = dictionary
         self._reverse_dictionary = {v: k for k, v in dictionary.items()}
-   
+
     def get_vector_by_name(self, word):
         return np.ravel(self._embed_matrix[self._dictionary[word], :])
-    
-    def n_closest(self, word, num_closest=5, metric='cosine'):
+
+    def n_closest(self, word, num_closest = 5, metric = 'cosine'):
         wv = self.get_vector_by_name(word)
         closest_indices = self.closest_row_indices(wv, num_closest + 1, metric)
         word_list = []
@@ -138,13 +171,15 @@ def n_closest(self, word, num_closest=5, metric='cosine'):
         if word in word_list:
             word_list.remove(word)
         return word_list
-    
+
     def closest_row_indices(self, wv, num, metric):
-        dist_array = np.ravel(cdist(self._embed_matrix, wv.reshape((1, -1)),metric=metric))
+        dist_array = np.ravel(
+            cdist(self._embed_matrix, wv.reshape((1, -1)), metric = metric)
+        )
         sorted_indices = np.argsort(dist_array)
         return sorted_indices[:num]
-    
-    def analogy(self, a, b, c, num=1, metric='cosine'):
+
+    def analogy(self, a, b, c, num = 1, metric = 'cosine'):
         va = self.get_vector_by_name(a)
         vb = self.get_vector_by_name(b)
         vc = self.get_vector_by_name(c)
@@ -156,7 +191,7 @@ def analogy(self, a, b, c, num=1, metric='cosine'):
         return d_word_list
 
     def project_2d(self, start, end):
-        tsne = TSNE(n_components=2)
+        tsne = TSNE(n_components = 2)
         embed_2d = tsne.fit_transform(self._embed_matrix[start:end, :])
         word_list = []
         for i in range(start, end):