diff --git a/demo.py b/demo.py index a383823..2984946 100644 --- a/demo.py +++ b/demo.py @@ -10,7 +10,7 @@ from tqdm import tqdm from dh_segment.io import PAGE -from dh_segment.network import LoadedModel +from dh_segment.inference import LoadedModel from dh_segment.post_processing import boxes_detection, binarization # To output results in PAGE XML format (http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2013-07-15/) @@ -89,14 +89,17 @@ def format_quad_to_string(quad): cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5) # Write corners points into a .txt file txt_coordinates += '{},{}\n'.format(filename, format_quad_to_string(pred_page_coords)) + + # Create page region and XML file + page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :])) else: print('No box found in {}'.format(filename)) + page_border = PAGE.Border() + basename = os.path.basename(filename).split('.')[0] imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)), original_img) - # Create page region and XML file - page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :])) - page_xml = PAGE.Page(filename, image_width=original_shape[1], image_height=original_shape[0], + page_xml = PAGE.Page(image_filename=filename, image_width=original_shape[1], image_height=original_shape[0], page_border=page_border) xml_filename = os.path.join(output_pagexml_dir, '{}.xml'.format(basename)) page_xml.write_to_file(xml_filename, creator_name='PageExtractor') diff --git a/demo/interactive_demo.ipynb b/demo/interactive_demo.ipynb new file mode 100644 index 0000000..a7c6df5 --- /dev/null +++ b/demo/interactive_demo.ipynb @@ -0,0 +1,347 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Interactive demo to load a trained model for page extraction and apply it to a randomly selected file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Get the annotated sample dataset, which already contains the folders images and labels. Unzip it into `demo/pages_sample`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! wget https://github.com/dhlab-epfl/dhSegment/releases/download/untagged-b55f9aa4fff5efd4b1b8/pages_sample.zip\n", + "! unzip pages_sample.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Download the provided model (download and unzip it in `demo/model`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! wget https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/model.zip\n", + "! unzip model.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. 
Run the code step by step" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cv2\n", + "from glob import glob\n", + "import numpy as np\n", + "import random\n", + "import tensorflow as tf\n", + "from imageio import imread, imsave" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dh_segment.io import PAGE\n", + "from dh_segment.inference import LoadedModel\n", + "from dh_segment.post_processing import boxes_detection, binarization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def page_make_binary_mask(probs: np.ndarray, threshold: float=-1) -> np.ndarray:\n", + " \"\"\"\n", + " Computes the binary mask of the detected Page from the probabilities outputed by network\n", + " :param probs: array with values in range [0, 1]\n", + " :param threshold: threshold between [0 and 1], if negative Otsu's adaptive threshold will be used\n", + " :return: binary mask\n", + " \"\"\"\n", + "\n", + " mask = binarization.thresholding(probs, threshold)\n", + " mask = binarization.cleaning_binary(mask, kernel_size=5)\n", + " return mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define input and output directories / files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_dir = 'page_model/export'\n", + "if not os.path.exists(model_dir):\n", + " model_dir = 'model/'\n", + "assert(os.path.exists(model_dir))\n", + "\n", + "input_files = glob(os.path.join('pages_sample', 'images/*'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output_dir = './processed_images'\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "# PAGE XML format output\n", + "output_pagexml_dir = os.path.join(output_dir, 'page_xml')\n", + "os.makedirs(output_pagexml_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start a tensorflow session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session = tf.InteractiveSession()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select a random image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "file_to_process = random.sample(input_files, 1)[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Load the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "m = LoadedModel(model_dir, predict_mode='filename')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Predict each pixel's label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For each image, predict each pixel's label\n", + "prediction_outputs = m.predict(file_to_process)\n", + "probs = prediction_outputs['probs'][0]\n", + "original_shape = prediction_outputs['original_shape']\n", + 
"\n", + "probs = probs[:, :, 1] # Take only class '1' (class 0 is the background, class 1 is the page)\n", + "probs = probs / np.max(probs) # Normalize to be in [0, 1]\n", + "\n", + "# Binarize the predictions\n", + "page_bin = page_make_binary_mask(probs)\n", + "\n", + "# Upscale to have full resolution image (cv2 uses (w,h) and not (h,w) for giving shapes)\n", + "bin_upscaled = cv2.resize(page_bin.astype(np.uint8, copy=False),\n", + " tuple(original_shape[::-1]), interpolation=cv2.INTER_NEAREST)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Show the probability map and binarized mask" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(probs, cmap='gray')\n", + "plt.axis('off')\n", + "plt.title('Probability map')\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(page_bin, cmap='gray')\n", + "plt.axis('off')\n", + "plt.title('Binary mask')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Find quadrilateral enclosing the page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pred_page_coords = boxes_detection.find_boxes(bin_upscaled.astype(np.uint8, copy=False),\n", + " mode='min_rectangle', n_max_boxes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Draw page box on original image and export it. Add also box coordinates to the txt file\n", + "original_img = imread(file_to_process, pilmode='RGB')\n", + "if pred_page_coords is not None:\n", + " cv2.polylines(original_img, [pred_page_coords[:, None, :]], True, (0, 0, 255), thickness=5)\n", + "else:\n", + " print('No box found in {}'.format(filename))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10,10))\n", + "plt.imshow(original_img)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Export image and create page region and XML file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "basename = os.path.basename(file_to_process).split('.')[0]\n", + "imsave(os.path.join(output_dir, '{}_boxes.jpg'.format(basename)), original_img)\n", + "\n", + "page_border = PAGE.Border(coords=PAGE.Point.cv2_to_point_list(pred_page_coords[:, None, :]))\n", + "page_xml = PAGE.Page(image_filename=file_to_process, image_width=original_shape[1], image_height=original_shape[0], page_border=page_border)\n", + "xml_filename = os.path.join(output_pagexml_dir, '{}.xml'.format(basename))\n", + "page_xml.write_to_file(xml_filename, creator_name='PageExtractor')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. 
Have a look at the results in ``demo/processed_images``" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:dhsegment]", + "language": "python", + "name": "conda-env-dhsegment-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dh_segment/estimator_fn.py b/dh_segment/estimator_fn.py index d25303b..37d92bc 100644 --- a/dh_segment/estimator_fn.py +++ b/dh_segment/estimator_fn.py @@ -2,7 +2,6 @@ from .utils import PredictionType, ModelParams, TrainingParams, \ class_to_label_image, multiclass_to_label_image import numpy as np -from .network.model import inference_resnet_v1_50, inference_vgg16, inference_u_net def model_fn(mode, features, labels, params): @@ -18,45 +17,23 @@ def model_fn(mode, features, labels, params): input_images = tf.pad(input_images, [[0, 0], [margin, margin], [margin, margin], [0, 0]], mode='SYMMETRIC', name='mirror_padding') - if model_params.pretrained_model_name == 'vgg16': - network_output = inference_vgg16(input_images, - model_params, - model_params.n_classes, - use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = 'vgg_16' + encoder_class = model_params.get_encoder() + encoder = encoder_class(**model_params.encoder_network_params) + decoder_class = model_params.get_decoder() + decoder = decoder_class(**model_params.decoder_network_params) - elif model_params.pretrained_model_name == 'resnet50': - network_output = inference_resnet_v1_50(input_images, - model_params, - model_params.n_classes, - use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = 'resnet_v1_50' - elif model_params.pretrained_model_name == 'unet': - network_output = inference_u_net(input_images, - model_params, - model_params.n_classes, - use_batch_norm=model_params.batch_norm, - weight_decay=model_params.weight_decay, - is_training=(mode == tf.estimator.ModeKeys.TRAIN) - ) - key_restore_model = None - else: - raise NotImplementedError + is_training = (mode == tf.estimator.ModeKeys.TRAIN) + feature_maps = encoder(input_images, is_training=is_training) + network_output = decoder(feature_maps, num_classes=model_params.n_classes, is_training=is_training) if mode == tf.estimator.ModeKeys.TRAIN: - if key_restore_model is not None: + pretrained_file, pretrained_vars = encoder.pretrained_information() + if pretrained_file: # Pretrained weights as initialization - pretrained_restorer = tf.train.Saver(var_list=[v for v in tf.global_variables() - if key_restore_model in v.name]) + pretrained_restorer = tf.train.Saver(var_list=pretrained_vars) def init_fn(scaffold, session): - pretrained_restorer.restore(session, model_params.pretrained_model_file) + pretrained_restorer.restore(session, pretrained_file) else: init_fn = None else: @@ -92,8 +69,10 @@ def init_fn(scaffold, session): if prediction_type == PredictionType.CLASSIFICATION: onehot_labels = tf.one_hot(indices=labels, depth=model_params.n_classes) with tf.name_scope("loss"): - per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits(logits=network_output, - labels=onehot_labels, name='per_pixel_loss') + #per_pixel_loss = 
tf.nn.softmax_cross_entropy_with_logits(logits=network_output, + # labels=onehot_labels, name='per_pixel_loss') + per_pixel_loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=network_output, + labels=onehot_labels, name='per_pixel_loss') if training_params.focal_loss_gamma > 0.0: # Probability per pixel of getting the correct label probs_correct_label = tf.reduce_max(tf.multiply(prediction_probs, onehot_labels)) @@ -207,14 +186,20 @@ def _fn(_in): # ---------- if mode == tf.estimator.ModeKeys.EVAL: if prediction_type == PredictionType.CLASSIFICATION: - metrics = {'eval/accuracy': tf.metrics.accuracy(labels, predictions=prediction_labels)} + metrics = { + 'eval/accuracy': tf.metrics.accuracy(labels, predictions=prediction_labels), + 'eval/mIOU': tf.metrics.mean_iou(labels, prediction_labels, num_classes=model_params.n_classes,) + # weights=tf.cast(training_params.weights_evaluation_miou, tf.float32)) + } elif prediction_type == PredictionType.REGRESSION: metrics = {'eval/accuracy': tf.metrics.mean_squared_error(labels, predictions=prediction_labels)} elif prediction_type == PredictionType.MULTILABEL: metrics = {'eval/MSE': tf.metrics.mean_squared_error(tf.cast(labels, tf.float32), predictions=prediction_probs), 'eval/accuracy': tf.metrics.accuracy(tf.cast(labels, tf.bool), - predictions=tf.cast(prediction_labels, tf.bool)) + predictions=tf.cast(prediction_labels, tf.bool)), + 'eval/mIOU': tf.metrics.mean_iou(labels, prediction_labels, num_classes=model_params.n_classes) + # weights=training_params.weights_evaluation_miou) } else: metrics = None diff --git a/dh_segment/inference/loader.py b/dh_segment/inference/loader.py index be64bc7..4949673 100644 --- a/dh_segment/inference/loader.py +++ b/dh_segment/inference/loader.py @@ -9,6 +9,16 @@ class LoadedModel: + """ + Loads an exported dhSegment model + + :param model_base_dir: the model directory i.e. containing `saved_model.{pb|pbtxt}`. If not, it is assumed to \ + be a TF exporter directory, and the latest export directory will be automatically selected. + :param predict_mode: defines the input/output format of the prediction output (see `.predict()`) + :param num_parallel_predictions: limits the number of conccurent calls of `predict` to avoid Out-Of-Memory \ + issues if predicting on GPU + """ + def __init__(self, model_base_dir, predict_mode='filename', num_parallel_predictions=2): if os.path.exists(os.path.join(model_base_dir, 'saved_model.pbtxt')) or \ os.path.exists(os.path.join(model_base_dir, 'saved_model.pb')): @@ -52,6 +62,29 @@ def __init__(self, model_base_dir, predict_mode='filename', num_parallel_predict self.sema = Semaphore(num_parallel_predictions) def predict(self, input_tensor, prediction_key=None): + """ + Performs the prediction from the loaded model according to the prediction mode. 
\n + Prediction modes: + + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `prediction_mode` | `input_tensor` | Output prediction dictionnary | Comment | + +=============================+===============================================+======================================+===================================================================================================+ + | `filename` | Single filename string | `labels`, `probs`, `original_shape` | Loads the image, resizes it, and predicts | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `filename_original_shape` | Single filename string | `labels`, `probs` | Loads the image, resizes it, predicts and scale the output to the original resolution of the file | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs`, `original_shape` | Resizes the image, and predicts | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image_original_shape` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Resizes the image, predicts, and scale the output to the original resolution of the input | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + | `image_resized` | Single input image [1,H,W,3] float32 (0..255) | `labels`, `probs` | Predicts from the image input directly | + +-----------------------------+-----------------------------------------------+--------------------------------------+---------------------------------------------------------------------------------------------------+ + + :param input_tensor: a single input whose format should match the prediction mode + :param prediction_key: if not `None`, will returns the value of the corresponding key of the output dictionnary \ + instead of the full dictionnary + :return: the prediction output + """ with self.sema: if prediction_key: desired_output = self._output_dict[prediction_key] diff --git a/dh_segment/io/PAGE.py b/dh_segment/io/PAGE.py index d9d5a11..5214735 100644 --- a/dh_segment/io/PAGE.py +++ b/dh_segment/io/PAGE.py @@ -8,6 +8,7 @@ from uuid import uuid4 from shapely.geometry import Polygon from abc import ABC +import re # https://docs.python.org/3.5/library/xml.etree.elementtree.html#parsing-xml-with-namespaces _ns = {'p': 'http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15'} @@ -17,7 +18,7 @@ def _try_to_int(d: Optional[Union[str, int]])-> Optional[int]: - if isinstance(d, str): + if isinstance(d, (str, np.int32, np.int64)): return int(d) else: return d @@ -54,6 +55,9 @@ def list_from_xml(cls, etree_elem: ET.Element) -> List['Point']: if etree_elem is None: # print('warning, trying to construct list of points from None, 
defaulting to []') return [] + if etree_elem.attrib['points'] == "": + # print('warning, trying to construct list of points from empty string, defaulting to []') + return [] t = etree_elem.attrib['points'] result = [] for p in t.split(' '): @@ -99,12 +103,21 @@ def array_to_list(cls, array: np.ndarray) -> list: """ return [list(pt) for pt in array] + @classmethod + def array_to_point(cls, array: np.ndarray) -> list: + """Converts an `np.array` to a list of `Point` + + :param array: an array of coordinates. Must be of shape (N, 2) + :return: list of `Point` + """ + return cls.list_to_point(list(array)) + @classmethod def list_to_point(cls, list_coords: list) -> List['Point']: """Converts a list of coordinates to a list of `Point` :param list_coords: list of coordinates, shape (N, 2) - :return: list of Points + :return: list of `Point` """ return [cls(coord[1], coord[0]) for coord in list_coords if list_coords] @@ -160,12 +173,15 @@ class Region(BaseElement): :ivar id: identifier of the `Region` :ivar coords: coordinates of the `Region` + :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'Region' - def __init__(self, id: str=None, coords: List[Point]=None): + def __init__(self, id: str=None, coords: List[Point]=None, custom_attribute: str=None): self.coords = coords if coords is not None else [] self.id = id + self.custom_attribute = custom_attribute if custom_attribute is not None else '' @classmethod def from_xml(cls, etree_element: ET.Element) -> dict: @@ -175,6 +191,7 @@ def from_xml(cls, etree_element: ET.Element) -> dict: :return: a dictionary with keys 'id' and 'coords' """ return {'id': etree_element.attrib.get('id'), + 'custom_attribute': etree_element.attrib.get('custom'), 'coords': Point.list_from_xml(etree_element.find('p:Coords', _ns))} def to_xml(self, name_element: str=None) -> ET.Element: @@ -185,6 +202,7 @@ def to_xml(self, name_element: str=None) -> ET.Element: """ et = ET.Element(name_element if name_element is not None else '') et.set('id', self.id if self.id is not None else '') + et.set('custom', self.custom_attribute if self.custom_attribute is not None else '') if not not self.coords: coords = ET.SubElement(et, 'Coords') coords.set('points', Point.list_point_to_string(self.coords)) @@ -209,6 +227,7 @@ def from_dict(cls, dictionary: dict) -> dict: :return: non serialized dictionary """ return {'id': dictionary.get('id'), + 'custom_attribute': dictionary.get('custom_attribute'), 'coords': Point.list_to_point(dictionary.get('coords')) } @@ -222,13 +241,14 @@ class TextLine(Region): :ivar text: `Text` class containing the transcription of the `TextLine` :ivar line_group_id: identifier of the line group the instance belongs to :ivar column_group_id: identifier of the column group the instance belongs to - + :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'TextLine' def __init__(self, id: str = None, coords: List[Point] = None, baseline: List[Point] = None, text: Text = None, - line_group_id: str = None, column_group_id: str = None): - super().__init__(id=id if id is not None else str(uuid4()), coords=coords) + line_group_id: str = None, column_group_id: str = None, custom_attribute: str=None): + super().__init__(id=id if id is not None else str(uuid4()), coords=coords, custom_attribute=custom_attribute) self.baseline = baseline if baseline is not None else [] self.text = 
text if text is not None else Text() self.line_group_id = line_group_id if line_group_id is not None else '' @@ -321,13 +341,29 @@ class TextRegion(Region): :ivar coords: coordinates of the `TextRegion` :ivar text_equiv: the resulting text of the `Text` contained in the `TextLines` :ivar text_lines: a list of `TextLine` objects + :ivar region_type: the type of a TextRegion (can be any string). Example : header, paragraph, page-number... + :ivar custom_attribute: Any custom attribute that may be linked with the region + (usually this is added in PAGEXML files, not in JSON files) """ tag = 'TextRegion' - def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[TextLine]=None, text_equiv: str=''): - super().__init__(id=id, coords=coords) + def __init__(self, id: str=None, coords: List[Point]=None, text_lines: List[TextLine]=None, text_equiv: str='', + region_type: str=None, custom_attribute: str=None): + super().__init__(id=id, coords=coords, custom_attribute=custom_attribute) self.text_equiv = text_equiv if text_equiv is not None else '' self.text_lines = text_lines if text_lines is not None else [] + self.type = region_type if region_type is not None else '' + + def sort_text_lines(self, top_to_bottom: bool=True) -> None: + """ + Sorts ``TextLine`` from top to bottom according to their mean y coordinate (centroid) + + :param top_to_bottom: order lines from top to bottom of image, default=True + """ + if top_to_bottom: + self.text_lines.sort(key=lambda line: np.mean([c.y for c in line.coords])) + else: + raise NotImplementedError @classmethod def from_xml(cls, e: ET.Element) -> 'TextRegion': @@ -335,11 +371,14 @@ def from_xml(cls, e: ET.Element) -> 'TextRegion': return TextRegion( **super().from_xml(e), text_lines=[TextLine.from_xml(tl) for tl in e.findall('p:TextLine', _ns)], - text_equiv=_get_text_equiv(e) + text_equiv=_get_text_equiv(e), + region_type=e.attrib.get('type') ) def to_xml(self, name_element='TextRegion') -> ET.Element: text_et = super().to_xml(name_element=name_element) + if self.type is not None and self.type != '': + text_et.set('type', self.type) for tl in self.text_lines: text_et.append(tl.to_xml()) text_equiv = ET.SubElement(text_et, 'TextEquiv') @@ -355,7 +394,8 @@ def to_dict(self, non_serializable_keys: List[str]=list()): def from_dict(cls, dictionary: dict) -> 'TextRegion': return cls(**super().from_dict(dictionary), text_lines=[TextLine.from_dict(tl) for tl in dictionary.get('text_lines', list())], - text_equiv=dictionary.get('text_equiv') + text_equiv=dictionary.get('text_equiv'), + region_type=dictionary.get('region_type') ) @@ -403,7 +443,7 @@ def from_dict(cls, dictionary: dict) -> 'TableRegion': return cls(**super().from_dict(dictionary), rows=dictionary.get('rows'), columns=dictionary.get('columns'), - embeded_text=dictionary.get('embeded_text')) + embedded_text=dictionary.get('embedded_text')) class SeparatorRegion(Region): @@ -418,8 +458,8 @@ class SeparatorRegion(Region): tag = 'SeparatorRegion' - def __init__(self, id: str, coords: List[Point]=None): - super().__init__(id=id, coords=coords) + def __init__(self, id: str, coords: List[Point]=None, custom_attribute: str=None): + super().__init__(id=id, coords=coords, custom_attribute=custom_attribute) @classmethod def from_xml(cls, e: ET.Element) -> 'SeparatorRegion': @@ -547,7 +587,8 @@ def __init__(self, id: str = None, coords: List[Point] = None, segment_ids: List @classmethod def from_dict(cls, dictionary: dict) -> 'GroupSegment': - return cls(**super().from_dict(dictionary)) + return 
cls(**super().from_dict(dictionary), + segment_ids=dictionary.get('segment_ids')) class Page(BaseElement): @@ -573,7 +614,7 @@ class Page(BaseElement): def __init__(self, **kwargs): self.image_filename = kwargs.get('image_filename') - self.image_width = _try_to_int(kwargs.get('image_width')) + self.image_width = _try_to_int(kwargs.get('image_width')) # Needs to be int type (not np.int32/64) self.image_height = _try_to_int(kwargs.get('image_height')) self.text_regions = kwargs.get('text_regions', []) self.graphic_regions = kwargs.get('graphic_regions', []) @@ -635,6 +676,14 @@ def to_xml(self) -> ET.Element: # page_et.append(self.metadata.to_xml()) return page_et + def to_json(self) -> dict: + self_dict = vars(self) + + serializable_keys = ['image_filename', 'image_height', 'image_width'] + json_dict = json_serialize(self_dict, [k for k in self_dict.keys() if k not in serializable_keys]) + + return json_dict + def write_to_file(self, filename: str, creator_name: str='dhSegment', comments: str='') -> None: """ Export Page object to json or page-xml format. Will assume the format based on the extension of the filename, @@ -653,17 +702,11 @@ def _write_xml(): root.append(self.to_xml()) for k, v in _attribs.items(): root.attrib[k] = v - ET.ElementTree(element=root).write(filename) + ET.ElementTree(element=root).write(filename, encoding='utf-8') def _write_json(): - self_dict = vars(self) - - # json_dict = dict() - serializable_keys = ['image_filename', 'image_height', 'image_width'] - json_dict = json_serialize(self_dict, [k for k in self_dict.keys() if k not in serializable_keys]) - with open(filename, 'w', encoding='utf8') as file: - json.dump(json_dict, file, indent=4, sort_keys=True, allow_nan=False) + json.dump(self.to_json(), file, indent=4, sort_keys=True, allow_nan=False) # Updating metadata self.metadata.creator = creator_name @@ -1005,3 +1048,24 @@ def save_baselines(filename, baselines, ratio=(1, 1), initial_shape=None): image_height=int(initial_shape[0]*ratio[0]) if initial_shape is not None else None, image_width=int(initial_shape[1]*ratio[1]) if initial_shape is not None else None) page.write_to_file(filename) + + +def get_unique_tags_from_xml_text_regions(xml_filename: str, + tag_pattern: str='{type:.*;}'): + """ + Get a list of all the values of labels/tags + + :param xml_filename: filename of the xml file + :param tag_pattern: regular expression pattern to look for in `TextRegion.custom_attribute` + :return: + """ + tagset = list() + page = parse_file(xml_filename) + for tr in page.text_regions: + custom_attribute = tr.custom_attribute + matches = re.findall(tag_pattern, custom_attribute) + assert len(matches) <= 1, "Found multiple matches in {}".format(custom_attribute) + if matches: + tagset.append(matches[0][6:-2]) + + return list(np.unique(tagset)) diff --git a/dh_segment/io/__init__.py b/dh_segment/io/__init__.py index ca0fabb..531ba66 100644 --- a/dh_segment/io/__init__.py +++ b/dh_segment/io/__init__.py @@ -63,6 +63,40 @@ PAGE.json_serialize ---- + +.. _ref_via: + +VGG Image Annotator helpers +--------------------------- + + +**VIA objects** + +.. autosummary:: + via.WorkingItem + via.VIAttribute + + +**Creating masks with VIA annotations** + +.. autosummary:: + via.load_annotation_data + via.export_annotation_dict + via.get_annotations_per_file + via.parse_via_attributes + via.get_via_attributes + via.collect_working_items + via.create_masks + + +**Formatting in VIA JSON format** + +.. 
autosummary:: + via.create_via_region_from_coordinates + via.create_via_annotation_single_image + +---- + """ @@ -103,3 +137,5 @@ from .input import * from .input_utils import * from . import PAGE +from . import via + diff --git a/dh_segment/io/input.py b/dh_segment/io/input.py index 453ca2d..eaf5bad 100644 --- a/dh_segment/io/input.py +++ b/dh_segment/io/input.py @@ -17,9 +17,9 @@ class InputCase(Enum): INPUT_CSV = 'INPUT_CSV' -def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: str=None, - data_augmentation: bool=False, batch_size: int=5, make_patches: bool=False, num_epochs: int=1, - num_threads: int=4, image_summaries: bool=False): +def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: str = None, + data_augmentation: bool = False, batch_size: int = 5, make_patches: bool = False, num_epochs: int = 1, + num_threads: int = 4, image_summaries: bool = False, progressbar_description: str = 'Dataset'): """ Input_fn for estimator @@ -33,6 +33,7 @@ def input_fn(input_data: Union[str, List[str]], params: dict, input_label_dir: s :param num_epochs: number of epochs to cycle trough data (set it to None for infinite repeat) :param num_threads: number of thread to use in parallele when usin tf.data.Dataset.map :param image_summaries: boolean, whether to make tf.Summary to watch on tensorboard + :param progressbar_description: what will appear in the progressbar showing the number of files read :return: fn """ training_params = utils.TrainingParams.from_dict(params['training_params']) @@ -96,8 +97,9 @@ def _scaling_and_patch_fn(input_image, label_image): # Data augmentation def _augment_data_fn(input_image, label_image): \ - return data_augmentation_fn(input_image, label_image, training_params.data_augmentation_flip_lr, - training_params.data_augmentation_flip_ud, training_params.data_augmentation_color) + return data_augmentation_fn(input_image, label_image, training_params.data_augmentation_flip_lr, + training_params.data_augmentation_flip_ud, + training_params.data_augmentation_color) # Assign color to class id def _assign_color_to_class_id(input_image, label_image): @@ -112,13 +114,14 @@ def _assign_color_to_class_id(input_image, label_image): output['weight_maps'] = local_entropy(tf.equal(label_image, 1), sigma=training_params.local_entropy_sigma) return output + # --- # Finding the list of images to be used if isinstance(input_data, list): input_case = InputCase.INPUT_LIST input_image_filenames = input_data - print('Found {} images'.format(len(input_image_filenames))) + #print('Found {} images'.format(len(input_image_filenames))) elif os.path.isdir(input_data): input_case = InputCase.INPUT_DIR @@ -126,13 +129,14 @@ def _assign_color_to_class_id(input_image, label_image): recursive=True) + \ glob(os.path.join(input_data, '**', '*.png'), recursive=True) - print('Found {} images'.format(len(input_image_filenames))) + #print('Found {} images'.format(len(input_image_filenames))) elif os.path.isfile(input_data) and \ input_data.endswith('.csv'): input_case = InputCase.INPUT_CSV else: - raise NotImplementedError('Input data should be a directory, a csv file or a list of filenames but got {}'.format(input_data)) + raise NotImplementedError( + 'Input data should be a directory, a csv file or a list of filenames but got {}'.format(input_data)) # Finding the list of labelled images if available has_labelled_data = False @@ -161,23 +165,24 @@ def _assign_color_to_class_id(input_image, label_image): if not os.path.exists(img_filename): raise 
FileNotFoundError(img_filename) if has_labelled_data: - for img_filename in input_image_filenames: - if not os.path.exists(img_filename): - raise FileNotFoundError(img_filename) + for label_filename in label_image_filenames: + if not os.path.exists(label_filename): + raise FileNotFoundError(label_filename) # Tensorflow input_fn def fn(): if not has_labelled_data: encoded_filenames = [f.encode() for f in input_image_filenames] - dataset = tf.data.Dataset.from_generator(lambda: tqdm(encoded_filenames, desc='Dataset'), + dataset = tf.data.Dataset.from_generator(lambda: tqdm(encoded_filenames, desc=progressbar_description), tf.string, tf.TensorShape([])) dataset = dataset.repeat(count=num_epochs) dataset = dataset.map(lambda filename: {'images': load_and_resize_image(filename, 3, training_params.input_resized_size)}) else: encoded_filenames = [(i.encode(), l.encode()) for i, l in zip(input_image_filenames, label_image_filenames)] - dataset = tf.data.Dataset.from_generator(lambda: tqdm(utils.shuffled(encoded_filenames), desc='Dataset'), - (tf.string, tf.string), (tf.TensorShape([]), tf.TensorShape([]))) + dataset = tf.data.Dataset.from_generator(lambda: tqdm(utils.shuffled(encoded_filenames), + desc=progressbar_description), + (tf.string, tf.string), (tf.TensorShape([]), tf.TensorShape([]))) dataset = dataset.repeat(count=num_epochs) dataset = dataset.map(_load_image_fn, num_threads).flat_map(_scaling_and_patch_fn) @@ -193,6 +198,8 @@ def fn(): if make_patches and input_label_dir: base_shape_images = list(training_params.patch_shape) + elif make_patches and input_case == InputCase.INPUT_CSV: + base_shape_images = list(training_params.patch_shape) else: base_shape_images = [-1, -1] # Pad things diff --git a/dh_segment/io/via.py b/dh_segment/io/via.py new file mode 100644 index 0000000..a42e85a --- /dev/null +++ b/dh_segment/io/via.py @@ -0,0 +1,959 @@ +#!/usr/bin/env python +# coding: utf-8 + +__author__ = "maudehrmann, solivr" +__license__ = "GPL" + +import json +import os +import re +from tqdm import tqdm +import numpy as np +from skimage import transform +from collections import namedtuple +from imageio import imsave, imread +import requests +from PIL import Image +from itertools import filterfalse, chain +from typing import List, Tuple, Dict +import cv2 +from . import PAGE + + +# To define before using the corresponding functions +# iiif_password = os.environ["IIIF_PWD"] +iiif_password = '' + + +WorkingItem = namedtuple( + "WorkingItem", [ + 'collection', + 'image_name', + 'original_x', + 'original_y', + 'reduced_x', + 'reduced_y', + 'iiif', + 'annotations' + ] +) +WorkingItem.__doc__ = """ +A container for annotated images. + +:param str collection: name of the collection +:param str image_name: name of the image +:param int original_x: original image x size (width) +:param int original_y: original image y size (height) +:param int reduced_x: resized x size +:param int reduced_y: resized y size +:param str iiif: iiif url +:param dict annotations: VIA 'region_attributes' +""" + + +VIAttribute = namedtuple( + "VIAttribute", [ + 'name', + 'type', + 'options' + ] +) +VIAttribute.__doc__ = """ +A container for VIA attributes. + +:param str name: The name of attribute +:param str type: The type of the annotation (dropdown, markbox, ...) +:param list options: The options / labels possible for this attribute. 
+""" + + +def parse_via_attributes(via_attributes: dict) -> List[VIAttribute]: + """ + Parses the VIA attribute dictionary and returns a list of VIAttribute instances + + :param via_attributes: attributes from VIA annotation ('_via_attributes' field) + :return: list of ``VIAttribute`` + """ + + if {'file', 'region'}.issubset(set(via_attributes.keys())): + via_attributes = via_attributes['region'] + + list_attributes = list() + for k, v in via_attributes.items(): + if v['type'] == 'text': + print('WARNING : Please do not use text type for attributes because it is more prone to errors/typos which ' + 'can make the parsing fail. Use instead "checkbox", "dropdown" or "radio" with defined options.') + options = None + else: + options = list(v['options'].keys()) + + list_attributes.append(VIAttribute(k, + v['type'], + options)) + + return list_attributes + + +def get_annotations_per_file(via_dict: dict, name_file: str) -> dict: + """ + From VIA json content, get annotations relative to the given `name_file`. + + :param via_dict: VIA annotations content (originally json) + :param name_file: the file to look for (it can be a iiif path or a file path) + :return: dict + """ + + # Check that the annotation_dict is a "via_project" file (project export), + # or a "via_region" file (annotation export) + if '_via_img_metadata' in via_dict.keys(): + annotation_dict = via_dict['_via_img_metadata'] + else: + annotation_dict = via_dict + + # If it looks like a iiif path add "-1" + if 'http' in name_file: + key = name_file + "-1" + else: + # find the key that contains the name_file + list_keys = list(filterfalse(lambda x: name_file not in x, list(annotation_dict.keys()))) + assert len(list_keys) == 1, "There is more than one key for the file '{} : \n{}'".format(name_file, list_keys) + key = list_keys[0] + + if key in annotation_dict.keys(): + myannotation = annotation_dict[key] + if name_file == myannotation['filename']: + return myannotation['regions'] + else: + return None + + +def _compute_reduced_dimensions(x: int, y: int, target_h: int=2000) -> Tuple[int, int]: + """ + Compute new dimensions with height set to `target_h`. 
+ + :param x: height + :param y: width + :param target_h: target height + :return: tuple + """ + ratio = y / x + target_w = int(target_h * ratio) + return target_h, target_w + + +def _collect_working_items_from_local_images(via_annotations: dict, images_dir: str, collection_name: str) \ + -> List[WorkingItem]: + """ + Given VIA annotation input, collect all info on `WorkingItem` object, when images come from local files + + :param via_annotations: via_annotations: via annotations ('regions' field) + :param images_dir: directory where to find the images + :param collection_name: name of the collection + :return: + """ + + def _formatting(name_id: str) -> str: + name_id = re.sub('.jpg\d*', '.jpg', name_id) + name_id = re.sub('.png\d*', '.png', name_id) + return name_id + + def _get_image_shape_without_loading(filename: str) -> Tuple[int, int]: + image = Image.open(filename) + shape = image.size + image.close() + return shape + + working_items = list() + + for key, v in tqdm(via_annotations.items()): + filename = _formatting(key) + + absolute_filename = os.path.join(images_dir, filename) + shape_image = _get_image_shape_without_loading(absolute_filename) + + regions = v['regions'] + + if regions: + wk_item = WorkingItem(collection=collection_name, + image_name=filename.split('.')[0], + original_x=shape_image[0], + original_y=shape_image[1], + reduced_x=None, + reduced_y=None, + iiif=None, + annotations=regions) + + working_items.append(wk_item) + + return working_items + + +def _collect_working_items_from_iiif(via_annotations: dict, collection_name: str, iiif_user='my-team') -> dict: + """ + Given VIA annotation input, collect all info on `WorkingItem` object, when the images come from IIIF urls + + :param via_annotations: via_annotations: via annotations ('regions' field) + :param collection_name: name of the collection + :param iiif_user: user param for requests.Session().get() + :return: + """ + + working_items = list() + session = requests.Session() + + for key, v in tqdm(via_annotations.items()): + iiif_url = v['filename'] + + image_name = os.path.basename(iiif_url.split('/full/full/')[0]) + + # get image dimensions + iiif_json = iiif_url.replace("default.jpg", "info.json") + resp_json = session.get(iiif_json, auth=(iiif_user, iiif_password)) + if resp_json.status_code == requests.codes.ok: + y = resp_json.json()['height'] + x = resp_json.json()['width'] + # target_h, target_w = _compute_reduced_dimensions(x, y) + target_h, target_w = None, None + else: + x, y, target_w, target_h = None, None, None, None + resp_json.raise_for_status() + + regions = v['regions'] + + if regions: + wk_item = WorkingItem(collection=collection_name, + image_name=image_name.split('.')[0], + original_x=x, + original_y=y, + reduced_x=target_w, + reduced_y=target_h, + iiif=iiif_url, + annotations=regions) + + working_items.append(wk_item) + + return working_items + + +def collect_working_items(via_annotations: dict, collection_name: str, images_dir: str=None, + via_version: int=2) -> List[WorkingItem]: + """ + Given VIA annotation input, collect all info on `WorkingItem` object. + This function will take care of separating images from local files and images from IIIF urls. 
+ + :param via_annotations: via annotations ('regions' field) + :param images_dir: directory where to find the images + :param collection_name: name of the collection + :param via_version: version of the VIA tool used to produce the annotations (1 or 2) + :return: list of `WorkingItem` + """ + + via_annotations_v2 = via_annotations.copy() + if via_version == 1: + for key, value in via_annotations_v2.items(): + list_regions = list() + for v_region in value['regions'].values(): + list_regions.append(v_region) + via_annotations_v2[key]['regions'] = list_regions + + local_annotations = {k: v for k, v in via_annotations_v2.items() if 'http' not in k} + url_annotations = {k: v for k, v in via_annotations_v2.items() if 'http' in k} + + working_items = list() + if local_annotations: + assert images_dir is not None + working_items += _collect_working_items_from_local_images(local_annotations, images_dir, collection_name) + if url_annotations: + working_items += _collect_working_items_from_iiif(url_annotations, collection_name) + + return working_items + + +def _scale_down_original(working_item, img_out_dir: str) -> None: + """ + Copy and reduce original image files. + + :param img_out_dir: where to put the downscaled images + :param working_item: dict of `WorkingItems` + :return: None + """ + + def _getimage_from_iiif(url, user, pwd): + img = requests.get(url, auth=(user, pwd)) + return imread(img.content) + + image_set_dir = os.path.join(img_out_dir, working_item.collection, "images") + if not os.path.exists(image_set_dir): + try: + os.makedirs(image_set_dir) + except OSError as e: + if e.errno != os.errno.EEXIST: + raise + pass + + outfile = os.path.join(image_set_dir, working_item.image_name + "_ds.png") + if not os.path.isfile(outfile): + img = _getimage_from_iiif(working_item.iiif, 'epfl-team', iiif_password) + img_resized = transform.resize( + img, + [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, + preserve_range=True + ) + imsave(outfile, img_resized.astype(np.uint8)) + + +def load_annotation_data(via_data_filename: str, only_img_annotations: bool=False, via_version: int=2) -> dict: + """ + Load the content of via annotation files. + + :param via_data_filename: via annotations json file + :param only_img_annotations: load only the images annotations ('_via_img_metadata' field) + :param via_version: + :return: the content of json file containing the region annotated + """ + + with open(via_data_filename, 'r', encoding='utf8') as f: + content = json.load(f) + if via_version == 2: + assert '_via_img_metadata' in content.keys(), "The file is not a valid VIA project export." + + if only_img_annotations: + return content['_via_img_metadata'] + else: + return content + else: + return content + + +def export_annotation_dict(annotation_dict: dict, filename: str) -> None: + """ + Export the annotations to json file. + + :param annotation_dict: VIA annotations + :param filename: filename to export the data (json file) + :return: + """ + with open(filename, 'w', encoding='utf8') as f: + json.dump(annotation_dict, f) + + +def get_via_attributes(annotation_dict: dict, via_version: int=2) -> List[VIAttribute]: + """ + Gets the attributes of the annotated data and returns a list of `VIAttribute`. 
+ + :param annotation_dict: json content of the VIA exported file + :param via_version: either 1 or 2 (for VIA v 1.0 or VIA v 2.0) + :return: A list containing VIAttributes + """ + + if via_version == 1: + + list_attributes = [list(region['region_attributes'].keys()) + for value in annotation_dict.values() + for region in value['regions'].values()] + + # Find options + unique_attributes = list(np.unique(list(chain.from_iterable(list_attributes)))) + + dict_labels = {rgn_att: list() for rgn_att in unique_attributes} + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions.values(): + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) + + elif via_version == 2: + + if '_via_attributes' in annotation_dict.keys(): # If project_export is given + return parse_via_attributes(annotation_dict['_via_attributes']) + + else: # else if annotation_export is given + + list_attributes = [list(region['region_attributes'].keys()) + for value in annotation_dict.values() + for region in value['regions']] + + # Find options + unique_attributes = list(np.unique(list(chain.from_iterable(list_attributes)))) + + dict_labels = {rgn_att: list() for rgn_att in unique_attributes} + for value in annotation_dict.values(): + regions = value['regions'] + for region in regions: + for k, v in region['region_attributes'].items(): + dict_labels[k].append(v) + + else: + raise NotImplementedError + + # Instantiate VIAttribute objects + viattribute_list = list() + for attribute, options in dict_labels.items(): + + if all(isinstance(opt, str) for opt in options): + viattribute_list.append(VIAttribute(name=attribute, + type=None, + options=list(np.unique(options)))) + + elif all(isinstance(opt, dict) for opt in options): + viattribute_list.append(VIAttribute(name=attribute, + type=None, + options=list(np.unique(list(chain.from_iterable(options)))))) + + else: + raise NotImplementedError + return viattribute_list + + +def _draw_mask(via_region: dict, mask: np.array, contours_only: bool=False) -> np.array: + """ + + :param via_region: region to draw (in VIA format) + :param mask: image mask to draw on + :param contours_only: if `True`, draws only the contours of the region, if `False`, fills the region + :return: the drawn mask + """ + + shape_attributes_dict = via_region['shape_attributes'] + + if shape_attributes_dict['name'] == 'rect': + x = shape_attributes_dict['x'] + y = shape_attributes_dict['y'] + w = shape_attributes_dict['width'] + h = shape_attributes_dict['height'] + + contours = np.array([[x, y], + [x + w, y], + [x + w, y + h], + [x, y + h] + ]).reshape((-1, 1, 2)) + + mask = cv2.polylines(mask, [contours], True, 255, thickness=15) if contours_only \ + else cv2.fillPoly(mask, [contours], 255) + + elif shape_attributes_dict['name'] == 'polygon': + contours = np.stack([shape_attributes_dict['all_points_x'], + shape_attributes_dict['all_points_y']], axis=1)[:, None, :] + + mask = cv2.polylines(mask, [contours], True, 255, thickness=15) if contours_only \ + else cv2.fillPoly(mask, [contours], 255) + + elif shape_attributes_dict['name'] == 'circle': + center_point = (shape_attributes_dict['cx'], shape_attributes_dict['cy']) + radius = shape_attributes_dict['r'] + + mask = cv2.circle(mask, center_point, radius, 255, thickness=15) if contours_only \ + else cv2.circle(mask, center_point, radius, 255, thickness=-1) + + elif shape_attributes_dict['name'] == 'polyline': + contours = np.stack([shape_attributes_dict['all_points_x'], + 
shape_attributes_dict['all_points_y']], axis=1)[:, None, :] + + mask = cv2.polylines(mask, [contours], False, 255, thickness=15) + + else: + raise NotImplementedError( + 'Mask annotation for shape of type "{}" has not been implemented yet' + .format(shape_attributes_dict['name'])) + + return mask + + +def _write_mask(mask: np.ndarray, masks_dir: str, collection: str, image_name: str, label: str) -> None: + """ + Save a mask with filename containing 'label'. + + :param mask: mask b&w image (H, W) + :param masks_dir: directory to output mask + :param collection: name of the collection + :param image_name: name of the image + :param label: label of the mask + :return: + """ + + outdir = os.path.join(masks_dir, collection, image_name) + if not os.path.exists(outdir): + os.makedirs(outdir) + label = label.strip(' \n').replace(" ", "_").lower() if label is not None else 'nolabel' + outfile = os.path.join(outdir, image_name + "-mask-" + label + ".png") + imsave(outfile, mask.astype(np.uint8)) + + +def create_masks(masks_dir: str, working_items: List[WorkingItem], via_attributes: List[VIAttribute], + collection: str, contours_only: bool=False) -> dict: + """ + For each annotation, create a corresponding binary mask and resize it (h = 2000). Only valid for VIA 2.0. + Several annotations of the same class on the same image produce one image with several masks. + + :param masks_dir: where to output the masks + :param working_items: infos to work with + :param via_attributes: VIAttributes computed by ``get_via_attributes`` function. + :param collection: name of the nollection + :param contours_only: creates the binary masks only for the contours of the object (thickness of contours : 20 px) + :return: annotation_summary, a dictionary containing a list of labels per image + """ + + def resize_and_write_mask(mask_image: np.ndarray, working_item: WorkingItem, label_item: str) -> None: + """ + Resize only if needed (if working_item.reduced != working_item.original) + + :param mask_image: mask image to write + :param working_item: `WorkingItem` object + :param label_item: label name to append to filename + :return: + """ + + if not working_item.reduced_y and not working_item.reduced_x: + _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + + elif working_item.reduced_x != working_item.original_x and working_item.reduced_y != working_item.original_y: + mask_resized = transform.resize(mask_image, + [working_item.reduced_y, working_item.reduced_x], + anti_aliasing=False, + preserve_range=True, + order=0) + _write_mask(mask_resized, masks_dir, collection, working_item.image_name, label_item) + + else: + _write_mask(mask_image, masks_dir, collection, working_item.image_name, label_item) + # ------------------- + + print("Creating masks in {}...".format(masks_dir)) + + annotation_summary = dict() + + for wi in tqdm(working_items, desc="workingItem2mask"): + labels = list() + + # the image has no annotation, writing a black mask: + if not wi.annotations: + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + resize_and_write_mask(mask, wi, None) + labels.append("nolabel") + + # check all possible labels for the image and create mask: + else: + for attribute in via_attributes: + for option in attribute.options: + # get annotations that have the current attribute + selected_regions = list(filter(lambda r: attribute.name in r['region_attributes'].keys(), + wi.annotations)) + # get annotations that have the current attribute and option + if selected_regions: + selected_regions 
= list(filter(lambda r: r['region_attributes'][attribute.name] == option, + selected_regions)) + else: + continue + + if selected_regions: + # create a 0 matrix (black background) + mask = np.zeros([wi.original_y, wi.original_x], np.uint8) + + # nb: if 2 labels are on the same page, they belongs to the same mask + for sr in selected_regions: + mask = _draw_mask(sr, mask, contours_only) + + label = '{}-{}'.format(attribute.name, option).lower() + resize_and_write_mask(mask, wi, label) + # add to existing labels + labels.append(label) + + # write summary: list of existing labels per image + annotation_summary[wi.image_name] = labels + outfile = os.path.join(masks_dir, collection, collection + "-classes.txt") + with open(outfile, 'a') as fh: + for a in annotation_summary: + fh.write(a + "\t" + str(annotation_summary[a]) + "\n") + + print("Done.") + return annotation_summary + + +def _get_coordinates_from_xywh(via_regions: List[dict]) -> List[np.array]: + """ + From VIA region dictionaries, get the coordinates array (N,2) of the annotations + + :param via_regions: + :return: + """ + list_coordinates_regions = list() + for region in via_regions: + shape_attributes_dict = region['shape_attributes'] + if shape_attributes_dict['name'] == 'rect': + x = shape_attributes_dict['x'] + y = shape_attributes_dict['y'] + w = shape_attributes_dict['width'] + h = shape_attributes_dict['height'] + + coordinates = np.array([[x, y], + [x + w, y], + [x + w, y + h], + [x, y + h] + ]) + list_coordinates_regions.append(coordinates) + elif shape_attributes_dict['name'] == 'polygon': + coordinates = np.stack([shape_attributes_dict['all_points_x'], + shape_attributes_dict['all_points_y']], axis=1) + list_coordinates_regions.append(coordinates) + elif shape_attributes_dict['name'] == 'polyline': + coordinates = np.stack([shape_attributes_dict['all_points_x'], + shape_attributes_dict['all_points_y']], axis=1) + list_coordinates_regions.append(coordinates) + else: + raise NotImplementedError( + "This method has not been implemenetd yet for {}".format(shape_attributes_dict['name'])) + + return list_coordinates_regions + + +# EXPORT +# ------ + +def _get_xywh_from_coordinates(coordinates: np.array) -> Tuple[int, int, int, int]: + """ + From coordinates points get x,y, width, height + + :param coordinates: (N,2) coordinates (x,y) + :return: x, y, w, h + """ + + x = np.min(coordinates[:, 0]) + y = np.min(coordinates[:, 1]) + w = np.max(coordinates[:, 0]) - x + h = np.max(coordinates[:, 1]) - y + + return x, y, w, h + + +def create_via_region_from_coordinates(coordinates: np.array, region_attributes: dict, type_region: str) -> dict: + """ + Formats coordinates to a VIA region (dict). 
+ + :param coordinates: (N, 2) coordinates (x, y) + :param region_attributes: dictionary with keys : name of labels, values : values of labels + :param type_region: via region annotation type ('rect', 'polygon') + :return: a region in VIA style (dict/json) + """ + assert type_region in ['rect', 'polygon', 'circle'] + + if type_region == 'rect': + x, y, w, h = _get_xywh_from_coordinates(coordinates) + shape_atributes = { + 'name': 'rect', + 'height': int(h), + 'width': int(w), + 'x': int(x), + 'y': int(y) + } + elif type_region == 'polygon': + points_x = list(coordinates[:, 0]) + points_y = list(coordinates[:, 1]) + + shape_atributes = { + 'name': 'polygon', + 'all_points_x': [int(p) for p in points_x], + 'all_points_y': [int(p) for p in points_y], + } + elif type_region == 'circle': + raise NotImplementedError('The type {} is not supported for the export.'.format(type)) + + return {'region_attributes': region_attributes, + 'shape_attributes': shape_atributes} + + +def create_via_annotation_single_image(img_filename: str, via_regions: List[dict], + file_attributes: dict=None) -> Dict[str, dict]: + """ + Returns a dictionary item {key: annotation} in VIA format to further export to .json file + + :param img_filename: path to the image + :param via_regions: regions in VIA format (output from ``create_via_region_from_coordinates``) + :param file_attributes: file attributes (usually None) + :return: dictionary item with key and annotations in VIA format + """ + if 'http' in img_filename: + basename = img_filename + file_size = -1 + else: + basename = os.path.basename(img_filename) + file_size = os.path.getsize(img_filename) + + via_key = '{}{}'.format(basename, file_size) + + via_annotation = { + 'file_attributes': file_attributes if file_attributes is not None else dict(), + 'filename': basename, + 'size': file_size, + 'regions': via_regions + } + + return {via_key: via_annotation} + + +# PAGE CONVERSION +# --------------- + +def convert_via_region_page_text_region(working_item: WorkingItem, structure_label: str) -> PAGE.Page: + """ + + :param working_item: + :param structure_label: + :return: + """ + + # TODO : this is not yet generic because we're missing the automatic detection of the structure label + + region_coordinates = _get_coordinates_from_xywh(working_item.annotations) + + page = PAGE.Page(image_filename=working_item.image_name + 'jpg', + image_width=working_item.original_x, + image_height=working_item.original_y, + graphic_regions=[ + PAGE.TextRegion(coords=PAGE.Point.array_to_point(coords), + custom_attribute='structure{{type:{};}}'.format(structure_label)) + for coords in region_coordinates]) + return page + + +# def convert_via_region_to_text_region(via_regions: List[dict], structure_label: str) -> PAGE.TextRegion: +# """ +# +# :param via_region: +# :param structure_label: +# :return: +# """ +# +# # TODO : this is not yet generic because we're missing the automatic detection of the structure label +# +# region_coordinates = _get_coordinates_from_xywh(working_item.annotations) +# +# page = PAGE.Page(image_filename=working_item.image_name + 'jpg', +# image_width=working_item.original_x, +# image_height=working_item.original_y, +# graphic_regions=[ +# PAGE.TextRegion(coords=PAGE.Point.array_to_point(coords), +# custom_attribute='structure{{type:{};}}'.format(structure_label)) +# for coords in region_coordinates]) +# return page + + +""" +Example of usage + + +collection = 'mycollection' +annotation_file = 'via_sample.json' +masks_dir = '/home/project/generated_masks' +images_dir = 
'./my_images' + +# Load all the data in the annotation file (the file may be an exported project or an export of the annotations) +via_data = load_annotation_data(annotation_file) + +# In the case of an exported project file, you can set ``only_img_annotations=True`` to get only +# the region annotations +via_annotations = load_annotation_data(annotation_file, only_img_annotations=True) + +# Collect the annotated regions +working_items = collect_working_items(via_annotations, collection, images_dir) + +# Collect the attributes and options +if '_via_attributes' in via_data.keys(): + list_attributes = parse_via_attributes(via_data['_via_attributes']) +else: + list_attributes = get_via_attributes(via_annotations) + +# Create one mask per option per attribute +create_masks(masks_dir, wi,via_attributes, collection) +""" + + +""" +Content of a via_project exported file + +{'_via_attributes': { + ... + }, + '_via_img_metadata': { + ... + }, + '_via_settings': { + 'core': { + 'buffer_size': 18, + 'default_filepath': '', + 'filepath': {} + }, + 'project': { + 'name': 'via_project_7Feb2019_10h7m' + }, + 'ui': { + 'annotation_editor_fontsize': 0.8, + 'annotation_editor_height': 25, + 'image': { + 'region_label': 'region_id', + 'region_label_font': '10px Sans' + }, + 'image_grid': { + 'img_height': 80, + 'rshape_fill': 'none', + 'rshape_fill_opacity': 0.3, + 'rshape_stroke': 'yellow', + 'rshape_stroke_width': 2, + 'show_image_policy': 'all', + 'show_region_shape': True + }, + 'leftsidebar_width': 18 + } + } +} + +""" + +""" +"_via_attributes": { + "region": { + "attribute1": { + "type":"text", + "description":"", + "default_value":"" + }, + "attribute2": { + "type":"dropdown", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + }, + "attribute3": { + "type":"checkbox", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + }, + "attribute 4": { + "type":"radio", + "description":"", + "options": { + "op1":"", + "op2":"" + }, + "default_options":{} + } + }, + "file":{} +} + +""" + +""" +'_via_img_metadata': { + 'image_filename1.jpg2209797': { + 'file_attributes': {}, + 'filename': 'image_filename1.jpg', + 'regions': + [{ + 'region_attributes': { + 'attribute1': { + 'op1': True, + 'op2': True + }, + 'attribute 2': 'label1', + 'attribute 3': 'op1' + }, + 'shape_attributes': { + 'height': 2277, + 'name': 'rect', + 'width': 1541, + 'x': 225, + 'y': 458 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op1', + 'attribute 1': {}, + 'attribute 2': 'label1', + 'attribute 3': 'op2' + }, + 'shape_attributes': { + 'height': 2255, + 'name': 'rect', + 'width': 1554, + 'x': 1845, + 'y': 476 + } + }], + 'size': 2209797}, + 'https://libimages.princeton.edu/loris/pudl0001/5138415/00000011.jp2/full/full/0/default.jpg-1': { + 'file_attributes': {}, + 'filename': 'https://libimages.princeton.edu/loris/pudl0001/5138415/00000011.jp2/full/full/0/default.jpg', + 'regions': + [{ + 'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True + }, + 'attribute 2': 'label3', + 'attribute 3': 'op1' + }, + 'shape_attributes': { + 'height': 1026, + 'name': 'rect', + 'width': 1430, + 'x': 145, + 'y': 525 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True}, + 'attribute 2': 'label 3 ', + 'attribute 3': 'op1', + }, + 'shape_attributes': { + 'all_points_x': [2612, 2498, 2691, 2757, 2962, 3034, 2636], + 'all_points_y': [5176, 5616, 5659, 5363, 5375, 5110, 5122], + 'name': 'polygon' + } + }, + { + 
'region_attributes': { + 'attribute 4': 'op2', + 'attribute 1': { + 'op1': True}, + 'attribute 2': 'label 3 ', + 'attribute 3': 'op1', + }, + 'shape_attributes': { + 'cx': 2793, + 'cy': 881, + 'name': 'circle', + 'r': 524 + } + }, + { + 'region_attributes': { + 'attribute 4': 'op1', + 'attribute 1': { + 'op2': True}, + 'attribute 2': 'label1', + 'attribute 3': 'op2', + }, + 'shape_attributes': { + 'all_points_x': [3246, 5001], + 'all_points_y': [422, 380], + 'name': 'polyline' + } + }], + 'size': -1 + } +} +""" diff --git a/dh_segment/network/__init__.py b/dh_segment/network/__init__.py index 553184c..e5aa398 100644 --- a/dh_segment/network/__init__.py +++ b/dh_segment/network/__init__.py @@ -1,12 +1,14 @@ _MODEL = [ - 'inference_vgg16', - 'inference_resnet_v1_50', - 'inference_u_net', - 'vgg_16_fn', - 'resnet_v1_50_fn' + 'Encoder', + 'Decoder', + 'SimpleDecoder', ] -__all__ = _MODEL +_PRETRAINED = [ + 'ResnetV1_50', + 'VGG16' +] +__all__ = _MODEL + _PRETRAINED from .model import * from .pretrained_models import * diff --git a/dh_segment/network/model.py b/dh_segment/network/model.py index cc55bae..b20de04 100644 --- a/dh_segment/network/model.py +++ b/dh_segment/network/model.py @@ -1,310 +1,136 @@ #!/usr/bin/env python import tensorflow as tf -from ..utils import ModelParams from tensorflow.contrib import layers # TODO migration to tf.layers ? -from tensorflow.contrib.slim.nets import resnet_v1 from tensorflow.contrib.slim import arg_scope -from .pretrained_models import vgg_16_fn, resnet_v1_50_fn -from collections import OrderedDict +from abc import ABC, abstractmethod +from typing import List, Union, Tuple, Optional, Dict -def inference_vgg16(images: tf.Tensor, params: ModelParams, num_classes: int, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - with tf.name_scope('vgg_augmented'): - - if use_batch_norm: - if params.batch_renorm: - renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 10} - renorm_momentum = 0.98 - else: - renorm_clipping = None - renorm_momentum = 0.99 - batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, name='batch_norm', - renorm=params.batch_renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum) - else: - batch_norm_fn = None - - def upsample_conv(pooled_layer, previous_layer, layer_params, number): - with tf.name_scope('deconv{}'.format(number)): - if previous_layer.get_shape()[1].value and previous_layer.get_shape()[2].value: - target_shape = previous_layer.get_shape()[1:3] - else: - target_shape = tf.shape(previous_layer)[1:3] - upsampled_layer = tf.image.resize_images(pooled_layer, target_shape, - method=tf.image.ResizeMethod.BILINEAR) - input_tensor = tf.concat([upsampled_layer, previous_layer], 3) - - for i, (nb_filters, filter_size) in enumerate(layer_params): - input_tensor = layers.conv2d( - inputs=input_tensor, - num_outputs=nb_filters, - kernel_size=[filter_size, filter_size], - normalizer_fn=batch_norm_fn, - scope="conv{}_{}".format(number, i + 1) - ) - return input_tensor +class Encoder(ABC): + @abstractmethod + def __call__(self, images: tf.Tensor, is_training=False) -> List[tf.Tensor]: + """ - # Original VGG : - vgg_net, intermediate_levels = vgg_16_fn(images, blocks=5, weight_decay=weight_decay) - out_tensor = vgg_net + :param images: [NxHxWx3] float32 [0..255] input images + :return: a list of the feature maps in decreasing spatial resolution (first element is most likely the input \ + image itself, then the output of the first pooling op, etc...) 
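For reference, the VIA helpers introduced above (`create_via_region_from_coordinates` and `create_via_annotation_single_image`) can be exercised as in the sketch below. The import path, coordinates and labels are assumptions for illustration only; note that for a local file the image must exist on disk, since its size becomes part of the VIA key (an http URL also works and gets size -1).

```python
import json
import numpy as np

# Assumed import path for the helpers shown in this patch; adjust if the module lives elsewhere.
from dh_segment.io.via import (create_via_region_from_coordinates,
                               create_via_annotation_single_image)

# (N, 2) array of (x, y) corner points describing a rectangular region
coords = np.array([[225, 458], [1766, 458], [1766, 2735], [225, 2735]])

region = create_via_region_from_coordinates(coords,
                                            region_attributes={'attribute1': 'label1'},
                                            type_region='rect')

# One {key: annotation} item, ready to be merged with other images and dumped to .json
annotation = create_via_annotation_single_image('image_filename1.jpg', via_regions=[region])

with open('via_annotations.json', 'w') as f:
    json.dump(annotation, f)
```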
+ """ + pass - # Intermediate convolution - if params.intermediate_conv is not None: - with tf.name_scope('intermediate_convs'): - for layer_params in params.intermediate_conv: - for k, (nb_filters, filter_size) in enumerate(layer_params): - out_tensor = layers.conv2d(inputs=out_tensor, - num_outputs=nb_filters, - kernel_size=[filter_size, filter_size], - normalizer_fn=batch_norm_fn, - scope='conv_{}'.format(k + 1)) + def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict]]: + """ - # Upsampling : - with tf.name_scope('upsampling'): - selected_upscale_params = [l for i, l in enumerate(params.upscale_params) - if params.selected_levels_upscaling[i]] + :return: The filename of the pretrained checkpoint and the corresponding variables (List of Dict mapping) \ + or `None` if no-pretraining is done + """ + return None, None - assert len(params.selected_levels_upscaling) == len(intermediate_levels), \ - 'Upscaling : {} is different from {}'.format(len(params.selected_levels_upscaling), - len(intermediate_levels)) - selected_intermediate_levels = [l for i, l in enumerate(intermediate_levels) - if params.selected_levels_upscaling[i]] +class Decoder(ABC): + @abstractmethod + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int, is_training=False) -> tf.Tensor: + """ - # Upsampling loop - n_layer = 1 - for i in reversed(range(len(selected_intermediate_levels))): - out_tensor = upsample_conv(out_tensor, selected_intermediate_levels[i], - selected_upscale_params[i], n_layer) - n_layer += 1 + :param feature_maps: list of feature maps, in decreasing spatial resolution, first one being at the original \ + resolution + :return: [N,H,W,num_classes] float32 tensor of logit scores + """ + pass + + +class SimpleDecoder(Decoder): + """ + + :ivar upsampling_dims: + :ivar max_depth: + :ivar weight_decay: + :ivar self.batch_norm_fn: + """ + def __init__(self, upsampling_dims: List[int], max_depth: int = None, weight_decay: float=0.): + self.upsampling_dims = upsampling_dims + self.max_depth = max_depth + self.weight_decay = weight_decay + renorm = True + self.batch_norm_params = { + "renorm": renorm, + "renorm_clipping": {'rmax': 100, 'rmin': 0.1, 'dmax': 10}, + "renorm_momentum": 0.98 + } + + def __call__(self, feature_maps: List[tf.Tensor], num_classes: int, is_training=False): + + batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, + name='batch_norm', **self.batch_norm_params) + + # Upsampling + with tf.variable_scope('SimpleDecoder'): + with arg_scope([layers.conv2d], + normalizer_fn=batch_norm_fn, + weights_regularizer=layers.l2_regularizer(self.weight_decay)): + + assert len(self.upsampling_dims) + 1 == len(feature_maps), \ + 'Upscaling : length of {} does not match {}'.format(len(self.upsampling_dims), + len(feature_maps)) + + # Force layers to not be too big to reduce memory usage + for i, l in enumerate(feature_maps): + if self.max_depth and l.get_shape()[-1] > self.max_depth: + feature_maps[i] = layers.conv2d( + inputs=l, + num_outputs=self.max_depth, + kernel_size=[1, 1], + scope="dimreduc_{}".format(i), + normalizer_fn=batch_norm_fn, + activation_fn=None + ) + + # Deconvolving loop + out_tensor = feature_maps[-1] + for i, f_map in reversed(list(enumerate(feature_maps[:-1]))): + out_tensor = _upsample_concat(out_tensor, f_map, scope_name='upsample_{}'.format(i)) + out_tensor = layers.conv2d(inputs=out_tensor, + num_outputs=self.upsampling_dims[i], + kernel_size=[3, 3], + scope="conv_{}".format(i)) logits = 
layers.conv2d(inputs=out_tensor, num_outputs=num_classes, activation_fn=None, kernel_size=[1, 1], - scope="conv{}-logits".format(n_layer)) - - return logits # [B,h,w,Classes] - - -def inference_resnet_v1_50(images, params, num_classes, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - if use_batch_norm: - if params.batch_renorm: - renorm_clipping = {'rmax': 100, 'rmin': 0.1, 'dmax': 1} - renorm_momentum = 0.98 - else: - renorm_clipping = None - renorm_momentum = 0.99 - batch_norm_fn = lambda x: tf.layers.batch_normalization(x, axis=-1, training=is_training, name='batch_norm', - renorm=params.batch_renorm, - renorm_clipping=renorm_clipping, - renorm_momentum=renorm_momentum) - else: - batch_norm_fn = None - - def upsample_conv(input_tensor, previous_intermediate_layer, layer_params, number) -> tf.Tensor: - """ - Deconvolution (upscaling) layers - - :param input_tensor: - :param previous_intermediate_layer: - :param layer_params: - :param number: - :return: - """ - with tf.variable_scope('deconv_{}'.format(number)): - if previous_intermediate_layer.get_shape()[1].value and \ - previous_intermediate_layer.get_shape()[2].value: - target_shape = previous_intermediate_layer.get_shape()[1:3] - else: - target_shape = tf.shape(previous_intermediate_layer)[1:3] - upsampled_layer = tf.image.resize_images(input_tensor, target_shape, - method=tf.image.ResizeMethod.BILINEAR) - net = tf.concat([upsampled_layer, previous_intermediate_layer], 3) - - filter_size, nb_bottlenecks = layer_params - if nb_bottlenecks > 0: - for i in range(nb_bottlenecks): - net = resnet_v1.bottleneck( - inputs=net, - depth=filter_size, - depth_bottleneck=filter_size // 4, - stride=1 - ) - else: - net = layers.conv2d( - inputs=net, - num_outputs=filter_size, - kernel_size=[3, 3], - scope="conv{}".format(number) - ) - - return net - - # Original ResNet - blocks_needed = max([i for i, is_needed in enumerate(params.selected_levels_upscaling) if is_needed]) - resnet_net, intermediate_layers = resnet_v1_50_fn(images, is_training=False, blocks=blocks_needed, - weight_decay=weight_decay, renorm=False, - corrected_version=params.correct_resnet_version) - - # Upsampling - with tf.variable_scope('upsampling'): - with arg_scope([layers.conv2d], - normalizer_fn=batch_norm_fn, - weights_regularizer=layers.l2_regularizer(weight_decay)): - selected_upscale_params = [l for i, l in enumerate(params.upscale_params) - if params.selected_levels_upscaling[i]] - - assert len(selected_upscale_params) == len(intermediate_layers), \ - 'Upscaling : {} is different from {}'.format(len(selected_upscale_params), - len(intermediate_layers)) - - selected_intermediate_levels = [l for i, l in enumerate(intermediate_layers) - if params.selected_levels_upscaling[i]] - - # Rescaled image values to [0,1] - selected_intermediate_levels.insert(0, images/255.0) - - # Force layers to not be too big to reduce memory usage - for i, l in enumerate(selected_intermediate_levels): - if l.get_shape()[-1] > params.max_depth: - selected_intermediate_levels[i] = layers.conv2d( - inputs=l, - num_outputs=params.max_depth, - kernel_size=[1, 1], - scope="dimreduc_{}".format(i), - # normalizer_fn=batch_norm_fn, - activation_fn=None - ) + scope="conv-logits") - # Deconvolving loop - out_tensor = selected_intermediate_levels[-1] - n_layer = 1 - for i in reversed(range(len(selected_intermediate_levels) - 1)): - out_tensor = upsample_conv(out_tensor, selected_intermediate_levels[i], - selected_upscale_params[i], n_layer) + return logits - n_layer += 1 - if 
images.get_shape()[1].value and images.get_shape()[2].value: - target_shape = images.get_shape()[1:3] - else: - target_shape = tf.shape(images)[1:3] - out_tensor = tf.image.resize_images(out_tensor, target_shape, - method=tf.image.ResizeMethod.BILINEAR) +def _get_image_shape_tensor(tensor: tf.Tensor) -> Union[Tuple[int, int], tf.Tensor]: + """ + Get the image shape of the tensor - logits = layers.conv2d(inputs=out_tensor, - num_outputs=num_classes, - activation_fn=None, - kernel_size=[1, 1], - scope="conv{}-logits".format(n_layer)) - - return logits - - -def conv_bn_layer(input_tensor, kernel_size, output_channels, stride=1, bn=False, - is_training=True, relu=True): - # with tf.variable_scope(name) as scope: - conv_layer = layers.conv2d(inputs=input_tensor, - num_outputs=output_channels, - kernel_size=kernel_size, - stride=stride, - activation_fn=tf.identity, - padding='SAME') - if bn and relu: - # How to use Batch Norm: https://github.com/martin-gorner/tensorflow-mnist-tutorial/blob/master/README_BATCHNORM.md - - # Why scale is false when using ReLU as the next activation - # https://datascience.stackexchange.com/questions/22073/why-is-scale-parameter-on-batch-normalization-not-needed-on-relu/22127 - - # Using fuse operation: https://www.tensorflow.org/performance/performance_guide#common_fused_ops - conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=False, is_training=is_training, fused=True) - conv_layer = tf.nn.relu(conv_layer) - - if bn and not relu: - conv_layer = layers.batch_norm(inputs=conv_layer, center=True, scale=True, is_training=is_training) - - # print('Conv layer {0} -> {1}'.format(input_tensor.get_shape().as_list(),conv_layer.get_shape().as_list())) - return conv_layer - - -def _get_image_shape_tensor(tensor: tf.Tensor): + :param tensor: Input image tensor [N,H,W,...] 
+ :return: a (int, int) tuple if shape is defined, otherwise the corresponding tf.Tensor value + """ if tensor.get_shape()[1].value and \ - tensor.get_shape()[2].value: + tensor.get_shape()[2].value: target_shape = tensor.get_shape()[1:3] else: target_shape = tf.shape(tensor)[1:3] return target_shape -def inference_u_net(images: tf.Tensor, params: ModelParams, num_classes: int, use_batch_norm=False, weight_decay=0.0, - is_training=False) -> tf.Tensor: - enc_layers = OrderedDict() - dec_layers = OrderedDict() - - with tf.variable_scope('U-Net'): - - with tf.variable_scope('Encoder'): - - conv_layer = layers.conv2d(images, num_outputs=64, kernel_size=(3, 3), padding='SAME', - activation_fn=tf.identity) - - enc_layers['conv_layer_enc_64'] = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=64, - bn=True, is_training=is_training, relu=True) - - conv_layer = layers.max_pool2d(inputs=enc_layers['conv_layer_enc_64'], kernel_size=(2, 2), stride=2) - - for n_feat in [128, 256, 512]: - enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=n_feat, - bn=True, - is_training=is_training, relu=True) - - enc_layers['conv_layer_enc_' + str(n_feat)] = conv_bn_layer( - enc_layers['conv_layer_enc_' + str(n_feat)], kernel_size=(3, 3), - output_channels=n_feat, - bn=True, is_training=is_training, relu=True) - - conv_layer = layers.max_pool2d(inputs=enc_layers['conv_layer_enc_' + str(n_feat)], kernel_size=(2, 2), stride=2) - - conv_layer_enc_1024 = conv_bn_layer(conv_layer, kernel_size=(3, 3), - output_channels=1024, - bn=True, is_training=is_training, relu=True) - - with tf.variable_scope('Decoder'): - dec_layers['conv_layer_dec_512'] = conv_bn_layer(conv_layer_enc_1024, kernel_size=(3, 3), - output_channels=512, - bn=True, is_training=is_training, relu=True) - - reduced_patchsize = _get_image_shape_tensor(enc_layers['conv_layer_enc_512']) - dec_layers['conv_layer_dec_512'] = tf.image.resize_images(dec_layers['conv_layer_dec_512'], size=reduced_patchsize, - method=tf.image.ResizeMethod.BILINEAR) - - for n_feat in [512, 256, 128, 64]: - - dec_layers['conv_layer_dec_' + str(n_feat * 2)] = tf.concat([dec_layers['conv_layer_dec_' + str(n_feat)], - enc_layers['conv_layer_enc_' + str(n_feat)]], - axis=3) - dec_layers['conv_layer_dec_' + str(n_feat)] = conv_bn_layer( - dec_layers['conv_layer_dec_' + str(n_feat * 2)], kernel_size=(3, 3), - output_channels=n_feat, - bn=True, is_training=is_training, relu=True) - if n_feat > 64: - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))] = conv_bn_layer( - dec_layers['conv_layer_dec_' + str(n_feat)], kernel_size=(3, 3), - output_channels=n_feat / 2, - bn=True, is_training=is_training, relu=True) - - reduced_patchsize = _get_image_shape_tensor(enc_layers['conv_layer_enc_' + str(int(n_feat / 2))]) - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))] = tf.image.resize_images( - dec_layers['conv_layer_dec_' + str(int(n_feat / 2))], - size=reduced_patchsize, - method=tf.image.ResizeMethod.BILINEAR) - - return layers.conv2d(dec_layers['conv_layer_dec_64'], num_outputs=num_classes, kernel_size=(3, 3), - padding='SAME', activation_fn=tf.identity) +def _upsample_concat(pooled_layer: tf.Tensor, previous_layer: tf.Tensor, scope_name: str='UpsampleConcat'): + """ + + :param pooled_layer: [N,H,W,C] coarse layer + :param previous_layer: [N,H',W',C'] fine layer (H'>H, and W'>W) + :param scope_name: + :return: [N,H',W',C+C'] concatenation of upsampled-`pooled_layer` and `previous_layer` + """ + with 
tf.name_scope(scope_name): + # Upsamples the coarse level + target_shape = _get_image_shape_tensor(previous_layer) + upsampled_layer = tf.image.resize_images(pooled_layer, target_shape, + method=tf.image.ResizeMethod.BILINEAR) + # Concatenate the upsampled-coarse and the other feature_map + input_tensor = tf.concat([upsampled_layer, previous_layer], 3) + return input_tensor diff --git a/dh_segment/network/pretrained_models.py b/dh_segment/network/pretrained_models.py deleted file mode 100644 index 9f69af6..0000000 --- a/dh_segment/network/pretrained_models.py +++ /dev/null @@ -1,123 +0,0 @@ -from tensorflow.contrib import slim, layers -import tensorflow as tf -from tensorflow.contrib.slim import nets -import numpy as np - -_VGG_MEANS = [123.68, 116.78, 103.94] - - -def mean_substraction(input_tensor, means=_VGG_MEANS): - return tf.subtract(input_tensor, np.array(means)[None, None, None, :], name='MeanSubstraction') - - -def vgg_16_fn(input_tensor: tf.Tensor, scope='vgg_16', blocks=5, weight_decay=0.0005) \ - -> (tf.Tensor, list): # list of tf.Tensors (layers) - intermediate_levels = [] - # intermediate_levels.append(input_tensor) - with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=weight_decay)): - with tf.variable_scope(scope, 'vgg_16', [input_tensor]) as sc: - input_tensor = mean_substraction(input_tensor) - intermediate_levels.append(input_tensor) - end_points_collection = sc.original_name_scope + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d. - with slim.arg_scope( - [layers.conv2d, layers.fully_connected, layers.max_pool2d], - outputs_collections=end_points_collection): - net = layers.repeat( - input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool1') - if blocks >= 2: - net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool2') - if blocks >= 3: - net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool3') - if blocks >= 4: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool4') - if blocks >= 5: - net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') - intermediate_levels.append(net) - net = layers.max_pool2d(net, [2, 2], scope='pool5') - - return net, intermediate_levels - - -def resnet_v1_50_fn(input_tensor: tf.Tensor, is_training=False, blocks=4, weight_decay=0.0001, - renorm=True, corrected_version=False) -> tf.Tensor: - with slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=weight_decay, batch_norm_decay=0.999)), \ - slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=renorm): - input_tensor = mean_substraction(input_tensor) - assert 0 < blocks <= 4 - - if corrected_version: - def corrected_resnet_v1_block(scope, base_depth, num_units, stride): - """Helper function for creating a resnet_v1 bottleneck block. - - Args: - scope: The scope of the block. - base_depth: The depth of the bottleneck layer for each unit. - num_units: The number of units in the block. - stride: The stride of the block, implemented as a stride in the last unit. - All other units have stride=1. - - Returns: - A resnet_v1 bottleneck block. 
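To make the new `Encoder`/`Decoder` contract concrete, here is a minimal sketch of a custom encoder paired with `SimpleDecoder`. `TinyEncoder` is a made-up toy, not part of the patch; the only constraints taken from the code above are that the encoder returns feature maps in decreasing spatial resolution (first one at the input resolution) and that `len(upsampling_dims) + 1 == len(feature_maps)`.

```python
import tensorflow as tf
from tensorflow.contrib import layers

# Assumes dh_segment.network re-exports these, as in the updated __init__.py above
from dh_segment.network import Encoder, SimpleDecoder


class TinyEncoder(Encoder):
    """Toy encoder: the (rescaled) input plus two progressively pooled feature maps."""
    def __call__(self, images: tf.Tensor, is_training=False):
        net = images / 255.0
        feature_maps = [net]
        for i, depth in enumerate([32, 64]):
            net = layers.conv2d(net, depth, [3, 3], scope='conv{}'.format(i))
            net = layers.max_pool2d(net, [2, 2])
            feature_maps.append(net)
        return feature_maps


images = tf.placeholder(tf.float32, [None, None, None, 3])

encoder = TinyEncoder()
# One entry per upsampling step; index 0 corresponds to the finest-resolution merge
decoder = SimpleDecoder(upsampling_dims=[32, 64])

feature_maps = encoder(images, is_training=True)        # 3 maps
logits = decoder(feature_maps, num_classes=2, is_training=True)  # [N, H, W, 2]
```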
- """ - return nets.resnet_utils.Block(scope, nets.resnet_v1.bottleneck,[{ - 'depth': base_depth * 4, - 'depth_bottleneck': base_depth, - 'stride': stride - }] + [{ - 'depth': base_depth * 4, - 'depth_bottleneck': base_depth, - 'stride': 1 - }] * (num_units - 1)) - - blocks_list = [ - corrected_resnet_v1_block('block1', base_depth=64, num_units=3, stride=1), - corrected_resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), - corrected_resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), - corrected_resnet_v1_block('block4', base_depth=512, num_units=3, stride=2), - ] - desired_endpoints = [ - 'resnet_v1_50/conv1', - 'resnet_v1_50/block1/unit_3/bottleneck_v1', - 'resnet_v1_50/block2/unit_4/bottleneck_v1', - 'resnet_v1_50/block3/unit_6/bottleneck_v1', - 'resnet_v1_50/block4/unit_3/bottleneck_v1' - ] - else: - blocks_list = [ - nets.resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), - nets.resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), - nets.resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), - nets.resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), - ] - desired_endpoints = [ - 'resnet_v1_50/conv1', - 'resnet_v1_50/block1/unit_2/bottleneck_v1', - 'resnet_v1_50/block2/unit_3/bottleneck_v1', - 'resnet_v1_50/block3/unit_5/bottleneck_v1', - 'resnet_v1_50/block4/unit_3/bottleneck_v1' - ] - - net, endpoints = nets.resnet_v1.resnet_v1(input_tensor, - blocks=blocks_list[:blocks], - num_classes=None, - is_training=is_training, - global_pool=False, - output_stride=None, - include_root_block=True, - reuse=None, - scope='resnet_v1_50') - - intermediate_layers = list() - for d in desired_endpoints[:blocks + 1]: - intermediate_layers.append(endpoints[d]) - - return net, intermediate_layers diff --git a/dh_segment/network/pretrained_models/__init__.py b/dh_segment/network/pretrained_models/__init__.py new file mode 100644 index 0000000..c95406f --- /dev/null +++ b/dh_segment/network/pretrained_models/__init__.py @@ -0,0 +1,3 @@ +from .resnet50 import ResnetV1_50 +from .vgg16 import VGG16 +from .mobilenet.encoder import MobileNetV2 \ No newline at end of file diff --git a/dh_segment/network/pretrained_models/mobilenet/__init__.py b/dh_segment/network/pretrained_models/mobilenet/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py b/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py new file mode 100644 index 0000000..498ce77 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/conv_blocks.py @@ -0,0 +1,358 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Convolution blocks for mobilenet.""" +import contextlib +import functools + +import tensorflow as tf + +slim = tf.contrib.slim + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _split_divisible(num, num_ways, divisible_by=8): + """Evenly splits num, num_ways so each piece is a multiple of divisible_by.""" + assert num % divisible_by == 0 + assert num / num_ways >= divisible_by + # Note: want to round down, we adjust each split to match the total. + base = num // num_ways // divisible_by * divisible_by + result = [] + accumulated = 0 + for i in range(num_ways): + r = base + while accumulated + r < num * (i + 1) / num_ways: + r += divisible_by + result.append(r) + accumulated += r + assert accumulated == num + return result + + +@contextlib.contextmanager +def _v1_compatible_scope_naming(scope): + if scope is None: # Create uniqified separable blocks. + with tf.variable_scope(None, default_name='separable') as s, \ + tf.name_scope(s.original_name_scope): + yield '' + else: + # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts. + # which provide numbered scopes. + scope += '_' + yield scope + + +@slim.add_arg_scope +def split_separable_conv2d(input_tensor, + num_outputs, + scope=None, + normalizer_fn=None, + stride=1, + rate=1, + endpoints=None, + use_explicit_padding=False): + """Separable mobilenet V1 style convolution. + + Depthwise convolution, with default non-linearity, + followed by 1x1 depthwise convolution. This is similar to + slim.separable_conv2d, but differs in tha it applies batch + normalization and non-linearity to depthwise. This matches + the basic building of Mobilenet Paper + (https://arxiv.org/abs/1704.04861) + + Args: + input_tensor: input + num_outputs: number of outputs + scope: optional name of the scope. Note if provided it will use + scope_depthwise for deptwhise, and scope_pointwise for pointwise. 
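The channel-rounding rule in `_make_divisible` is easiest to see on concrete numbers. A quick check, assuming it is run in the module where `_make_divisible` is defined (the multipliers 1.4 and 0.35 match the wrappers defined further down):

```python
# Channel counts are rounded to a hardware-friendly multiple of `divisor`,
# but never allowed to drop more than 10% below the requested value.
assert _make_divisible(32 * 1.4, 8) == 48    # 44.8 -> 48
assert _make_divisible(32 * 0.35, 8) == 16   # 11.2 -> 8, then bumped to 16 because 8 < 0.9 * 11.2
```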
+ normalizer_fn: which normalizer function to use for depthwise/pointwise + stride: stride + rate: output rate (also known as dilation rate) + endpoints: optional, if provided, will export additional tensors to it. + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + + Returns: + output tesnor + """ + + with _v1_compatible_scope_naming(scope) as scope: + dw_scope = scope + 'depthwise' + endpoints = endpoints if endpoints is not None else {} + kernel_size = [3, 3] + padding = 'SAME' + if use_explicit_padding: + padding = 'VALID' + input_tensor = _fixed_padding(input_tensor, kernel_size, rate) + net = slim.separable_conv2d( + input_tensor, + None, + kernel_size, + depth_multiplier=1, + stride=stride, + rate=rate, + normalizer_fn=normalizer_fn, + padding=padding, + scope=dw_scope) + + endpoints[dw_scope] = net + + pw_scope = scope + 'pointwise' + net = slim.conv2d( + net, + num_outputs, [1, 1], + stride=1, + normalizer_fn=normalizer_fn, + scope=pw_scope) + endpoints[pw_scope] = net + return net + + +def expand_input_by_factor(n, divisible_by=8): + return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by) + + +@slim.add_arg_scope +def expanded_conv(input_tensor, + num_outputs, + expansion_size=expand_input_by_factor(6), + stride=1, + rate=1, + kernel_size=(3, 3), + residual=True, + normalizer_fn=None, + project_activation_fn=tf.identity, + split_projection=1, + split_expansion=1, + expansion_transform=None, + depthwise_location='expansion', + depthwise_channel_multiplier=1, + endpoints=None, + use_explicit_padding=False, + padding='SAME', + scope=None): + """Depthwise Convolution Block with expansion. + + Builds a composite convolution that has the following structure + expansion (1x1) -> depthwise (kernel_size) -> projection (1x1) + + Args: + input_tensor: input + num_outputs: number of outputs in the final layer. + expansion_size: the size of expansion, could be a constant or a callable. + If latter it will be provided 'num_inputs' as an input. For forward + compatibility it should accept arbitrary keyword arguments. + Default will expand the input by factor of 6. + stride: depthwise stride + rate: depthwise rate + kernel_size: depthwise kernel + residual: whether to include residual connection between input + and output. + normalizer_fn: batchnorm or otherwise + project_activation_fn: activation function for the project layer + split_projection: how many ways to split projection operator + (that is conv expansion->bottleneck) + split_expansion: how many ways to split expansion op + (that is conv bottleneck->expansion) ops will keep depth divisible + by this value. + expansion_transform: Optional function that takes expansion + as a single input and returns output. + depthwise_location: where to put depthwise covnvolutions supported + values None, 'input', 'output', 'expansion' + depthwise_channel_multiplier: depthwise channel multiplier: + each input will replicated (with different filters) + that many times. So if input had c channels, + output will have c x depthwise_channel_multpilier. + endpoints: An optional dictionary into which intermediate endpoints are + placed. The keys "expansion_output", "depthwise_output", + "projection_output" and "expansion_transform" are always populated, even + if the corresponding functions are not invoked. 
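As a concrete instance of the expansion step described above: with the default `expand_input_by_factor(6)`, a 24-channel input is widened before the depthwise convolution and then projected back to `num_outputs`. A small sketch, assuming it runs where `expand_input_by_factor` is defined:

```python
expansion = expand_input_by_factor(6)
inner_size = expansion(num_inputs=24)
assert inner_size == 144   # 24 -> 144 (1x1 expand) -> 3x3 depthwise -> 1x1 project to num_outputs
```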
+ use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + padding: Padding type to use if `use_explicit_padding` is not set. + scope: optional scope. + + Returns: + Tensor of depth num_outputs + + Raises: + TypeError: on inval + """ + with tf.variable_scope(scope, default_name='expanded_conv') as s, \ + tf.name_scope(s.original_name_scope): + prev_depth = input_tensor.get_shape().as_list()[3] + if depthwise_location not in [None, 'input', 'output', 'expansion']: + raise TypeError('%r is unknown value for depthwise_location' % + depthwise_location) + if use_explicit_padding: + if padding != 'SAME': + raise TypeError('`use_explicit_padding` should only be used with ' + '"SAME" padding.') + padding = 'VALID' + depthwise_func = functools.partial( + slim.separable_conv2d, + num_outputs=None, + kernel_size=kernel_size, + depth_multiplier=depthwise_channel_multiplier, + stride=stride, + rate=rate, + normalizer_fn=normalizer_fn, + padding=padding, + scope='depthwise') + # b1 -> b2 * r -> b2 + # i -> (o * r) (bottleneck) -> o + input_tensor = tf.identity(input_tensor, 'input') + net = input_tensor + + if depthwise_location == 'input': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net, activation_fn=None) + + if callable(expansion_size): + inner_size = expansion_size(num_inputs=prev_depth) + else: + inner_size = expansion_size + + if inner_size > net.shape[3]: + net = split_conv( + net, + inner_size, + num_ways=split_expansion, + scope='expand', + stride=1, + normalizer_fn=normalizer_fn) + net = tf.identity(net, 'expansion_output') + if endpoints is not None: + endpoints['expansion_output'] = net + + if depthwise_location == 'expansion': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net) + + net = tf.identity(net, name='depthwise_output') + if endpoints is not None: + endpoints['depthwise_output'] = net + if expansion_transform: + net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor) + # Note in contrast with expansion, we always have + # projection to produce the desired output size. + net = split_conv( + net, + num_outputs, + num_ways=split_projection, + stride=1, + scope='project', + normalizer_fn=normalizer_fn, + activation_fn=project_activation_fn) + if endpoints is not None: + endpoints['projection_output'] = net + if depthwise_location == 'output': + if use_explicit_padding: + net = _fixed_padding(net, kernel_size, rate) + net = depthwise_func(net, activation_fn=None) + + if callable(residual): # custom residual + net = residual(input_tensor=input_tensor, output_tensor=net) + elif (residual and + # stride check enforces that we don't add residuals when spatial + # dimensions are None + stride == 1 and + # Depth matches + net.get_shape().as_list()[3] == + input_tensor.get_shape().as_list()[3]): + net += input_tensor + return tf.identity(net, name='output') + + +def split_conv(input_tensor, + num_outputs, + num_ways, + scope, + divisible_by=8, + **kwargs): + """Creates a split convolution. + + Split convolution splits the input and output into + 'num_blocks' blocks of approximately the same size each, + and only connects $i$-th input to $i$ output. + + Args: + input_tensor: input tensor + num_outputs: number of output filters + num_ways: num blocks to split by. + scope: scope for all the operators. + divisible_by: make sure that every part is divisiable by this. 
+ **kwargs: will be passed directly into conv2d operator + Returns: + tensor + """ + b = input_tensor.get_shape().as_list()[3] + + if num_ways == 1 or min(b // num_ways, + num_outputs // num_ways) < divisible_by: + # Don't do any splitting if we end up with less than 8 filters + # on either side. + return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs) + + outs = [] + input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by) + output_splits = _split_divisible( + num_outputs, num_ways, divisible_by=divisible_by) + inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope) + base = scope + for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)): + scope = base + '_part_%d' % (i,) + n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs) + n = tf.identity(n, scope + '_output') + outs.append(n) + return tf.concat(outs, 3, name=scope + '_concat') diff --git a/dh_segment/network/pretrained_models/mobilenet/encoder.py b/dh_segment/network/pretrained_models/mobilenet/encoder.py new file mode 100644 index 0000000..4516b98 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/encoder.py @@ -0,0 +1,53 @@ +from ...model import Encoder +import tensorflow as tf +from .mobilenet_v2 import training_scope, mobilenet_base +from typing import Tuple, Optional, Union, List, Dict +from tensorflow.contrib import slim +import os +from ....utils.misc import get_data_folder, download_file +import tarfile + + +class MobileNetV2(Encoder): + def __init__(self, train_batchnorm: bool=False, weight_decay: float=0.00004, batch_renorm: bool=True): + self.train_batchnorm = train_batchnorm + self.weight_decay = weight_decay + self.batch_renorm = batch_renorm + pretrained_dir = os.path.join(get_data_folder(), 'mobilenet_v2') + self.pretrained_file = os.path.join(pretrained_dir, 'mobilenet_v2_1.0_224.ckpt') + if not os.path.exists(self.pretrained_file+'.index'): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'resnet_v1_50.tar.gz') + download_file('https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=pretrained_dir) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file+'.index') + print('Pre-trained weights downloaded!') + + def __call__(self, images: tf.Tensor, is_training=False) -> List[tf.Tensor]: + outputs = [] + + with slim.arg_scope(training_scope(weight_decay=self.weight_decay, + is_training=is_training and self.train_batchnorm)): + normalized_images = (images / 127.5) - 1.0 + outputs.append(normalized_images) + + desired_endpoints = [ + 'layer_2', + 'layer_4', + 'layer_7', + 'layer_14', + 'layer_18' + ] + + _, endpoints = mobilenet_base(normalized_images) + for d in desired_endpoints: + outputs.append(endpoints[d]) + + return outputs + + def pretrained_information(self) -> Tuple[Optional[str], Union[None, List, Dict]]: + return self.pretrained_file, [v for v in tf.global_variables() + if 'MobilenetV2' in v.name and 'renorm' not in v.name] \ No newline at end of file diff --git a/dh_segment/network/pretrained_models/mobilenet/mobilenet.py b/dh_segment/network/pretrained_models/mobilenet/mobilenet.py new file mode 100644 index 0000000..8c47dd9 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/mobilenet.py @@ -0,0 +1,466 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
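Putting the new encoder to use: a minimal sketch combining `MobileNetV2` with `SimpleDecoder`. The `upsampling_dims` values and the number of classes are illustrative; the only constraints taken from the code are the six returned feature maps (normalized input plus `layer_2/4/7/14/18`) and the length check in `SimpleDecoder`.

```python
import tensorflow as tf
from dh_segment.network import SimpleDecoder
from dh_segment.network.pretrained_models import MobileNetV2

images = tf.placeholder(tf.float32, [None, None, None, 3])

encoder = MobileNetV2()                              # fetches the pretrained checkpoint on first use
feature_maps = encoder(images, is_training=False)    # 6 feature maps in decreasing resolution

decoder = SimpleDecoder(upsampling_dims=[32, 64, 128, 256, 512], max_depth=512)
logits = decoder(feature_maps, num_classes=2, is_training=False)

# Checkpoint file and list of variables to restore before fine-tuning
pretrained_ckpt, pretrained_vars = encoder.pretrained_information()
```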
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Mobilenet Base Class.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import collections +import contextlib +import copy +import os + +import tensorflow as tf + +slim = tf.contrib.slim + + +@slim.add_arg_scope +def apply_activation(x, name=None, activation_fn=None): + return activation_fn(x, name=name) if activation_fn else x + + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). + """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@contextlib.contextmanager +def _set_arg_scope_defaults(defaults): + """Sets arg scope defaults for all items present in defaults. + + Args: + defaults: dictionary/list of pairs, containing a mapping from + function to a dictionary of default args. + + Yields: + context manager where all defaults are set. 
+ """ + if hasattr(defaults, 'items'): + items = list(defaults.items()) + else: + items = defaults + if not items: + yield + else: + func, default_arg = items[0] + with slim.arg_scope(func, **default_arg): + with _set_arg_scope_defaults(items[1:]): + yield + + +@slim.add_arg_scope +def depth_multiplier(output_params, + multiplier, + divisible_by=8, + min_depth=8, + **unused_kwargs): + if 'num_outputs' not in output_params: + return + d = output_params['num_outputs'] + output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by, + min_depth) + + +_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func']) + + +def op(opfunc, **params): + multiplier = params.pop('multiplier_transorm', depth_multiplier) + return _Op(opfunc, params=params, multiplier_func=multiplier) + + +class NoOpScope(object): + """No-op context manager.""" + + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return False + + +def safe_arg_scope(funcs, **kwargs): + """Returns `slim.arg_scope` with all None arguments removed. + + Arguments: + funcs: Functions to pass to `arg_scope`. + **kwargs: Arguments to pass to `arg_scope`. + + Returns: + arg_scope or No-op context manager. + + Note: can be useful if None value should be interpreted as "do not overwrite + this parameter value". + """ + filtered_args = {name: value for name, value in kwargs.items() + if value is not None} + if filtered_args: + return slim.arg_scope(funcs, **filtered_args) + else: + return NoOpScope() + + +@slim.add_arg_scope +def mobilenet_base( # pylint: disable=invalid-name + inputs, + conv_defs, + multiplier=1.0, + final_endpoint=None, + output_stride=None, + use_explicit_padding=False, + scope=None, + is_training=False): + """Mobilenet base network. + + Constructs a network from inputs to the given final endpoint. By default + the network is constructed in inference mode. To create network + in training mode use: + + with slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_base(...) + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + conv_defs: A list of op(...) layers specifying the net architecture. + multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + final_endpoint: The name of last layer, for early termination for + for V1-based networks: last layer is "layer_14", for V2: "layer_20" + output_stride: An integer that specifies the requested ratio of input to + output spatial resolution. If not None, then we invoke atrous convolution + if necessary to prevent the network from reducing the spatial resolution + of the activation maps. Allowed values are 1 or any even number, excluding + zero. Typical values are 8 (accurate fully convolutional mode), 16 + (fast fully convolutional mode), and 32 (classification mode). + + NOTE- output_stride relies on all consequent operators to support dilated + operators via "rate" parameter. This might require wrapping non-conv + operators to operate properly. + + use_explicit_padding: Use 'VALID' padding for convolutions, but prepad + inputs so that the output dimensions are the same as if 'SAME' padding + were used. + scope: optional variable scope. + is_training: How to setup batch_norm and other ops. Note: most of the time + this does not need be set directly. 
Use mobilenet.training_scope() to set + up training instead. This parameter is here for backward compatibility + only. It is safe to set it to the value matching + training_scope(is_training=...). It is also safe to explicitly set + it to False, even if there is outer training_scope set to to training. + (The network will be built in inference mode). If this is set to None, + no arg_scope is added for slim.batch_norm's is_training parameter. + + Returns: + tensor_out: output tensor. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: depth_multiplier <= 0, or the target output_stride is not + allowed. + """ + if multiplier <= 0: + raise ValueError('multiplier is not greater than zero.') + + # Set conv defs defaults and overrides. + conv_defs_defaults = conv_defs.get('defaults', {}) + conv_defs_overrides = conv_defs.get('overrides', {}) + if use_explicit_padding: + conv_defs_overrides = copy.deepcopy(conv_defs_overrides) + conv_defs_overrides[ + (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'} + + if output_stride is not None: + if output_stride == 0 or (output_stride > 1 and output_stride % 2): + raise ValueError('Output stride must be None, 1 or a multiple of 2.') + + # a) Set the tensorflow scope + # b) set padding to default: note we might consider removing this + # since it is also set by mobilenet_scope + # c) set all defaults + # d) set all extra overrides. + with _scope_all(scope, default_scope='Mobilenet'), \ + safe_arg_scope([slim.batch_norm], is_training=is_training), \ + _set_arg_scope_defaults(conv_defs_defaults), \ + _set_arg_scope_defaults(conv_defs_overrides): + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = inputs + # Insert default parameters before the base scope which includes + # any custom overrides set in mobilenet. + end_points = {} + scopes = {} + for i, opdef in enumerate(conv_defs['spec']): + params = dict(opdef.params) + opdef.multiplier_func(params, multiplier) + stride = params.get('stride', 1) + if output_stride is not None and current_stride == output_stride: + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + layer_stride = 1 + layer_rate = rate + rate *= stride + else: + layer_stride = stride + layer_rate = 1 + current_stride *= stride + # Update params. + params['stride'] = layer_stride + # Only insert rate to params if rate > 1. 
+ if layer_rate > 1: + params['rate'] = layer_rate + # Set padding + if use_explicit_padding: + if 'kernel_size' in params: + net = _fixed_padding(net, params['kernel_size'], layer_rate) + else: + params['use_explicit_padding'] = True + + end_point = 'layer_%d' % (i + 1) + try: + net = opdef.op(net, **params) + except Exception: + print('Failed to create op %i: %r params: %r' % (i, opdef, params)) + raise + end_points[end_point] = net + scope = os.path.dirname(net.name) + scopes[scope] = end_point + if final_endpoint is not None and end_point == final_endpoint: + break + + # Add all tensors that end with 'output' to + # endpoints + for t in net.graph.get_operations(): + scope = os.path.dirname(t.name) + bn = os.path.basename(t.name) + if scope in scopes and t.name.endswith('output'): + end_points[scopes[scope] + '/' + bn] = t.outputs[0] + return net, end_points + + +@contextlib.contextmanager +def _scope_all(scope, default_scope=None): + with tf.variable_scope(scope, default_name=default_scope) as s, \ + tf.name_scope(s.original_name_scope): + yield s + + +@slim.add_arg_scope +def mobilenet(inputs, + num_classes=1001, + prediction_fn=slim.softmax, + reuse=None, + scope='Mobilenet', + base_only=False, + **mobilenet_args): + """Mobilenet model for classification, supports both V1 and V2. + + Note: default mode is inference, use mobilenet.training_scope to create + training network. + + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + num_classes: number of predicted classes. If 0 or None, the logits layer + is omitted and the input features to the logits layer (before dropout) + are returned instead. + prediction_fn: a function to get predictions out of logits + (default softmax). + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + base_only: if True will only create the base of the network (no pooling + and no logits). + **mobilenet_args: passed to mobilenet_base verbatim. + - conv_defs: list of conv defs + - multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + - output_stride: will ensure that the last layer has at most total stride. + If the architecture calls for more stride than that provided + (e.g. output_stride=16, but the architecture has 5 stride=2 operators), + it will replace output_stride with fractional convolutions using Atrous + Convolutions. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation tensor. + + Raises: + ValueError: Input rank is invalid. 
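The stride and dilation bookkeeping in `mobilenet_base` is worth tracing once by hand. The sketch below replays it in plain Python, with the per-op convolution strides read off `V2_DEF` (defined further down) and a target `output_stride` of 16: the stride-2 op of the 160-channel block ends up running at stride 1, and every subsequent op is dilated with rate 2.

```python
strides = [2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1]   # per-op strides in V2_DEF
output_stride = 16

current_stride, rate = 1, 1
for s in strides:
    if current_stride == output_stride:
        layer_stride, layer_rate = 1, rate   # keep resolution, dilate later layers instead
        rate *= s
    else:
        layer_stride, layer_rate = s, 1
        current_stride *= s

assert current_stride == 16 and rate == 2
```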
+ """ + is_training = mobilenet_args.get('is_training', False) + input_shape = inputs.get_shape().as_list() + if len(input_shape) != 4: + raise ValueError('Expected rank 4 input, was: %d' % len(input_shape)) + + with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope: + inputs = tf.identity(inputs, 'input') + net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args) + if base_only: + return net, end_points + + net = tf.identity(net, name='embedding') + + with tf.variable_scope('Logits'): + net = global_pool(net) + end_points['global_pool'] = net + if not num_classes: + return net, end_points + net = slim.dropout(net, scope='Dropout', is_training=is_training) + # 1 x 1 x num_classes + # Note: legacy scope name. + logits = slim.conv2d( + net, + num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer(), + scope='Conv2d_1c_1x1') + + logits = tf.squeeze(logits, [1, 2]) + + logits = tf.identity(logits, name='output') + end_points['Logits'] = logits + if prediction_fn: + end_points['Predictions'] = prediction_fn(logits, 'Predictions') + return logits, end_points + + +def global_pool(input_tensor, pool_op=tf.nn.avg_pool): + """Applies avg pool to produce 1x1 output. + + NOTE: This function is funcitonally equivalenet to reduce_mean, but it has + baked in average pool which has better support across hardware. + + Args: + input_tensor: input tensor + pool_op: pooling op (avg pool is default) + Returns: + a tensor batch_size x 1 x 1 x depth. + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size = tf.convert_to_tensor( + [1, tf.shape(input_tensor)[1], + tf.shape(input_tensor)[2], 1]) + else: + kernel_size = [1, shape[1], shape[2], 1] + output = pool_op( + input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') + # Recover output shape, for unknown shape. + output.set_shape([None, 1, 1, None]) + return output + + +def training_scope(is_training=True, + weight_decay=0.00004, + stddev=0.09, + dropout_keep_prob=0.8, + bn_decay=0.997): + """Defines Mobilenet training scope. + + Usage: + with tf.contrib.slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + # the network created will be trainble with dropout/batch norm + # initialized appropriately. + Args: + is_training: if set to False this will ensure that all customizations are + set to non-training mode. This might be helpful for code that is reused + across both training/evaluation, but most of the time training_scope with + value False is not needed. If this is set to None, the parameters is not + added to the batch_norm arg_scope. + + weight_decay: The weight decay to use for regularizing the model. + stddev: Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob: dropout keep probability (not set if equals to None). + bn_decay: decay for the batch norm moving averages (not set if equals to + None). + + Returns: + An argument scope to use via arg_scope. + """ + # Note: do not introduce parameters that would change the inference + # model here (for example whether to use bias), modify conv_def instead. + batch_norm_params = { + 'decay': bn_decay, + 'is_training': is_training + } + if stddev < 0: + weight_intitializer = slim.initializers.xavier_initializer() + else: + weight_intitializer = tf.truncated_normal_initializer(stddev=stddev) + + # Set weight_decay for weights in Conv and FC layers. 
+ with slim.arg_scope( + [slim.conv2d, slim.fully_connected, slim.separable_conv2d], + weights_initializer=weight_intitializer, + normalizer_fn=slim.batch_norm), \ + slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training), \ + safe_arg_scope([slim.batch_norm], **batch_norm_params), \ + safe_arg_scope([slim.dropout], is_training=is_training, + keep_prob=dropout_keep_prob), \ + slim.arg_scope([slim.conv2d], \ + weights_regularizer=slim.l2_regularizer(weight_decay)), \ + slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s: + return s diff --git a/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py b/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py new file mode 100644 index 0000000..f2df180 --- /dev/null +++ b/dh_segment/network/pretrained_models/mobilenet/mobilenet_v2.py @@ -0,0 +1,219 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of Mobilenet V2. + +Architecture: https://arxiv.org/abs/1801.04381 + +The base model gives 72.2% accuracy on ImageNet, with 300MMadds, +3.4 M parameters. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import functools + +import tensorflow as tf + +from . import conv_blocks as ops +from . import mobilenet as lib + +slim = tf.contrib.slim +op = lib.op + +expand_input = ops.expand_input_by_factor + +# pyformat: disable +# Architecture: https://arxiv.org/abs/1801.04381 +V2_DEF = dict( + defaults={ + # Note: these parameters of batch norm affect the architecture + # that's why they are here and not in training_scope. 
+ (slim.batch_norm,): {'center': True, 'scale': True}, + (slim.conv2d, slim.fully_connected, slim.separable_conv2d): { + 'normalizer_fn': slim.batch_norm, 'activation_fn': tf.nn.relu6 + }, + (ops.expanded_conv,): { + 'expansion_size': expand_input(6), + 'split_expansion': 1, + 'normalizer_fn': slim.batch_norm, + 'residual': True + }, + (slim.conv2d, slim.separable_conv2d): {'padding': 'SAME'} + }, + spec=[ + op(slim.conv2d, stride=2, num_outputs=32, kernel_size=[3, 3]), + op(ops.expanded_conv, + expansion_size=expand_input(1, divisible_by=1), + num_outputs=16), + op(ops.expanded_conv, stride=2, num_outputs=24), + op(ops.expanded_conv, stride=1, num_outputs=24), + op(ops.expanded_conv, stride=2, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=1, num_outputs=32), + op(ops.expanded_conv, stride=2, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=64), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=1, num_outputs=96), + op(ops.expanded_conv, stride=2, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=160), + op(ops.expanded_conv, stride=1, num_outputs=320), + op(slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=1280) + ], +) + + +# pyformat: enable + + +@slim.add_arg_scope +def mobilenet(input_tensor, + num_classes=1001, + depth_multiplier=1.0, + scope='MobilenetV2', + conv_defs=None, + finegrain_classification_mode=False, + min_depth=None, + divisible_by=None, + activation_fn=None, + **kwargs): + """Creates mobilenet V2 network. + + Inference mode is created by default. To create training use training_scope + below. + + with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + Args: + input_tensor: The input tensor + num_classes: number of classes + depth_multiplier: The multiplier applied to scale number of + channels in each layer. Note: this is called depth multiplier in the + paper but the name is kept for consistency with slim's model builder. + scope: Scope of the operator + conv_defs: Allows to override default conv def. + finegrain_classification_mode: When set to True, the model + will keep the last layer large even for small multipliers. Following + https://arxiv.org/abs/1801.04381 + suggests that it improves performance for ImageNet-type of problems. + *Note* ignored if final_endpoint makes the builder exit earlier. + min_depth: If provided, will ensure that all layers will have that + many channels after application of depth multiplier. + divisible_by: If provided will ensure that all layers # channels + will be divisible by this number. + activation_fn: Activation function to use, defaults to tf.nn.relu6 if not + specified. + **kwargs: passed directly to mobilenet.mobilenet: + prediction_fn- what prediction function to use. + reuse-: whether to reuse variables (if reuse set to true, scope + must be given). 
+ Returns: + logits/endpoints pair + + Raises: + ValueError: On invalid arguments + """ + if conv_defs is None: + conv_defs = V2_DEF + if 'multiplier' in kwargs: + raise ValueError('mobilenetv2 doesn\'t support generic ' + 'multiplier parameter use "depth_multiplier" instead.') + if finegrain_classification_mode: + conv_defs = copy.deepcopy(conv_defs) + if depth_multiplier < 1: + conv_defs['spec'][-1].params['num_outputs'] /= depth_multiplier + if activation_fn: + conv_defs = copy.deepcopy(conv_defs) + defaults = conv_defs['defaults'] + conv_defaults = ( + defaults[(slim.conv2d, slim.fully_connected, slim.separable_conv2d)]) + conv_defaults['activation_fn'] = activation_fn + + depth_args = {} + # NB: do not set depth_args unless they are provided to avoid overriding + # whatever default depth_multiplier might have thanks to arg_scope. + if min_depth is not None: + depth_args['min_depth'] = min_depth + if divisible_by is not None: + depth_args['divisible_by'] = divisible_by + + with slim.arg_scope((lib.depth_multiplier,), **depth_args): + return lib.mobilenet( + input_tensor, + num_classes=num_classes, + conv_defs=conv_defs, + scope=scope, + multiplier=depth_multiplier, + **kwargs) + + +mobilenet.default_image_size = 224 + + +def wrapped_partial(func, *args, **kwargs): + partial_func = functools.partial(func, *args, **kwargs) + functools.update_wrapper(partial_func, func) + return partial_func + + +# Wrappers for mobilenet v2 with depth-multipliers. Be noticed that +# 'finegrain_classification_mode' is set to True, which means the embedding +# layer will not be shrinked when given a depth-multiplier < 1.0. +mobilenet_v2_140 = wrapped_partial(mobilenet, depth_multiplier=1.4) +mobilenet_v2_050 = wrapped_partial(mobilenet, depth_multiplier=0.50, + finegrain_classification_mode=True) +mobilenet_v2_035 = wrapped_partial(mobilenet, depth_multiplier=0.35, + finegrain_classification_mode=True) + + +@slim.add_arg_scope +def mobilenet_base(input_tensor, depth_multiplier=1.0, **kwargs): + """Creates base of the mobilenet (no pooling and no logits) .""" + return mobilenet(input_tensor, + depth_multiplier=depth_multiplier, + base_only=True, **kwargs) + + +def training_scope(**kwargs): + """Defines MobilenetV2 training scope. + + Usage: + with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + + with slim. + + Args: + **kwargs: Passed to mobilenet.training_scope. The following parameters + are supported: + weight_decay- The weight decay to use for regularizing the model. + stddev- Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob- dropout keep probability + bn_decay- decay for the batch norm moving averages. + + Returns: + An `arg_scope` to use for the mobilenet v2 model. 
+ """ + return lib.training_scope(**kwargs) + + +__all__ = ['training_scope', 'mobilenet_base', 'mobilenet', 'V2_DEF'] diff --git a/dh_segment/network/pretrained_models/resnet50.py b/dh_segment/network/pretrained_models/resnet50.py new file mode 100644 index 0000000..f4de0bb --- /dev/null +++ b/dh_segment/network/pretrained_models/resnet50.py @@ -0,0 +1,120 @@ +from tensorflow.contrib import slim, layers +import tensorflow as tf +from tensorflow.contrib.slim import nets +from ..model import Encoder +import os +import tarfile +from ...utils.misc import get_data_folder, download_file +from .vgg16 import mean_substraction + + +class ResnetV1_50(Encoder): + """ResNet-50 implementation + + :ivar train_batchnorm: Option to use batch norm + :ivar blocks: number of blocks (resnet blocks) + :ivar weight_decay: value of weight decay + :ivar batch_renorm: Option to use batch renorm + :ivar corrected_version: option to use the original resnet implementation (True) but less efficient than \ + `slim`'s implementation + :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights + """ + def __init__(self, train_batchnorm: bool=False, blocks: int=4, weight_decay: float=0.0001, + batch_renorm: bool=True, corrected_version: bool=False): + self.train_batchnorm = train_batchnorm + self.blocks = blocks + self.weight_decay = weight_decay + self.batch_renorm = batch_renorm + self.corrected_version = corrected_version + self.pretrained_file = os.path.join(get_data_folder(), 'resnet_v1_50.ckpt') + if not os.path.exists(self.pretrained_file): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'resnet_v1_50.tar.gz') + download_file('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=get_data_folder()) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file) + print('Pre-trained weights downloaded!') + + def pretrained_information(self): + return self.pretrained_file, [v for v in tf.global_variables() + if 'resnet_v1_50' in v.name + and 'renorm' not in v.name] + + def __call__(self, images: tf.Tensor, is_training=False): + outputs = [] + + with slim.arg_scope(nets.resnet_v1.resnet_arg_scope(weight_decay=self.weight_decay, batch_norm_decay=0.999)), \ + slim.arg_scope([layers.batch_norm], renorm_decay=0.95, renorm=self.batch_renorm): + mean_substracted_tensor = mean_substraction(images) + assert 0 < self.blocks <= 4 + + if self.corrected_version: + def corrected_resnet_v1_block(scope: str, base_depth: int, num_units: int, stride: int) -> tf.Tensor: + """ + Helper function for creating a resnet_v1 bottleneck block. + + :param scope: The scope of the block. + :param base_depth: The depth of the bottleneck layer for each unit. + :param num_units: The number of units in the block. + :param stride: The stride of the block, implemented as a stride in the last unit. + All other units have stride=1. + :return: A resnet_v1 bottleneck block. 
+ """ + return nets.resnet_utils.Block(scope, nets.resnet_v1.bottleneck, [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': stride + }] + [{ + 'depth': base_depth * 4, + 'depth_bottleneck': base_depth, + 'stride': 1 + }] * (num_units - 1)) + + blocks_list = [ + corrected_resnet_v1_block('block1', base_depth=64, num_units=3, stride=1), + corrected_resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + corrected_resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), + corrected_resnet_v1_block('block4', base_depth=512, num_units=3, stride=2), + ] + desired_endpoints = [ + 'resnet_v1_50/conv1', + 'resnet_v1_50/block1/unit_3/bottleneck_v1', + 'resnet_v1_50/block2/unit_4/bottleneck_v1', + 'resnet_v1_50/block3/unit_6/bottleneck_v1', + 'resnet_v1_50/block4/unit_3/bottleneck_v1' + ] + else: + blocks_list = [ + nets.resnet_v1.resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), + nets.resnet_v1.resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), + nets.resnet_v1.resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), + nets.resnet_v1.resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), + ] + desired_endpoints = [ + 'resnet_v1_50/conv1', + 'resnet_v1_50/block1/unit_2/bottleneck_v1', + 'resnet_v1_50/block2/unit_3/bottleneck_v1', + 'resnet_v1_50/block3/unit_5/bottleneck_v1', + 'resnet_v1_50/block4/unit_3/bottleneck_v1' + ] + + net, endpoints = nets.resnet_v1.resnet_v1(mean_substracted_tensor, + blocks=blocks_list[:self.blocks], + num_classes=None, + is_training=self.train_batchnorm and is_training, + global_pool=False, + output_stride=None, + include_root_block=True, + reuse=None, + scope='resnet_v1_50') + + # Add standardized original images + outputs.append(mean_substracted_tensor/127.0) + + for d in desired_endpoints[:self.blocks + 1]: + outputs.append(endpoints[d]) + + return outputs diff --git a/dh_segment/network/pretrained_models/vgg16.py b/dh_segment/network/pretrained_models/vgg16.py new file mode 100644 index 0000000..30d5954 --- /dev/null +++ b/dh_segment/network/pretrained_models/vgg16.py @@ -0,0 +1,77 @@ +from tensorflow.contrib import slim, layers +import tensorflow as tf +from tensorflow.contrib.slim import nets +import numpy as np +from ..model import Encoder +import os +import tarfile +from ...utils.misc import get_data_folder, download_file + +_VGG_MEANS = [123.68, 116.78, 103.94] + + +def mean_substraction(input_tensor, means=_VGG_MEANS): + return tf.subtract(input_tensor, np.array(means)[None, None, None, :], name='MeanSubstraction') + + +class VGG16(Encoder): + """VGG-16 implementation + + :ivar blocks: number of blocks (vgg blocks) + :ivar weight_decay: weight decay value + :ivar pretrained_file: path to the file (.ckpt) containing the pretrained weights + """ + def __init__(self, blocks: int=5, weight_decay: float=0.0005): + self.blocks = blocks + self.weight_decay = weight_decay + self.pretrained_file = os.path.join(get_data_folder(), 'vgg_16.ckpt') + if not os.path.exists(self.pretrained_file): + print("Could not find pre-trained file {}, downloading it!".format(self.pretrained_file)) + tar_filename = os.path.join(get_data_folder(), 'vgg_16.tar.gz') + download_file('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename) + tar = tarfile.open(tar_filename) + tar.extractall(path=get_data_folder()) + tar.close() + os.remove(tar_filename) + assert os.path.exists(self.pretrained_file) + print('Pre-trained weights downloaded!') + + def 
pretrained_information(self): + return self.pretrained_file, [v for v in tf.global_variables() + if 'vgg_16' in v.name + and 'renorm' not in v.name] + + def __call__(self, images: tf.Tensor, is_training=False): + outputs = [] + + with slim.arg_scope(nets.vgg.vgg_arg_scope(weight_decay=self.weight_decay)): + with tf.variable_scope(None, 'vgg_16', [images]) as sc: + input_tensor = mean_substraction(images) + outputs.append(input_tensor) + end_points_collection = sc.original_name_scope + '_end_points' + # Collect outputs for conv2d, fully_connected and max_pool2d. + with slim.arg_scope( + [layers.conv2d, layers.fully_connected, layers.max_pool2d], + outputs_collections=end_points_collection): + net = layers.repeat( + input_tensor, 2, layers.conv2d, 64, [3, 3], scope='conv1') + net = layers.max_pool2d(net, [2, 2], scope='pool1') + outputs.append(net) + if self.blocks >= 2: + net = layers.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') + net = layers.max_pool2d(net, [2, 2], scope='pool2') + outputs.append(net) + if self.blocks >= 3: + net = layers.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3') + net = layers.max_pool2d(net, [2, 2], scope='pool3') + outputs.append(net) + if self.blocks >= 4: + net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4') + net = layers.max_pool2d(net, [2, 2], scope='pool4') + outputs.append(net) + if self.blocks >= 5: + net = layers.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5') + net = layers.max_pool2d(net, [2, 2], scope='pool5') + outputs.append(net) + + return outputs diff --git a/dh_segment/post_processing/binarization.py b/dh_segment/post_processing/binarization.py index 6f4df98..0345eb5 100644 --- a/dh_segment/post_processing/binarization.py +++ b/dh_segment/post_processing/binarization.py @@ -38,7 +38,7 @@ def cleaning_binary(mask: np.ndarray, kernel_size: int=5) -> np.ndarray: ksize_close = (kernel_size, kernel_size) mask = cv2.morphologyEx((mask.astype(np.uint8, copy=False) * 255), cv2.MORPH_OPEN, kernel=np.ones(ksize_open)) mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel=np.ones(ksize_close)) - return mask / 255 + return np.uint8(mask / 255) def hysteresis_thresholding(probs: np.array, low_threshold: float, high_threshold: float, diff --git a/dh_segment/post_processing/boxes_detection.py b/dh_segment/post_processing/boxes_detection.py index 04ce858..8a12d08 100644 --- a/dh_segment/post_processing/boxes_detection.py +++ b/dh_segment/post_processing/boxes_detection.py @@ -25,7 +25,7 @@ def find_boxes(boxes_mask: np.ndarray, mode: str= 'min_rectangle', min_area: flo assert len(boxes_mask.shape) == 2, \ 'Input mask must be a 2D array ! 
Mask is now of shape {}'.format(boxes_mask.shape) - _, contours, _ = cv2.findContours(boxes_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + contours, _ = cv2.findContours(boxes_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) if contours is None: print('No contour found') return None diff --git a/dh_segment/post_processing/polygon_detection.py b/dh_segment/post_processing/polygon_detection.py index 0614612..a970bec 100644 --- a/dh_segment/post_processing/polygon_detection.py +++ b/dh_segment/post_processing/polygon_detection.py @@ -25,6 +25,8 @@ def find_polygonal_regions(image_mask: np.ndarray, min_area: float=0.1, n_max_po found_polygons = list() for c in contours: + if len(c) < 3: # A polygon cannot have less than 3 points + continue polygon = geometry.Polygon([point[0] for point in c]) # Check that polygon has area greater than minimal area if polygon.area >= min_area*np.prod(image_mask.shape[:2]): diff --git a/train.py b/dh_segment/train.py similarity index 76% rename from train.py rename to dh_segment/train.py index 9cf0b44..3211d74 100644 --- a/train.py +++ b/dh_segment/train.py @@ -1,122 +1,127 @@ -import os -import tensorflow as tf -# Tensorflow logging level -from logging import WARNING # import DEBUG, INFO, ERROR for more/less verbosity - -tf.logging.set_verbosity(WARNING) -from dh_segment import estimator_fn, input, utils -import json -from glob import glob -import numpy as np - -try: - import better_exceptions -except ImportError: - print('/!\ W -- Not able to import package better_exceptions') - pass -from tqdm import trange -from sacred import Experiment -import pandas as pd - -ex = Experiment('dhSegment_experiment') - - -@ex.config -def default_config(): - train_data = None # Directory with training data - eval_data = None # Directory with validation data - model_output_dir = None # Directory to output tf model - restore_model = False # Set to true to continue training - classes_file = None # txt file with classes values (unused for REGRESSION) - gpu = '' # GPU to be used for training - prediction_type = utils.PredictionType.CLASSIFICATION # One of CLASSIFICATION, REGRESSION or MULTILABEL - pretrained_model_name = 'resnet50' - model_params = utils.ModelParams(pretrained_model_name=pretrained_model_name).to_dict() # Model parameters - training_params = utils.TrainingParams().to_dict() # Training parameters - if prediction_type == utils.PredictionType.CLASSIFICATION: - assert classes_file is not None - model_params['n_classes'] = utils.get_n_classes_from_file(classes_file) - elif prediction_type == utils.PredictionType.REGRESSION: - model_params['n_classes'] = 1 - elif prediction_type == utils.PredictionType.MULTILABEL: - assert classes_file is not None - model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file) - - -@ex.automain -def run(train_data, eval_data, model_output_dir, gpu, training_params, _config): - # Create output directory - if not os.path.isdir(model_output_dir): - os.makedirs(model_output_dir) - else: - assert _config.get('restore_model'), \ - '{0} already exists, you cannot use it as output directory. 
' \ - 'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir) - # Save config - with open(os.path.join(model_output_dir, 'config.json'), 'w') as f: - json.dump(_config, f, indent=4, sort_keys=True) - - # Create export directory for saved models - saved_model_dir = os.path.join(model_output_dir, 'export') - if not os.path.isdir(saved_model_dir): - os.makedirs(saved_model_dir) - - training_params = utils.TrainingParams.from_dict(training_params) - - session_config = tf.ConfigProto() - session_config.gpu_options.visible_device_list = str(gpu) - session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 - estimator_config = tf.estimator.RunConfig().replace(session_config=session_config, - save_summary_steps=10, - keep_checkpoint_max=1) - estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir, - params=_config, config=estimator_config) - - def get_dirs_or_files(input_data): - if os.path.isdir(input_data): - train_input, train_labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') - # Check if training dir exists - if not os.path.isdir(train_input): - raise FileNotFoundError(train_input) - if not os.path.isdir(train_labels_input): - raise FileNotFoundError(train_labels_input) - elif os.path.isfile(train_data) and train_data.endswith('.csv'): - train_input = train_data - train_labels_input = None - else: - raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data)) - return train_input, train_labels_input - - train_input, train_labels_input = get_dirs_or_files(train_data) - if eval_data is not None: - eval_input, eval_labels_input = get_dirs_or_files(eval_data) - - # Configure exporter - serving_input_fn = input.serving_input_filename(training_params.input_resized_size) - exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) - - for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): - estimator.train(input.input_fn(train_input, - input_label_dir=train_labels_input, - num_epochs=training_params.evaluate_every_epoch, - batch_size=training_params.batch_size, - data_augmentation=training_params.data_augmentation, - make_patches=training_params.make_patches, - image_summaries=True, - params=_config, - num_threads=32)) - - if eval_data is not None: - eval_result = estimator.evaluate(input.input_fn(eval_input, - input_label_dir=eval_labels_input, - batch_size=1, - data_augmentation=False, - make_patches=False, - image_summaries=False, - params=_config, - num_threads=32)) - else: - eval_result = None - - exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result, - is_the_final_export=False) +import os +import tensorflow as tf +# Tensorflow logging level +from logging import WARNING # import DEBUG, INFO, ERROR for more/less verbosity + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # or any {'0', '1', '2'} +tf.logging.set_verbosity(WARNING) +from dh_segment import estimator_fn, utils +from dh_segment.io import input +import json +from tqdm import trange +from sacred import Experiment + +ex = Experiment('dhSegment_experiment') + + +@ex.config +def default_config(): + train_data = None # Directory with training data + eval_data = None # Directory with validation data + model_output_dir = None # Directory to output tf model + restore_model = False # Set to true to continue training + classes_file = None # txt file with classes values (unused for 
REGRESSION) + gpu = '' # GPU to be used for training + prediction_type = utils.PredictionType.CLASSIFICATION # One of CLASSIFICATION, REGRESSION or MULTILABEL + model_params = utils.ModelParams().to_dict() # Model parameters + training_params = utils.TrainingParams().to_dict() # Training parameters + if prediction_type == utils.PredictionType.CLASSIFICATION: + assert classes_file is not None + model_params['n_classes'] = utils.get_n_classes_from_file(classes_file) + elif prediction_type == utils.PredictionType.REGRESSION: + model_params['n_classes'] = 1 + elif prediction_type == utils.PredictionType.MULTILABEL: + assert classes_file is not None + model_params['n_classes'] = utils.get_n_classes_from_file_multilabel(classes_file) + + +@ex.main +def run(train_data, eval_data, model_output_dir, gpu, training_params, _config): + # Create output directory + if not os.path.isdir(model_output_dir): + os.makedirs(model_output_dir) + else: + assert _config.get('restore_model'), \ + '{0} already exists, you cannot use it as output directory. ' \ + 'Set "restore_model=True" to continue training, or delete dir "rm -r {0}"'.format(model_output_dir) + # Save config + with open(os.path.join(model_output_dir, 'config.json'), 'w') as f: + json.dump(_config, f, indent=4, sort_keys=True) + + # Create export directory for saved models + saved_model_dir = os.path.join(model_output_dir, 'export') + if not os.path.isdir(saved_model_dir): + os.makedirs(saved_model_dir) + + training_params = utils.TrainingParams.from_dict(training_params) + + session_config = tf.ConfigProto() + session_config.gpu_options.visible_device_list = str(gpu) + session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 + estimator_config = tf.estimator.RunConfig().replace(session_config=session_config, + save_summary_steps=10, + keep_checkpoint_max=1) + estimator = tf.estimator.Estimator(estimator_fn.model_fn, model_dir=model_output_dir, + params=_config, config=estimator_config) + + def get_dirs_or_files(input_data): + if os.path.isdir(input_data): + image_input, labels_input = os.path.join(input_data, 'images'), os.path.join(input_data, 'labels') + # Check if training dir exists + assert os.path.isdir(image_input), "{} is not a directory".format(image_input) + assert os.path.isdir(labels_input), "{} is not a directory".format(labels_input) + + elif os.path.isfile(input_data) and input_data.endswith('.csv'): + image_input = input_data + labels_input = None + else: + raise TypeError('input_data {} is neither a directory nor a csv file'.format(input_data)) + return image_input, labels_input + + train_input, train_labels_input = get_dirs_or_files(train_data) + if eval_data is not None: + eval_input, eval_labels_input = get_dirs_or_files(eval_data) + + # Configure exporter + serving_input_fn = input.serving_input_filename(training_params.input_resized_size) + exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + + #if eval_data is not None: + # exporter = tf.estimator.BestExporter(serving_input_receiver_fn=serving_input_fn, exports_to_keep=2) + #else: + # exporter = tf.estimator.LatestExporter(name='SimpleExporter', serving_input_receiver_fn=serving_input_fn, + # exports_to_keep=5) + + nb_cores = os.cpu_count() + if nb_cores: + num_threads = min(nb_cores//2, 16) + else: + num_threads = 4 + + for i in trange(0, training_params.n_epochs, training_params.evaluate_every_epoch, desc='Evaluated epochs'): + estimator.train(input.input_fn(train_input, + input_label_dir=train_labels_input, + 
num_epochs=training_params.evaluate_every_epoch, + batch_size=training_params.batch_size, + data_augmentation=training_params.data_augmentation, + make_patches=training_params.make_patches, + image_summaries=True, + params=_config, + num_threads=num_threads, + progressbar_description="Training".format(i))) + + if eval_data is not None: + eval_result = estimator.evaluate(input.input_fn(eval_input, + input_label_dir=eval_labels_input, + batch_size=1, + data_augmentation=False, + make_patches=False, + image_summaries=False, + params=_config, + num_threads=num_threads, + progressbar_description="Evaluation")) + else: + eval_result = None + + exporter.export(estimator, saved_model_dir, checkpoint_path=None, eval_result=eval_result, + is_the_final_export=False) diff --git a/dh_segment/utils/__init__.py b/dh_segment/utils/__init__.py index 07f9d98..ebafdcf 100644 --- a/dh_segment/utils/__init__.py +++ b/dh_segment/utils/__init__.py @@ -46,9 +46,6 @@ _PARAMSCONFIG = [ 'PredictionType', - 'VGG16ModelParams', - 'ResNetModelParams', - 'UNetModelParams', 'ModelParams', 'TrainingParams' ] diff --git a/dh_segment/utils/labels.py b/dh_segment/utils/labels.py index 2f35ae6..4bb4277 100644 --- a/dh_segment/utils/labels.py +++ b/dh_segment/utils/labels.py @@ -4,6 +4,7 @@ import tensorflow as tf import numpy as np import os +from typing import Tuple def label_image_to_class(label_image: tf.Tensor, classes_file: str) -> tf.Tensor: @@ -29,6 +30,13 @@ def class_to_label_image(class_label: tf.Tensor, classes_file: str) -> tf.Tensor def multilabel_image_to_class(label_image: tf.Tensor, classes_file: str) -> tf.Tensor: + """ + Combines image annotations with classes info of the txt file to create the input label for the training. + + :param label_image: annotated image [H,W,Ch] or [B,H,W,Ch] (Ch = color channels) + :param classes_file: the filename of the txt file containing the class info + :return: [H,W,Cl] or [B,H,W,Cl] (Cl = number of classes) + """ classes_color_values, colors_labels = get_classes_color_from_file_multilabel(classes_file) # Convert label_image [H,W,3] to the classes [H,W,C],int32 according to the classes [C,3] with tf.name_scope('LabelAssign'): @@ -71,7 +79,15 @@ def get_n_classes_from_file(classes_file: str) -> int: return get_classes_color_from_file(classes_file).shape[0] -def get_classes_color_from_file_multilabel(classes_file: str) -> np.ndarray: +def get_classes_color_from_file_multilabel(classes_file: str) -> Tuple[np.ndarray, np.array]: + """ + Get classes and code labels from txt file. + This function deals with the case of elements with multiple labels. 
+ + :param classes_file: file containing the classes (usually named *classes.txt*) + :return: for each class the RGB color (array size [N, 3]); and the label's code (array size [N, C]), + with N the number of combinations and C the number of classes + """ if not os.path.exists(classes_file): raise FileNotFoundError(classes_file) result = np.loadtxt(classes_file).astype(np.float32) diff --git a/dh_segment/utils/misc.py b/dh_segment/utils/misc.py index 5a1b77a..102a21c 100644 --- a/dh_segment/utils/misc.py +++ b/dh_segment/utils/misc.py @@ -5,6 +5,14 @@ import json import pickle from hashlib import sha1 +from typing import Any +import importlib +import os +import urllib.request +import tarfile +import os +from tqdm import tqdm +from random import shuffle def parse_json(filename): @@ -29,3 +37,62 @@ def dump_pickle(filename, obj): def hash_dict(params): return sha1(json.dumps(params, sort_keys=True).encode()).hexdigest() + + +def shuffled(l: list) -> list: + ll = l.copy() + shuffle(ll) + return ll + + +def get_class_from_name(full_class_name: str) -> Any: + """ + Tries to load the class from its naming, will import the corresponding module. + Raises an Error if it does not work. + + :param full_class_name: full name of the class, for instance `foo.bar.Baz` + :return: the loaded class + """ + module_name, class_name = full_class_name.rsplit('.', maxsplit=1) + # load the module, will raise ImportError if module cannot be loaded + m = importlib.import_module(module_name) + # get the class, will raise AttributeError if class cannot be found + c = getattr(m, class_name) + return c + + +def get_data_folder() -> str: + folder = os.path.join(os.path.expanduser('~'), '.dh_segment') + os.makedirs(folder, exist_ok=True) + return folder + + +def download_file(url: str, output_file: str): + """ + + :param url: + :param output_file: + :return: + """ + def progress_hook(t): + last_b = [0] + + def update_to(b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. 
+ """ + if tsize is not None: + t.total = tsize + t.update((b - last_b[0]) * bsize) + last_b[0] = b + + return update_to + + with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, + desc="Downloading pre-trained weights") as t: + urllib.request.urlretrieve(url, output_file, reporthook=progress_hook(t)) diff --git a/dh_segment/utils/params_config.py b/dh_segment/utils/params_config.py index e926b06..6ba245f 100644 --- a/dh_segment/utils/params_config.py +++ b/dh_segment/utils/params_config.py @@ -2,9 +2,9 @@ __author__ = "solivr" __license__ = "GPL" -import os -import warnings -from random import shuffle +from .misc import get_class_from_name +from ..network.model import Encoder, Decoder +from typing import Type, Optional class PredictionType: @@ -19,7 +19,7 @@ class PredictionType: MULTILABEL = 'MULTILABEL' @classmethod - def parse(cls, prediction_type): + def parse(cls, prediction_type) -> 'PredictionType': if prediction_type == 'CLASSIFICATION': return PredictionType.CLASSIFICATION elif prediction_type == 'REGRESSION': @@ -48,104 +48,41 @@ def check_params(self): pass -class VGG16ModelParams: - PRETRAINED_MODEL_FILE = 'pretrained_models/vgg_16.ckpt' - INTERMEDIATE_CONV = [ - [(256, 3)] - ] - UPSCALE_PARAMS = [ - [(32, 3)], - [(64, 3)], - [(128, 3)], - [(256, 3)], - [(512, 3)], - [(512, 3)] - ] - SELECTED_LAYERS_UPSCALING = [ - True, - True, # Must have same length as vgg_upscale_params - True, - True, - False, - False - ] - CORRECTED_VERSION = None - - -class ResNetModelParams: - PRETRAINED_MODEL_FILE = 'pretrained_models/resnet_v1_50.ckpt' - INTERMEDIATE_CONV = None - UPSCALE_PARAMS = [ - # (Filter size (depth bottleneck's output), number of bottleneck) - (32, 0), - (64, 0), - (128, 0), - (256, 0), - (512, 0) - ] - SELECTED_LAYERS_UPSCALING = [ - # Must have the same length as resnet_upscale_params - True, - True, - True, - True, - True - ] - CORRECT_VERSION = False - - -class UNetModelParams: - PRETRAINED_MODEL_FILE = None - INTERMEDIATE_CONV = None - UPSCALE_PARAMS = None - SELECTED_LAYERS_UPSCALING = None - CORRECT_VERSION = False - - class ModelParams(BaseParams): - """Parameters related to the model - + """ + Parameters related to the model + :param encoder_name: + :param encoder_params: + :param decoder_name: + :param decoder_params: + :param n_classes: """ def __init__(self, **kwargs): - self.batch_norm = kwargs.get('batch_norm', True) # type: bool - self.batch_renorm = kwargs.get('batch_renorm', True) # type: bool - self.weight_decay = kwargs.get('weight_decay', 1e-6) # type: float + self.encoder_network = kwargs.get('encoder_network', 'dh_segment.network.pretrained_models.ResnetV1_50') # type: str + self.encoder_network_params = kwargs.get('encoder_network_params', dict()) # type: dict + self.decoder_network = kwargs.get('decoder_network', 'dh_segment.network.SimpleDecoder') # type: str + self.decoder_network_params = kwargs.get('decoder_network_params', { + 'upsampling_dims': [32, 64, 128, 256, 512] + }) # type: dict + self.full_network = kwargs.get('full_network', None) # type: Optional[str] + self.full_network_params = kwargs.get('full_network_params', dict()) # type: dict self.n_classes = kwargs.get('n_classes', None) # type: int - self.pretrained_model_name = kwargs.get('pretrained_model_name', None) # type: str - self.max_depth = kwargs.get('max_depth', 512) # type: int - - if self.pretrained_model_name == 'vgg16': - model_class = VGG16ModelParams - elif self.pretrained_model_name == 'resnet50': - model_class = ResNetModelParams - elif 
self.pretrained_model_name == 'unet': - model_class = UNetModelParams - else: - raise NotImplementedError - self.pretrained_model_file = kwargs.get('pretrained_model_file', model_class.PRETRAINED_MODEL_FILE) - self.intermediate_conv = kwargs.get('intermediate_conv', model_class.INTERMEDIATE_CONV) - self.upscale_params = kwargs.get('upscale_params', model_class.UPSCALE_PARAMS) - self.selected_levels_upscaling = kwargs.get('selected_levels_upscaling', model_class.SELECTED_LAYERS_UPSCALING) - self.correct_resnet_version = kwargs.get('correct_resnet_version', model_class.CORRECT_VERSION) self.check_params() - def check_params(self): - # Pretrained model name check - # assert self.upscale_params is not None and self.selected_levels_upscaling is not None, \ - # 'Model parameters cannot be None' - if self.upscale_params is not None and self.selected_levels_upscaling is not None: + def get_encoder(self) -> Type[Encoder]: + encoder = get_class_from_name(self.encoder_network) + assert issubclass(encoder, Encoder), "{} is not an Encoder".format(encoder) + return encoder - assert len(self.upscale_params) == len(self.selected_levels_upscaling), \ - 'Upscaling levels and selection levels must have the same lengths (in model_params definition), ' \ - '{} != {}'.format(len(self.upscale_params), - len(self.selected_levels_upscaling)) + def get_decoder(self) -> Type[Decoder]: + decoder = get_class_from_name(self.decoder_network) + assert issubclass(decoder, Decoder), "{} is not a Decoder".format(decoder) + return decoder - # assert os.path.isfile(self.pretrained_model_file), \ - # 'Pretrained weights file {} not found'.format(self.pretrained_model_file) - if not os.path.isfile(self.pretrained_model_file): - warnings.warn('WARNING - Default pretrained weights file in {} was not found. ' - 'Have you changed the default pretrained file ?'.format(self.pretrained_model_file)) + def check_params(self): + self.get_encoder() + self.get_decoder() class TrainingParams(BaseParams): @@ -208,6 +145,7 @@ def __init__(self, **kwargs): self.patch_shape = kwargs.get('patch_shape', (300, 300)) self.input_resized_size = int(kwargs.get('input_resized_size', 72e4)) # (600*1200) self.weights_labels = kwargs.get('weights_labels') + self.weights_evaluation_miou = kwargs.get('weights_evaluation_miou', None) self.training_margin = kwargs.get('training_margin', 16) self.local_entropy_ratio = kwargs.get('local_entropy_ratio', 0.) self.local_entropy_sigma = kwargs.get('local_entropy_sigma', 3) @@ -216,4 +154,4 @@ def __init__(self, **kwargs): def check_params(self) -> None: """Checks if there is no parameter inconsistency """ - assert self.training_margin*2 < min(self.patch_shape) \ No newline at end of file + assert self.training_margin*2 < min(self.patch_shape) diff --git a/dh_segment_train b/dh_segment_train new file mode 100644 index 0000000..6beaefd --- /dev/null +++ b/dh_segment_train @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +from dh_segment.train import ex +import sys + +if __name__ == '__main__': + ex.run_commandline(sys.argv+["--force"]) diff --git a/doc/changelog.rst b/doc/changelog.rst index 0ecfd19..bc7bc83 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -2,4 +2,32 @@ Changelog ========= -TBC \ No newline at end of file +Unreleased +---------- + +0.4.0 - 2019-04-10 +------------------ +Added +^^^^^ + +* Input data can be a .csv file with format ``,``. +* ``dh_segment.io.via`` helper functions to generate/export groundtruth from/to VGG Image Annotation tool. 
+* ``Point.array_to_point`` to export a ``np.array`` into a list of ``Point``. +* PAGEXML Regions can now contain a custom attribute (Transkribus output of region annotation) +* ``Page.to_json()`` method for json formatting. + +Changed +^^^^^^^ + +* ``tensorflow`` v1.13 and ``opencv`` v4.0 are now used. +* mIOU metric for evaluation during training (instead of accuracy). +* TextLines are sorted according to their mean `y` coordinate when exported. + +Fixed +^^^^^ + +* Variable names typos in ``input.py`` and ``train.py``. +* Documentation of the quickstart demo. + +Removed +^^^^^^^ diff --git a/doc/reference/io.rst b/doc/reference/io.rst index 48756f1..f3159a8 100644 --- a/doc/reference/io.rst +++ b/doc/reference/io.rst @@ -76,3 +76,8 @@ Input / Output .. automodule:: dh_segment.io.PAGE :members: :undoc-members: + +.. automodule:: dh_segment.io.via + :members: + :undoc-members: + :exclude-members: main, init_logger \ No newline at end of file diff --git a/doc/start/annotating.rst b/doc/start/annotating.rst new file mode 100644 index 0000000..8ea11ed --- /dev/null +++ b/doc/start/annotating.rst @@ -0,0 +1,52 @@ +Creating groundtruth data +------------------------- + +Using GIMP or Photoshop +^^^^^^^^^^^^^^^^^^^^^^^ +Create directly your masks using your favorite image editor. You just have to draw the regions you want to extract +with a different color for each label. + +Using VGG Image Annotator (VIA) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +`VGG Image Annotator (VIA) `_ is an image annotation tool that can be +used to define regions in an image and create textual descriptions of those regions. You can either use it +`online `_ or +`download the application `_. + +From the exported annotations (in JSON format), you'll have to generate the corresponding image masks. +See the :ref:`ref_via` in the ``via`` module. + +When assigning attributes to your annotated regions, you should favour attributes of type "dropdown", "checkbox" +and "radio" and avoid "text" type in order to ease the parsing of the exported file (avoid typos and formatting errors). + +**Example of how to create individual masks from VIA annotation file** + +.. code:: python + + from dh_segment.io import via + + collection = 'mycollection' + annotation_file = 'via_sample.json' + masks_dir = '/home/project/generated_masks' + images_dir = './my_images' + + # Load all the data in the annotation file + # (the file may be an exported project or an export of the annotations) + via_data = via.load_annotation_data(annotation_file) + + # In the case of an exported project file, you can set ``only_img_annotations=True`` + # to get only the image annotations + via_annotations = via.load_annotation_data(annotation_file, only_img_annotations=True) + + # Collect the annotated regions + working_items = via.collect_working_items(via_annotations, collection, images_dir) + + # Collect the attributes and options + if '_via_attributes' in via_data.keys(): + list_attributes = via.parse_via_attributes(via_data['_via_attributes']) + else: + list_attributes = via.get_via_attributes(via_annotations) + + # Create one mask per option per attribute + via.create_masks(masks_dir, working_items, list_attributes, collection) + diff --git a/doc/start/demo.rst b/doc/start/demo.rst index d8b4d74..0a4c68d 100644 --- a/doc/start/demo.rst +++ b/doc/start/demo.rst @@ -11,8 +11,12 @@ In order to limit memory usage, the images in the dataset we provide have been d **How to** +0. 
If you have not yet done so, clone the repository : :: + + git clone https://github.com/dhlab-epfl/dhSegment.git + 1. Get the annotated dataset `here`_, which already contains the folders ``images`` and ``labels`` -for training, validation and testing set. Unzip it into ``model/pages``. :: +for training, validation and testing set. Unzip it into ``demo/pages``. :: cd demo/ wget https://github.com/dhlab-epfl/dhSegment/releases/download/v0.2/pages.zip diff --git a/doc/start/index.rst b/doc/start/index.rst index 208f598..e48554d 100644 --- a/doc/start/index.rst +++ b/doc/start/index.rst @@ -3,5 +3,6 @@ Quickstart .. toctree:: install + annotating training demo \ No newline at end of file diff --git a/doc/start/install.rst b/doc/start/install.rst index 734a3de..391d627 100644 --- a/doc/start/install.rst +++ b/doc/start/install.rst @@ -1,24 +1,33 @@ Installation ------------ +Using ``pip`` +^^^^^^^^^^^^^ + +1. Clone the repository using ``git clone https://github.com/dhlab-epfl/dhSegment.git`` + +2. Create and activate a virtualenv :: + + virtualenv myvirtualenvs/dh_segment + source myvirtualenvs/dh_segment/bin/activate + +3. Install the dependencies using ``pip`` (this will look for the ``setup.py`` file) :: + + pip install git+https://github.com/dhlab-epfl/dhSegment + Using Anaconda ^^^^^^^^^^^^^^ -- Install Anaconda or Miniconda +1. Install Anaconda or Miniconda (`installation procedure `_) -- Create a virtual environment with all the packages ``conda env create -f environment.yml`` +2. Clone the repository: ``git clone https://github.com/dhlab-epfl/dhSegment.git`` -- Then activate the environment with ``source activate dh_segment`` +3. Create a virtual environment with all the packages: ``conda env create -f environment.yml`` -- It might be possible that the following needs to be added to your ``~/.bashrc`` :: +4. Then activate the environment with ``source activate dh_segment`` - export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" - export CUDA_HOME=/usr/local/cuda -- To be able to import the package (i.e ``import dh_segment``) in your code, you have to run : :: +5. To be able to import the package (i.e ``import dh_segment``) in your code, you have to run : :: python setup.py install - -Using ``pip`` -^^^^^^^^^^^^^ \ No newline at end of file diff --git a/doc/start/training.rst b/doc/start/training.rst index 1e21039..f033aec 100644 --- a/doc/start/training.rst +++ b/doc/start/training.rst @@ -4,6 +4,8 @@ Training .. note:: A good nvidia GPU (6GB RAM at least) is most likely necessary to train your own models. We assume CUDA and cuDNN are installed. +**Input data** + You need to have your training data in a folder containing ``images`` folder and ``labels`` folder. The pairs (images, labels) need to have the same name (it is not mandatory to have the same extension file, however we recommend having the label images as ``.png`` files). @@ -14,19 +16,62 @@ a specific color. .. note:: It is now also possible to use a `csv` file containing the pairs ``original_image_filename``, ``label_image_filename`` as input data. 
+To input a ``csv`` file instead of the two folders ``images`` and ``labels``, +the content should be formatted in the following way: :: + + mypath/myfolder/original_image_filename1,mypath/myfolder/label_image_filename1 + mypath/myfolder/original_image_filename2,mypath/myfolder/label_image_filename2 + + + +**The ``classes.txt`` file** + The file containing the classes has the format shown below, where each row corresponds to one class (including 'negative' or 'background' class) and each row has 3 values for the 3 RGB values. Of course each class needs to have a different code. :: - class.txt + classes.txt 0 0 0 0 255 0 ... +**Config file with ``sacred``** + The `sacred`_ package is used to deal with experiments and training runs. Have a look at its documentation to use it properly. In order to train a model, you should run ``python train.py with <config.json>`` -.. _sacred: https://sacred.readthedocs.io/en/latest/quickstart.html \ No newline at end of file +.. _sacred: https://sacred.readthedocs.io/en/latest/quickstart.html + + +Multilabel classification training +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In case you want to be able to assign multiple labels to elements, the ``classes.txt`` file must be changed. +Besides the color code, you need to add an *attribution* code to each color. The attribution code has length `n_classes` +and indicates which classes are assigned to the color. + +Take for example 3 classes {A, B, C} and the following possible labelling combinations: + +- A (color code ``(0 255 0)``) with attribution code ``1 0 0`` +- B (color code ``(255 0 0)``) with attribution code ``0 1 0`` +- C (color code ``(0 0 255)``) with attribution code ``0 0 1`` +- AB (color code ``(128 128 128)``) with attribution code ``1 1 0`` +- BC (color code ``(0 255 255)``) with attribution code ``0 1 1`` + +The attribution code has value ``1`` when the label is assigned and ``0`` when it is not.
+(The attribution code ``1 0 1`` would mean that the color annotates elements that belong to classes A and C) + +In our example the ``classes.txt`` file would then look like : :: + + + classes.txt + + 0 0 0 0 0 0 + 0 255 0 1 0 0 + 255 0 0 0 1 0 + 0 0 255 0 0 1 + 128 128 128 1 1 0 + 0 255 255 0 1 1 diff --git a/environment.yml b/environment.yml index 05f572f..f032a88 100644 --- a/environment.yml +++ b/environment.yml @@ -2,23 +2,24 @@ name: dh_segment channels: - defaults dependencies: - - imageio=2.3.0 - - opencv=3.4.1 - - pandas=0.23.0 - - pillow=5.1.0 + - imageio=2.5.0 + - numpy=1.16.2 + - pandas=0.24.2 + - pillow=5.4.1 - python=3.6 - - scikit-image=0.13.1 - - scikit-learn=0.19.1 - - scipy=1.1.0 - - setuptools=39.1.0 + - scikit-image=0.14.2 + - scikit-learn=0.20.3 + - scipy=1.2.1 + - setuptools=40.8.0 - shapely=1.6.4 - - tqdm=4.23.3 + - tensorflow-gpu==1.13.1 + - tqdm=4.31.1 + - requests=2.21.0 - pip: - better-exceptions==0.2.1 - - sacred==0.7.3 - - tensorflow-gpu==1.11 - - sphinx==1.8.1 - - sphinx-autodoc-typehints==1.3.0 - - sphinx-rtd-theme==0.4.1 - - sphinxcontrib-bibtex==0.4.0 - + - opencv-python==4.0.1.23 + - sacred==0.7.4 + - sphinx + - sphinx-autodoc-typehints + - sphinx-rtd-theme + - sphinxcontrib-bibtex diff --git a/general_config.json b/general_config.json index 3101094..a49a268 100644 --- a/general_config.json +++ b/general_config.json @@ -14,17 +14,13 @@ "evaluate_every_epoch" : 10 }, "model_params": { - "batch_norm": true, - "batch_renorm": true, - "selected_levels_upscaling": [ - true, - true, - true, - true, - true - ] + "encoder_network_params": { + "weight_decay": 1e-6 + } }, - "pretrained_model_name" : "resnet50", "prediction_type": "CLASSIFICATION", - "gpu" : "0" + "train_data" : "", + "eval_data" : "", + "classes_file" : "/classes.txt", + "model_output_dir" : "" } \ No newline at end of file diff --git a/pretrained_models/download_resnet_pretrained_model.py b/pretrained_models/download_resnet_pretrained_model.py deleted file mode 100644 index 42943fe..0000000 --- a/pretrained_models/download_resnet_pretrained_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -import urllib.request -import tarfile -import os -from tqdm import tqdm - - -def progress_hook(t): - last_b = [0] - - def update_to(b=1, bsize=1, tsize=None): - """ - b : int, optional - Number of blocks transferred so far [default: 1]. - bsize : int, optional - Size of each block (in tqdm units) [default: 1]. - tsize : int, optional - Total size (in tqdm units). If [default: None] remains unchanged. 
- """ - if tsize is not None: - t.total = tsize - t.update((b - last_b[0]) * bsize) - last_b[0] = b - - return update_to - - -if __name__ == '__main__': - tar_filename = 'resnet_v1_50.tar.gz' - with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, - desc="Downloading pre-trained weights") as t: - urllib.request.urlretrieve('http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz', tar_filename, - reporthook=progress_hook(t)) - tar = tarfile.open(tar_filename) - tar.extractall() - tar.close() - print('Resnet pre-trained weights downloaded!') - os.remove(tar_filename) diff --git a/pretrained_models/download_vgg_pretrained_model.py b/pretrained_models/download_vgg_pretrained_model.py deleted file mode 100644 index d38d89f..0000000 --- a/pretrained_models/download_vgg_pretrained_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -import urllib.request -import tarfile -import os -from tqdm import tqdm - - -def progress_hook(t): - last_b = [0] - - def update_to(b=1, bsize=1, tsize=None): - """ - b : int, optional - Number of blocks transferred so far [default: 1]. - bsize : int, optional - Size of each block (in tqdm units) [default: 1]. - tsize : int, optional - Total size (in tqdm units). If [default: None] remains unchanged. - """ - if tsize is not None: - t.total = tsize - t.update((b - last_b[0]) * bsize) - last_b[0] = b - - return update_to - - -if __name__ == '__main__': - tar_filename = 'vgg_16.tar.gz' - with tqdm(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, - desc="Downloading pre-trained weights") as t: - urllib.request.urlretrieve('http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', tar_filename, - reporthook=progress_hook(t)) - tar = tarfile.open(tar_filename) - tar.extractall() - tar.close() - print('VGG-16 pre-trained weights downloaded!') - os.remove(tar_filename) diff --git a/setup.py b/setup.py index aca9532..cc444cb 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup(name='dh_segment', - version='0.3', + version='0.4.0', license='GPL', url='https://github.com/dhlab-epfl/dhSegment', description='Generic framework for historical document processing', @@ -11,23 +11,27 @@ 'Paper': 'https://arxiv.org/abs/1804.10371', 'Source Code': 'https://github.com/dhlab-epfl/dhSegment' }, + scripts=['dh_segment_train'], install_requires=[ - 'tensorflow', - 'numpy', - 'imageio', - 'pandas', - 'scipy', - 'shapely', - 'scikit-learn', - 'opencv-python', - 'tqdm', + #'tensorflow-gpu==1.13.1', + 'numpy==1.16.2', + 'imageio==2.5.0', + 'pandas==0.24.2', + 'scipy==1.2.1', + 'shapely==1.6.4', + 'scikit-learn==0.20.3', + 'scikit-image==0.15.0', + 'opencv-python==4.0.1.23', + 'tqdm==4.31.1', + 'sacred==0.7.4', + 'requests==2.21.0' ], extras_require={ 'doc': [ - 'sphinx', - 'sphinx-autodoc-typehints', - 'sphinx-rtd-theme', - 'sphinxcontrib-bibtex', + 'sphinx==1.8.1', + 'sphinx-autodoc-typehints==1.3.0', + 'sphinx-rtd-theme==0.4.1', + 'sphinxcontrib-bibtex==0.4.0', 'sphinxcontrib-websupport' ], },