diff --git a/.gitignore b/.gitignore index 3b8d9dacf..05bb781c1 100644 --- a/.gitignore +++ b/.gitignore @@ -115,3 +115,5 @@ data/ \.DS_Store + +*.ipynb diff --git a/.pyup.yml b/.pyup.yml new file mode 100644 index 000000000..f9f59f8a1 --- /dev/null +++ b/.pyup.yml @@ -0,0 +1,33 @@ +#Taken from https://pyup.io/docs/bot/config/ + +# configure updates globally +# default: all +# allowed: all, insecure, False +update: all + +# configure dependency pinning globally +# default: True +# allowed: True, False +pin: True + +# set the default branch +# default: empty, the default branch on GitHub +branch: develop + +# update schedule +# default: empty +# allowed: "every day", "every week", .. +schedule: "every day" + +# search for requirement files +# default: True +# allowed: True, False +search: True + +# configure the branch prefix the bot is using +# default: pyup- +branch_prefix: pyup/ + +# allow to close stale PRs +# default: True +close_prs: True diff --git a/AUTHORS.rst b/AUTHORS.rst index 8f0028ac2..efb6e7642 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -9,8 +9,3 @@ Development Lead * Bertrand Charpentier * Nathan De Lara * Quentin Lutz - -Contributors ------------- - -* Alexandre Hollocou diff --git a/README.rst b/README.rst index 934678476..9a02650bc 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,8 @@ +.. figure:: ../logo_sknetwork.png + :align: right + :width: 100px + :alt: logo sknetwork + ============== scikit-network ============== @@ -25,11 +30,25 @@ Graph algorithms * Documentation: https://scikit-network.readthedocs.io. -Features --------- +How to use scikit-network? +-------------------------- + +Graphs have a unified format, namely ``scipy.sparse.csr_matrix``. + + +About the documentation +----------------------- + +We use the following notations in the documentation: + +* :math:`A` denotes the adjacency matrix for undirected and directed graphs. + +* :math:`B` denotes the biadjacency matrix for bipartite graphs (possibly non-square). + +* :math:`d = A1` or :math:`B1` is the out-degree vector and :math:`D = \text{diag}(d)` the associated diagonal matrix. + +* :math:`f = A^T1` or :math:`B^T1` is the in-degree vector and :math:`F = \text{diag}(f)` the associated diagonal matrix. -* Embedding -* Clustering -* Hierarchy +* :math:`w = 1^TA1` or :math:`1 ^TB1` is the total weight of the graph. diff --git a/docs/conf.py b/docs/conf.py index dbce19d43..21e51db8a 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -58,8 +58,8 @@ # General information about the project. project = u'scikit-network' -copyright = u"2018, Bertrand Charpentier" -author = u"Bertrand Charpentier" +copyright = u"2019, scikit-network" +author = u"scikit-network team" # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout @@ -94,7 +94,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -#html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -105,7 +105,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +html_static_path = [] # -- Options for HTMLHelp output --------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index fe513ea85..1179bef21 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,3 +1,8 @@ +.. figure:: ../sknetwork/../logo_sknetwork.png + :align: right + :width: 200px + :alt: logo sknetwork + Welcome to scikit-network's documentation! ========================================== @@ -9,8 +14,6 @@ Welcome to scikit-network's documentation! installation usage reference/index - reference2 - modules contributing authors history diff --git a/docs/reference/clustering.rst b/docs/reference/clustering.rst index 1bd1f6fbb..2f2eb2ad7 100644 --- a/docs/reference/clustering.rst +++ b/docs/reference/clustering.rst @@ -7,18 +7,38 @@ Clustering Louvain ------- -.. automodule:: sknetwork.clustering.louvain - :toctree: generated/ +.. automodule:: sknetwork.clustering -.. autoclass:: Louvain +.. autoclass:: sknetwork.clustering.Louvain :members: -.. autoclass:: GreedyModularity +.. autoclass:: sknetwork.clustering.GreedyModularity :members: -.. autoclass:: GreedyModularityJiT +.. autoclass:: sknetwork.clustering.GreedyModularityNumba :members: +Louvain for bipartite graphs +---------------------------- + +.. autoclass:: sknetwork.clustering.BiLouvain + :members: + +.. autoclass:: sknetwork.clustering.GreedyBipartite + :members: + +.. autoclass:: sknetwork.clustering.GreedyBipartiteNumba + :members: + + Metrics ------- +.. autofunction:: sknetwork.clustering.modularity + +.. autofunction:: sknetwork.clustering.bimodularity + +.. autofunction:: sknetwork.clustering.cocitation_modularity + +.. autofunction:: sknetwork.clustering.performance +.. autofunction:: sknetwork.clustering.cocitation_performance diff --git a/docs/reference/embedding.rst b/docs/reference/embedding.rst index 2a1bbb77c..ec04509ac 100644 --- a/docs/reference/embedding.rst +++ b/docs/reference/embedding.rst @@ -6,17 +6,30 @@ Embedding .. currentmodule:: sknetwork -Spectral embeddings -------------------- -.. automodule:: sknetwork.embedding.forwardbackward_embedding - :toctree: generated/ +Spectral +-------- +.. automodule:: sknetwork.embedding -.. autoclass:: ForwardBackwardEmbedding +.. autoclass:: sknetwork.embedding.SpectralEmbedding :members: -.. automodule:: sknetwork.embedding.spectral_embedding - :toctree: generated/ +SVD +--- -.. autoclass:: SpectralEmbedding +.. autoclass:: sknetwork.embedding.GSVDEmbedding :members: +Metrics +------- +.. autofunction:: sknetwork.embedding.dot_modularity + +.. autofunction:: sknetwork.embedding.hscore + + +Randomized methods +------------------ +.. autofunction:: sknetwork.embedding.randomized_range_finder + +.. autofunction:: sknetwork.embedding.randomized_svd + +.. autofunction:: sknetwork.embedding.randomized_eig diff --git a/docs/reference/hierarchical_clustering.rst b/docs/reference/hierarchical_clustering.rst deleted file mode 100644 index 199860609..000000000 --- a/docs/reference/hierarchical_clustering.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. _hierarchical_clustering: - -Hierarchical Clustering -*********************** - -.. currentmodule:: sknetwork - -Agglomerative approach ----------------------- -.. automodule:: sknetwork.hierarhcical_clustering.agglomerative_clustering -.. autosummary:: - :toctree: generated/ - -.. autofunction:: sknetwork.linkage_clustering - -Divisive approach ------------------ - -Metrics -------- -.. automodule:: sknetwork.hierarhcical_clustering.metrics -.. 
autosummary:: - :toctree: generated/ - -.. autofunction:: sknetwork.hierarhcical_clustering.metrics.hierarchical_cost diff --git a/docs/reference/hierarchy.rst b/docs/reference/hierarchy.rst new file mode 100644 index 000000000..101983db4 --- /dev/null +++ b/docs/reference/hierarchy.rst @@ -0,0 +1,22 @@ +.. _hierarchical_clustering: + +Hierarchical clustering +*********************** + +.. currentmodule:: sknetwork + +Paris +----- +.. automodule:: sknetwork.hierarchy +.. autosummary:: + :toctree: generated/ + +.. autoclass:: sknetwork.hierarchy.paris.Paris + :members: + + +Metrics +------- +.. autofunction:: sknetwork.hierarchy.metrics.dasgupta_cost + +.. autofunction:: sknetwork.hierarchy.metrics.tree_sampling_divergence diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 64217f278..db97d28f0 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -6,5 +6,7 @@ Reference .. toctree:: embedding - flat_clustering - hierarchical_clustering + clustering + hierarchy + loader + toy_graphs diff --git a/docs/reference/loader.rst b/docs/reference/loader.rst new file mode 100644 index 000000000..07620aa90 --- /dev/null +++ b/docs/reference/loader.rst @@ -0,0 +1,9 @@ +.. _loader: + +Loader +****** + +.. currentmodule:: sknetwork + +.. autoclass:: sknetwork.loader.Parser + :members: diff --git a/docs/reference/toy_graphs.rst b/docs/reference/toy_graphs.rst new file mode 100644 index 000000000..a5f70dbc9 --- /dev/null +++ b/docs/reference/toy_graphs.rst @@ -0,0 +1,12 @@ +.. _toy_graphs: + +Toy graphs +********** + +.. currentmodule:: sknetwork + +.. autofunction:: sknetwork.toy_graphs.house_graph + +.. autofunction:: sknetwork.toy_graphs.karate_club_graph + +.. autofunction:: sknetwork.toy_graphs.star_wars_villains_graph diff --git a/logo_sknetwork.png b/logo_sknetwork.png new file mode 100644 index 000000000..7966d5380 Binary files /dev/null and b/logo_sknetwork.png differ diff --git a/make.bat b/make.bat new file mode 100644 index 000000000..543c6b13b --- /dev/null +++ b/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/sknetwork/__init__.py b/sknetwork/__init__.py index 893f60b6b..c100aa26b 100644 --- a/sknetwork/__init__.py +++ b/sknetwork/__init__.py @@ -1,7 +1,14 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- +"""Top-level package for scikit-network""" -"""Top-level package for scikit-network.""" - -__author__ = """Bertrand Charpentier""" -__email__ = 'bertrand.charpentier@live.fr' +__author__ = """scikit-network team""" +__email__ = "bonald@enst.fr" __version__ = '0.2.0' + +from scipy.sparse.csgraph import * +from sknetwork.toy_graphs import * +from sknetwork.loader import * +from sknetwork.clustering import * +from sknetwork.hierarchy import * +from sknetwork.embedding import * \ No newline at end of file diff --git a/sknetwork/clustering/__init__.py b/sknetwork/clustering/__init__.py index e69de29bb..422a59ca1 100644 --- a/sknetwork/clustering/__init__.py +++ b/sknetwork/clustering/__init__.py @@ -0,0 +1,3 @@ +from sknetwork.clustering.louvain import * +from sknetwork.clustering.bilouvain import * +from sknetwork.clustering.metrics import * diff --git a/sknetwork/clustering/bilouvain.py b/sknetwork/clustering/bilouvain.py new file mode 100644 index 000000000..7e078293d --- /dev/null +++ b/sknetwork/clustering/bilouvain.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mar 3, 2019 +@author: Nathan de Lara +""" + +try: + from numba import njit, prange + default = 'numba' +except ImportError: + def njit(func): + return func + prange = range + default = 'python' + +import numpy as np +from scipy import sparse +from typing import Union + + +class BipartiteGraph: + """ + A class of graphs suitable for the BiLouvain algorithm. + + Attributes + ---------- + n_samples: int, + the cardinal of V1 (the set of samples) + n_features: int, + the cardinal of V2 (the set of features) + sample_weights: np.ndarray, + the normalized degree vector of the samples + feature_weights: np.ndarray, + the normalized degree vector of the features + + """ + + def __init__(self, biadjacency: sparse.csr_matrix): + """ + + Parameters + ---------- + biadjacency: biadjacency matrix of the graph as SciPy sparse matrix + """ + self.n_samples: int = biadjacency.shape[0] + self.n_features: int = biadjacency.shape[1] + self.norm_adjacency: sparse.csr_matrix = biadjacency / biadjacency.data.sum() + self.sample_weights: np.ndarray = self.norm_adjacency.dot(np.ones(self.n_features)) + self.feature_weights: np.ndarray = self.norm_adjacency.T.dot(np.ones(self.n_samples)) + + def aggregate(self, sample_membership: sparse.csr_matrix, feature_membership: sparse.csr_matrix): + """ + Aggregates nodes belonging to the same clusters while keeping the bipartite structure. 
+ + Parameters + ---------- + sample_membership: + matrix of shape n_samples x n_clusters, + row number i is the one-hot cluster vector of sample i + feature_membership: + matrix of shape n_features x n_clusters, + row number i is the one-hot cluster vector of feature i + + Returns + ------- + self: :class:`BipartiteGraph` + """ + self.norm_adjacency = sample_membership.T.dot(self.norm_adjacency.dot(feature_membership)).tocsr() + self.sample_weights = np.array(sample_membership.T.dot(self.sample_weights).T) + self.feature_weights = np.array(feature_membership.T.dot(self.feature_weights).T) + self.n_samples, self.n_features = self.norm_adjacency.shape + return self + + +class BiOptimizer: + """ + A generic optimization algorithm. + + Attributes + ---------- + score_: float + Total increase of the objective function. + sample_labels_: np.ndarray + Cluster index of each node in V1. + feature_labels_: np.ndarray + Cluster index of each node in V2. + """ + def __init__(self): + self.score_ = None + self.sample_labels_ = None + self.feature_labels_ = None + + def fit(self, graph: BipartiteGraph): + """Fit the clusters to the objective function. + + Parameters + ---------- + graph: + Graph to cluster. + + Returns + ------- + self: :class:`BiOptimizer` + + """ + return self + + +class GreedyBipartite(BiOptimizer): + """ + A greedy bipartite modularity optimizer. + + Attributes + ---------- + resolution: + modularity resolution + tol: + minimum bimodularity increase to enter a new optimization pass + """ + + def __init__(self, resolution: float = 1, tol: float = 1e-3): + BiOptimizer.__init__(self) + self.resolution = resolution + self.tol = tol + + def fit(self, graph: BipartiteGraph): + """Iterates over the nodes of the graph and moves them to the cluster of highest increase among their neighbors. + + Parameters + ---------- + graph: + the graph to cluster + + Returns + ------- + self: :class:`BiOptimizer` + """ + increase: bool = True + total_increase: float = 0. + + sample_labels: np.ndarray = np.arange(graph.n_samples) + feature_labels: np.ndarray = graph.n_samples + np.arange(graph.n_features) + sample_clusters_proba: np.ndarray = np.hstack((graph.sample_weights.copy(), np.zeros(graph.n_features))) + feature_clusters_proba: np.ndarray = np.hstack((np.zeros(graph.n_samples), graph.feature_weights.copy())) + + sample_indptr: np.ndarray = graph.norm_adjacency.indptr + sample_indices: np.ndarray = graph.norm_adjacency.indices + sample_data: np.ndarray = graph.norm_adjacency.data + + transposed_adjacency = graph.norm_adjacency.T.tocsr() + feature_indptr: np.ndarray = transposed_adjacency.indptr + feature_indices: np.ndarray = transposed_adjacency.indices + feature_data: np.ndarray = transposed_adjacency.data + del transposed_adjacency + + while increase: + increase = False + pass_increase: float = 0. 
+ + sample_args = (graph.n_samples, graph.sample_weights, sample_labels, sample_clusters_proba, + sample_indptr, sample_indices, sample_data) + feature_args = (graph.n_features, graph.feature_weights, feature_labels, feature_clusters_proba, + feature_indptr, feature_indices, feature_data) + + for source, target in [(sample_args, feature_args), (feature_args, sample_args)]: + n_nodes, source_weights, source_labels, source_cluster_proba, indptr, indices, data = source + target_labels, target_cluster_proba = target[2], target[3] + + for node in range(n_nodes): + node_cluster: int = source_labels[node] + target_weights: np.ndarray = data[indptr[node]:indptr[node + 1]] + neighbors: np.ndarray = indices[indptr[node]:indptr[node + 1]] + neighbors_clusters: np.ndarray = target_labels[neighbors] + unique_clusters: list = list(set(neighbors_clusters.tolist()) - {node_cluster}) + n_clusters: int = len(unique_clusters) + + if n_clusters > 0: + node_proba: float = source_weights[node] + node_ratio: float = self.resolution * node_proba + + # total weight of edges to neighbors in the same cluster + out_delta: float = target_weights[neighbors_clusters == node_cluster].sum() + # minus the probability to choose a neighbor in the same cluster (with resolution factor) + out_delta -= node_ratio * target_cluster_proba[node_cluster] + + local_delta: np.ndarray = np.full(n_clusters, -out_delta) + + for index_cluster, cluster in enumerate(unique_clusters): + # total weight of edges to neighbors in the candidate cluster + in_delta: float = target_weights[neighbors_clusters == cluster].sum() + # minus the probability to choose a neighbor in the candidate cluster + in_delta -= node_ratio * target_cluster_proba[cluster] + + local_delta[index_cluster] += in_delta + + delta_argmax: int = local_delta.argmax() + best_delta: float = local_delta[delta_argmax] + if best_delta > 0: + pass_increase += best_delta + best_cluster = unique_clusters[delta_argmax] + + source_cluster_proba[node_cluster] -= node_proba + source_cluster_proba[best_cluster] += node_proba + source_labels[node] = best_cluster + + total_increase += pass_increase + if pass_increase > self.tol: + increase = True + + self.score_ = total_increase + _, self.sample_labels_ = np.unique(sample_labels, return_inverse=True) + _, self.feature_labels_ = np.unique(feature_labels, return_inverse=True) + + return self + + +@njit +def fit_core(sample_args, feature_args, resolution, tol): + n_samples, n_features = sample_args[0], feature_args[0] + sample_weights, feature_weights = sample_args[1], feature_args[1] + + increase = True + total_increase = 0. + + sample_labels: np.ndarray = np.arange(n_samples) + feature_labels: np.ndarray = n_samples + np.arange(n_features) + + sample_clusters_proba: np.ndarray = np.hstack((sample_weights.copy(), np.zeros(n_features))) + feature_clusters_proba: np.ndarray = np.hstack((np.zeros(n_samples), feature_weights.copy())) + + while increase: + increase = False + pass_increase = 0. 
+ + all_sample_args = (sample_args, sample_labels, sample_clusters_proba) + all_feature_args = (feature_args, feature_labels, feature_clusters_proba) + + for source, target in [(all_sample_args, all_feature_args), (all_feature_args, all_sample_args)]: + source_args, source_labels, source_clusters_proba = source + target_args, target_labels, target_clusters_proba = target + + n_nodes, source_weights, indptr, indices, data = source_args + for node in prange(n_nodes): + node_cluster: int = source_labels[node] + unique_clusters = set(target_labels[indices[indptr[node]:indptr[node + 1]]]) + unique_clusters.discard(node_cluster) + n_clusters = len(unique_clusters) + + if n_clusters > 0: + node_proba: float = source_weights[node] + out_delta: float = resolution * node_proba * target_clusters_proba[node_cluster] + + unique_clusters_list = list(unique_clusters) + neighbor_cluster_weights = np.full(n_clusters, 0.0) + + for ix in range(indptr[node], indptr[node + 1]): + neighbor_cluster = target_labels[indices[ix]] + if neighbor_cluster == node_cluster: + out_delta -= data[ix] + else: + neighbor_cluster_ix = unique_clusters_list.index(neighbor_cluster) + neighbor_cluster_weights[neighbor_cluster_ix] += data[ix] + + best_delta = 0.0 + best_cluster = node_cluster + + for ix, cluster in enumerate(unique_clusters): + in_delta: float = neighbor_cluster_weights[ix] + in_delta -= resolution * node_proba * target_clusters_proba[cluster] + + local_delta = out_delta + in_delta + if local_delta > best_delta: + best_delta = local_delta + best_cluster = cluster + + if best_delta > 0: + pass_increase += best_delta + source_clusters_proba[node_cluster] -= node_proba + source_clusters_proba[best_cluster] += node_proba + source_labels[node] = best_cluster + + total_increase += pass_increase + if pass_increase > tol: + increase = True + + return sample_labels, feature_labels, total_increase + + +class GreedyBipartiteNumba(BiOptimizer): + """ + A greedy modularity optimizer using Numba for enhanced performance. + + Attributes + ---------- + resolution: + bimodularity resolution + tol: + minimum bimodularity increase to enter a new optimization pass + + Notes + ----- + Tested with Numba v0.42.0. + """ + + def __init__(self, resolution: float = 1., tol: float = 1e-3): + BiOptimizer.__init__(self) + self.resolution = resolution + self.tol = tol + + def fit(self, graph: BipartiteGraph): + """Iterates over the nodes of the graph and moves them to the cluster of highest increase among their neighbors. + + Parameters + ---------- + graph: + the graph to cluster + + Returns + ------- + self: :class:`BiOptimizer` + + """ + transposed_adjacency = graph.norm_adjacency.T.tocsr() + sample_args = (graph.n_samples, graph.sample_weights, + graph.norm_adjacency.indptr, graph.norm_adjacency.indices, graph.norm_adjacency.data) + feature_args = (graph.n_features, graph.feature_weights, + transposed_adjacency.indptr, transposed_adjacency.indices, transposed_adjacency.data) + + sample_labels, feature_labels, total_increase = fit_core(sample_args, feature_args, + self.resolution, self.tol) + + self.score_ = total_increase + _, self.sample_labels_ = np.unique(sample_labels, return_inverse=True) + _, self.feature_labels_ = np.unique(feature_labels, return_inverse=True) + + return self + + +class BiLouvain: + """ + BiLouvain algorithm for graph clustering in Python (default) and Numba. + + Attributes + ---------- + sample_labels_: np.ndarray + Cluster index of each node in V1. + feature_labels_: np.ndarray + Cluster index of each node in V2. 
+ iteration_count_: int + Total number of aggregations performed. + aggregate_graph_: sparse.csr_matrix + Aggregated graph at the end of the algorithm. + score_: float + objective function value after fit + n_clusters_: int + number of clusters after fit + """ + + def __init__(self, algorithm: Union[str, BiOptimizer] = default, resolution: float = 1, tol: float = 1e-3, + agg_tol: float = 1e-3, max_agg_iter: int = -1, verbose: bool = False): + """ + Parameters + ---------- + algorithm: + The optimization algorithm. + Requires a fit method. + Requires score_, sample_labels_, and labels_ attributes. + resolution: + Resolution parameter. + tol: + Minimum increase in the objective function to enter a new optimization pass. + agg_tol: + Minimum increase in the objective function to enter a new aggregation pass. + max_agg_iter: + Maximum number of aggregations. + A negative value is interpreted as no limit. + verbose: + Verbose mode. + """ + if type(algorithm) == str: + if algorithm == "numba": + self.algorithm = GreedyBipartiteNumba(resolution, tol) + elif algorithm == "python": + self.algorithm = GreedyBipartite(resolution, tol) + else: + raise ValueError('Unknown algorithm name.') + elif isinstance(algorithm, BiOptimizer): + self.algorithm = algorithm + else: + raise TypeError('Algorithm must be a string ("numba" or "python") or a valid algorithm.') + + if type(max_agg_iter) != int: + raise TypeError('The maximum number of iterations must be an integer.') + self.agg_tol = agg_tol + self.max_agg_iter = max_agg_iter + self.verbose = verbose + self.sample_labels_ = None + self.feature_labels_ = None + self.iteration_count_ = None + self.aggregate_graph_ = None + self.score_ = None + self.n_clusters_ = None + + def fit(self, biadjacency: sparse.csr_matrix): + """Alternates local optimization and aggregation until convergence. + + Parameters + ---------- + biadjacency: + adjacency matrix of the graph to cluster, treated as a biadjacency matrix + + Returns + ------- + self: :class:`BiLouvain` + """ + if type(biadjacency) != sparse.csr_matrix: + raise TypeError('The adjacency matrix must be in a scipy compressed sparse row (csr) format.') + graph = BipartiteGraph(biadjacency) + sample_membership = sparse.identity(graph.n_samples, format='csr') + feature_membership = sparse.identity(graph.n_features, format='csr') + increase: bool = True + iteration_count: int = 0 + if self.verbose: + print("Starting with", biadjacency.shape, "nodes") + + self.score_ = 0. 
+ while increase: + iteration_count += 1 + self.algorithm.fit(graph) + if self.algorithm.score_ - self.score_ <= self.agg_tol: + increase = False + else: + self.score_ = self.algorithm.score_ + + sample_row = np.arange(graph.n_samples) + sample_col = self.algorithm.sample_labels_ + sample_data = np.ones(graph.n_samples) + sample_agg_membership = sparse.csr_matrix((sample_data, (sample_row, sample_col))) + sample_membership = sample_membership.dot(sample_agg_membership) + + feature_row = np.arange(graph.n_features) + feature_col = self.algorithm.feature_labels_ + feature_data = np.ones(graph.n_features) + feature_agg_membership = sparse.csr_matrix((feature_data, (feature_row, feature_col))) + feature_membership = feature_membership.dot(feature_agg_membership) + + graph.aggregate(sample_agg_membership, feature_agg_membership) + + if self.verbose: + print("Iteration", iteration_count, "completed with", + graph.n_samples, graph.n_features, "clusters") + print(self.algorithm.score_) + if iteration_count == self.max_agg_iter: + break + + self.iteration_count_ = iteration_count + self.sample_labels_ = sample_membership.indices + _, self.sample_labels_ = np.unique(self.sample_labels_, return_inverse=True) + self.feature_labels_ = feature_membership.indices + _, self.feature_labels_ = np.unique(self.feature_labels_, return_inverse=True) + self.n_clusters_ = max(max(self.sample_labels_), max(self.feature_labels_)) + 1 + self.aggregate_graph_ = graph.norm_adjacency * biadjacency.data.sum() + return self diff --git a/sknetwork/clustering/louvain.py b/sknetwork/clustering/louvain.py index 6d9e59ca9..a7132e32a 100644 --- a/sknetwork/clustering/louvain.py +++ b/sknetwork/clustering/louvain.py @@ -4,122 +4,142 @@ Created on Nov 2, 2018 @author: Nathan de Lara @author: Quentin Lutz +@author: Thomas Bonald """ try: from numba import njit + default = 'numba' except ImportError: def njit(func): return func + default = 'python' import numpy as np from scipy import sparse +from typing import Union class NormalizedGraph: """ - A class of graphs suitable for the Louvain algorithm. + A class of graphs suitable for the Louvain algorithm. Each node represents a cluster. Attributes ---------- n_nodes: int - the number of nodes in the graph - norm_adj: normalized adjacency matrix (summing to 1) - node_weights: vector of node weights + Number of nodes. + norm_adjacency: sparse.csr_matrix + Normalized adjacency matrix (sums to 1). + node_probs : np.ndarray + Distribution of node weights (sums to 1). """ - def __init__(self, adj_matrix, node_weights='degree'): + def __init__(self, adjacency: sparse.csr_matrix, node_probs: np.ndarray): """ - Parameters ---------- - adj_matrix: adjacency matrix of the graph as SciPy sparse matrix - node_weights: node weights to be used in the second term of the modularity + adjacency : + Adjacency matrix of the graph. + node_probs : + Distribution of node weights (sums to 1), used in the second term of modularity. 
""" - self.n_nodes = adj_matrix.shape[0] - self.norm_adj = adj_matrix / adj_matrix.data.sum() - if type(node_weights) == np.ndarray: - if len(node_weights) != self.n_nodes: - raise ValueError('The number of node weights must match the number of nodes.') - if any(node_weights < np.zeros(self.n_nodes)): - raise ValueError('All node weights must be non-negative.') - else: - self.node_weights = node_weights - elif type(node_weights) == str: - if node_weights == 'degree': - self.node_weights = self.norm_adj.dot(np.ones(self.n_nodes)) - elif node_weights == 'uniform': - self.node_weights = np.ones(self.n_nodes) / self.n_nodes - else: - raise ValueError('Unknown distribution type.') - else: - raise TypeError( - 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') + self.n_nodes = adjacency.shape[0] + self.norm_adjacency = adjacency / adjacency.data.sum() + self.node_probs = node_probs.copy() + + def aggregate(self, membership: sparse.csr_matrix): + """Aggregates nodes belonging to the same cluster. - def aggregate(self, membership): - """ - Aggregates nodes belonging to the same clusters. Parameters ---------- - membership: scipy sparse matrix of shape n_nodes x n_clusters + membership: + Entry i,j equals to 1 if node i belongs to cluster j (0 otherwise). Returns ------- - the aggregated graph + The aggregated graph """ - self.norm_adj = membership.T.dot(self.norm_adj.dot(membership)).tocsr() - self.node_weights = np.array(membership.T.dot(self.node_weights).T) - self.n_nodes = self.norm_adj.shape[0] + self.norm_adjacency = membership.T.dot(self.norm_adjacency.dot(membership)).tocsr() + self.node_probs = np.array(membership.T.dot(self.node_probs).T) + self.n_nodes = self.norm_adjacency.shape[0] return self -class GreedyModularity: - """ - A greedy modularity optimizer. +class Optimizer: + """A generic optimization algorithm. Attributes ---------- - score_: total increase of modularity after fitting - labels_: partition of the nodes. labels[node] = cluster_index + score_: float + Total increase of the objective function. + labels_: np.ndarray + Cluster index of each node. + """ + def __init__(self): + self.score_ = None + self.labels_ = None + + def fit(self, graph: NormalizedGraph): + """Fit the clusters to the objective function. + + Parameters + ---------- + graph: + Graph to cluster. + + Returns + ------- + self: :class:Ě€Optimizer` + + """ + return self + + +class GreedyModularity(Optimizer): + """ + A greedy modularity optimizer. """ - def __init__(self, resolution=1., tol=0., shuffle_nodes=False): + def __init__(self, resolution: float = 1, tol: float = 1e-3, shuffle_nodes: bool = False): """ Parameters ---------- - resolution: modularity resolution - tol: minimum modularity increase to enter a new optimization pass - shuffle_nodes: whether to shuffle the nodes before beginning an optimization pass + resolution: + Resolution parameter (positive) + tol: + Minimum increase in modularity to enter a new optimization pass. + shuffle_nodes: + If true, shuffle the nodes before starting a new optimization pass. """ + Optimizer.__init__(self) self.resolution = resolution self.tol = tol self.shuffle_nodes = shuffle_nodes - self.score_ = None - self.labels_ = None def fit(self, graph: NormalizedGraph): - """ - Iterates over the nodes of the graph and moves them to the cluster of highest increase among their neighbors. - Parameters - ---------- - graph: the graph to cluster + """Iterate over the nodes to increase modularity. 
- Returns - ------- - self + Parameters + ---------- + graph: + Graph to cluster. - """ + Returns + ------- + self: :class:`Optimizer` + """ increase = True - total_increase = 0. + total_increase: float = 0 labels: np.ndarray = np.arange(graph.n_nodes) - clusters_proba: np.ndarray = graph.node_weights.copy() - self_loops: np.ndarray = graph.norm_adj.diagonal() + node_probs: np.ndarray = graph.node_probs + cluster_probs = node_probs.copy() + self_loops: np.ndarray = graph.norm_adjacency.diagonal() while increase: increase = False - pass_increase = 0. + pass_increase: float = 0 if self.shuffle_nodes: nodes = np.random.permutation(np.arange(graph.n_nodes)) @@ -128,40 +148,41 @@ def fit(self, graph: NormalizedGraph): for node in nodes: node_cluster: int = labels[node] - node_weights: np.ndarray = graph.norm_adj.data[ - graph.norm_adj.indptr[node]:graph.norm_adj.indptr[node + 1]] - neighbors: np.ndarray = graph.norm_adj.indices[ - graph.norm_adj.indptr[node]:graph.norm_adj.indptr[node + 1]] - neighbors_clusters: np.ndarray = labels[neighbors] - unique_clusters: list = list(set(neighbors_clusters.tolist()) - {node_cluster}) - n_clusters: int = len(unique_clusters) + neighbor_weights: np.ndarray = graph.norm_adjacency.data[ + graph.norm_adjacency.indptr[node]:graph.norm_adjacency.indptr[node + 1]] + neighbors: np.ndarray = graph.norm_adjacency.indices[ + graph.norm_adjacency.indptr[node]:graph.norm_adjacency.indptr[node + 1]] + neighbor_clusters: np.ndarray = labels[neighbors] + unique_clusters: list = list(set(neighbor_clusters.tolist()) - {node_cluster}) + n_clusters = len(unique_clusters) - if n_clusters > 0: - node_proba: float = graph.node_weights[node] - node_ratio: float = self.resolution * node_proba + if n_clusters: + node_prob: float = node_probs[node] + node_prob_res: float = self.resolution * node_prob - # node_weights of connections to all other nodes in original cluster - out_delta: float = (self_loops[node] - node_weights.dot(neighbors_clusters == node_cluster)) - # proba to choose (node, other_neighbor) among original cluster - out_delta += node_ratio * (clusters_proba[node_cluster] - node_proba) + # total weight of edges to neighbors in the same cluster + out_delta: float = neighbor_weights[neighbor_clusters == node_cluster].sum() - self_loops[node] + # minus the probability to choose a neighbor in the same cluster (with resolution factor) + out_delta -= node_prob_res * (cluster_probs[node_cluster] - node_prob) - local_delta: np.ndarray = np.full(n_clusters, out_delta) + local_delta: np.ndarray = np.full(n_clusters, -out_delta) for index_cluster, cluster in enumerate(unique_clusters): - # node_weights of connections to all other nodes in candidate cluster - in_delta: float = node_weights.dot(neighbors_clusters == cluster) - # proba to choose (node, other_neighbor) among new cluster - in_delta -= node_ratio * clusters_proba[cluster] + # total weight of edges to neighbors in the candidate cluster + in_delta: float = neighbor_weights[neighbor_clusters == cluster].sum() + # minus the probability to choose a neighbor in the candidate cluster (with resolution factor) + in_delta -= node_prob_res * cluster_probs[cluster] local_delta[index_cluster] += in_delta - best_delta: float = 2 * max(local_delta) + best_ix: int = local_delta.argmax() + best_delta: float = 2 * local_delta[best_ix] if best_delta > 0: pass_increase += best_delta - best_cluster = unique_clusters[local_delta.argmax()] + best_cluster = unique_clusters[best_ix] - clusters_proba[node_cluster] -= node_proba - 
clusters_proba[best_cluster] += node_proba + cluster_probs[node_cluster] -= node_prob + cluster_probs[best_cluster] += node_prob labels[node] = best_cluster total_increase += pass_increase @@ -175,38 +196,48 @@ def fit(self, graph: NormalizedGraph): @njit -def fit_core(shuffle_nodes, n_nodes, node_weights, resolution, self_loops, tol, indptr, indices, weights): +def fit_core(resolution: float, tol: float, shuffle_nodes: bool, n_nodes: int, node_probs: np.ndarray, + self_loops: np.ndarray, data: np.ndarray, indices: np.ndarray, indptr: np.ndarray) -> (np.ndarray, float): """ Parameters ---------- - shuffle_nodes: if True, a random permutation of the node is done. The natural order is used otherwise - n_nodes: number of nodes in the graph - node_weights: the node weights in the graph - resolution: the resolution for the Louvain modularity - self_loops: the weights of the self loops for each node - tol: the minimum desired increase for each maximization pass - indptr: the indptr array from the Scipy CSR adjacency matrix - indices: the indices array from the Scipy CSR adjacency matrix - weights: the data array from the Scipy CSR adjacency matrix + resolution: + Resolution parameter (positive). + tol: + Minimum increase in modularity to enter a new optimization pass. + shuffle_nodes: + If True, shuffle the nodes before starting a new optimization pass. + n_nodes: + Number of nodes. + node_probs: + Distribution of node weights (sums to 1). + self_loops: + Weights of self loops. + data: + CSR format data array of the normalized adjacency matrix. + indices: + CSR format index array of the normalized adjacency matrix. + indptr: + CSR format index pointer array of the normalized adjacency matrix. Returns ------- - a tuple consisting of: - -the labels found by the algorithm - -the score of the algorithm (total modularity increase) + labels: + Cluster index of each node + total_increase: + Score of the clustering (total increase in modularity) """ - increase = True - total_increase = 0 + increase: bool = True + total_increase: float = 0 labels: np.ndarray = np.arange(n_nodes) - clusters_proba: np.ndarray = node_weights.copy() + cluster_probs: np.ndarray = node_probs.copy() - local_cluster_weights = np.full(n_nodes, 0.0) nodes = np.arange(n_nodes) while increase: increase = False - pass_increase = 0. 
+ pass_increase: float = 0 if shuffle_nodes: nodes = np.random.permutation(np.arange(n_nodes)) @@ -214,43 +245,41 @@ def fit_core(shuffle_nodes, n_nodes, node_weights, resolution, self_loops, tol, for node in nodes: node_cluster = labels[node] - for k in range(indptr[node], indptr[node + 1]): - local_cluster_weights[labels[indices[k]]] += weights[k] + neighbor_cluster_weights = np.zeros(n_nodes) + for i in range(indptr[node], indptr[node + 1]): + neighbor_cluster_weights[labels[indices[i]]] += data[i] unique_clusters = set(labels[indices[indptr[node]:indptr[node + 1]]]) unique_clusters.discard(node_cluster) if len(unique_clusters): - node_proba = node_weights[node] - node_ratio = resolution * node_proba + node_prob = node_probs[node] + node_prob_res = resolution * node_prob - # neighbors_weights of connections to all other nodes in original cluster - out_delta = self_loops[node] - local_cluster_weights[node_cluster] + # total weight of edges to neighbors in the same cluster + out_delta: float = neighbor_cluster_weights[node_cluster] - self_loops[node] + # minus the probability to choose a neighbor in the same cluster (with resolution factor) + out_delta -= node_prob_res * (cluster_probs[node_cluster] - node_prob) - # proba to choose (node, other_neighbor) among original cluster - out_delta += node_ratio * (clusters_proba[node_cluster] - node_proba) - - best_delta = 0.0 + best_delta: float = 0 best_cluster = node_cluster for cluster in unique_clusters: - # neighbors_weights of connections to all other nodes in candidate cluster - in_delta = local_cluster_weights[ - cluster] # np.sum(neighbors_weights[neighbors_clusters == cluster]) - local_cluster_weights[cluster] = 0.0 - # proba to choose (node, other_neighbor) among new cluster - in_delta -= node_ratio * clusters_proba[cluster] - local_delta = 2 * (out_delta + in_delta) + # total weight of edges to neighbors in the candidate cluster + in_delta: float = neighbor_cluster_weights[cluster] + # minus the probability to choose a neighbor in the candidate cluster (with resolution factor) + in_delta -= node_prob_res * cluster_probs[cluster] + + local_delta = 2 * (in_delta - out_delta) if local_delta > best_delta: best_delta = local_delta best_cluster = cluster if best_delta > 0: pass_increase += best_delta - clusters_proba[node_cluster] -= node_proba - clusters_proba[best_cluster] += node_proba + cluster_probs[node_cluster] -= node_prob + cluster_probs[best_cluster] += node_prob labels[node] = best_cluster - local_cluster_weights[node_cluster] = 0.0 total_increase += pass_increase if pass_increase > tol: @@ -258,75 +287,71 @@ def fit_core(shuffle_nodes, n_nodes, node_weights, resolution, self_loops, tol, return labels, total_increase -class GreedyModularityJiT: - """ - A greedy modularity optimizer using Numba for enhanced performance. - - Tested with Numba v0.40.1. +class GreedyModularityNumba(Optimizer): + """A greedy modularity optimizer using Numba for enhanced performance. Attributes ---------- - score_: total increase of modularity after fitting - labels_: partition of the nodes. labels[node] = cluster_index + labels_: + Cluster index of each node + score_: + Score of the clustering (total increase in modularity) + + Notes + ----- + Tested with Numba v0.40.1. 
""" - def __init__(self, resolution=1., tol=0., shuffle_nodes=False): + def __init__(self, resolution: float = 1, tol: float = 1e-3, shuffle_nodes: bool = False): """ Parameters ---------- - resolution: modularity resolution - tol: minimum modularity increase to enter a new optimization pass - shuffle_nodes: whether to shuffle the nodes before beginning an optimization pass + resolution: + Modularity resolution. + tol: + Minimum increase in modularity to enter a new optimization pass. + shuffle_nodes: + If True, shuffle the nodes before each optimization pass. """ + Optimizer.__init__(self) self.resolution = resolution self.tol = tol self.shuffle_nodes = shuffle_nodes - self.score_ = None - self.labels_ = None def fit(self, graph: NormalizedGraph): - """ - Iterates over the nodes of the graph and moves them to the cluster of highest increase among their neighbors. - Parameters - ---------- - graph: the graph to cluster + """Iterate over the nodes to increase modularity. - Returns - ------- - self + Parameters + ---------- + graph: + Graph to cluster. - """ - self_loops: np.ndarray = graph.norm_adj.diagonal() - - res_labels, total_increase = fit_core(self.shuffle_nodes, - graph.n_nodes, - graph.node_weights, - self.resolution, - self_loops, - self.tol, - graph.norm_adj.indptr, - graph.norm_adj.indices, - graph.norm_adj.data) + Returns + ------- + self: :class:`Optimizer` + """ + labels, total_increase = fit_core(self.resolution, self.tol, self.shuffle_nodes, graph.n_nodes, + graph.node_probs, graph.norm_adjacency.diagonal(), graph.norm_adjacency.data, + graph.norm_adjacency.indices, graph.norm_adjacency.indptr) self.score_ = total_increase - _, self.labels_ = np.unique(res_labels, return_inverse=True) + _, self.labels_ = np.unique(labels, return_inverse=True) return self class Louvain: - """ - Macro algorithm for Louvain clustering. - - Several versions of the Greedy Modularity Maximization are available. - Those include a pure Python version which is used by default. - A Numba version named 'GreedyModularityJiT' is also available. + """Louvain algorithm for graph clustering in Python (default) and Numba. Attributes ---------- - labels_: partition of the nodes. labels[node] = cluster_index - iteration_count_: number of aggregations performed during the last run of the "fit" method + labels_: np.ndarray + Cluster index of each node. + iteration_count_: int + Total number of aggregations performed. + aggregate_graph_: sparse.csr_matrix + Aggregated graph at the end of the algorithm. Example ------- @@ -334,8 +359,8 @@ class Louvain: >>> graph = sparse.identity(3, format='csr') >>> (louvain.fit(graph).labels_ == np.array([0, 1, 2])).all() True - >>> louvain_jit = Louvain(algorithm=GreedyModularityJiT()) - >>> (louvain_jit.fit(graph).labels_ == np.array([0, 1, 2])).all() + >>> louvain_numba = Louvain(algorithm=GreedyModularityNumba()) + >>> (louvain_numba.fit(graph).labels_ == np.array([0, 1, 2])).all() True References @@ -345,54 +370,105 @@ class Louvain: Journal of statistical mechanics: theory and experiment, 2008 """ - def __init__(self, algorithm=GreedyModularity(), tol=0., max_agg_iter: int = -1, verbose=0): + def __init__(self, algorithm: Union[str, Optimizer] = default, resolution: float = 1, tol: float = 1e-3, + shuffle_nodes: bool = False, agg_tol: float = 1e-3, max_agg_iter: int = -1, verbose: bool = False): """ Parameters ---------- - algorithm: the fixed level optimization algorithm, requires a fit method and score_ and labels_ attributes. 
- tol: the minimum modularity increase to keep aggregating. - max_agg_iter: the maximum number of aggregations to perform, a negative value is interpreted as no limit - verbose: enables verbosity + algorithm: + The optimization algorithm. + Requires a fit method. + Requires score_ and labels_ attributes. + resolution: + Resolution parameter. + tol: + Minimum increase in the objective function to enter a new optimization pass. + shuffle_nodes: + If True, shuffle the nodes before each optimization pass. + agg_tol: + Minimum increase in the objective function to enter a new aggregation pass. + max_agg_iter: + Maximum number of aggregations. + A negative value is interpreted as no limit. + verbose: + Verbose mode. """ - self.algorithm = algorithm - self.tol = tol + if type(algorithm) == str: + if algorithm == "numba": + self.algorithm = GreedyModularityNumba(resolution, tol, shuffle_nodes) + elif algorithm == "python": + self.algorithm = GreedyModularity(resolution, tol, shuffle_nodes) + else: + raise ValueError('Unknown algorithm name.') + elif isinstance(algorithm, Optimizer): + self.algorithm = algorithm + else: + raise TypeError('Algorithm must be a string ("numba" or "python") or a valid algorithm.') + if type(max_agg_iter) != int: - raise TypeError('The maximum number of iterations must be a integer') + raise TypeError('The maximum number of iterations must be an integer.') + self.agg_tol = agg_tol self.max_agg_iter = max_agg_iter self.verbose = verbose self.labels_ = None self.iteration_count_ = None + self.aggregate_graph_ = None + + def fit(self, adjacency: sparse.csr_matrix, node_weights: Union[str, np.ndarray] = 'degree') -> 'Louvain': + """Clustering using chosen Optimizer. - def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree"): - """ - Alternates local optimization and aggregation until convergence. Parameters ---------- - adj_matrix: adjacency matrix of the graph to cluster - node_weights: node node_weights distribution to be used in the second term of the modularity + adjacency : + Adjacency matrix of the graph to cluster. + node_weights : + Node weights used in the second term of modularity. Returns ------- - self + self: :class:`Louvain` """ - if type(adj_matrix) != sparse.csr_matrix: + if type(adjacency) != sparse.csr_matrix: raise TypeError('The adjacency matrix must be in a scipy compressed sparse row (csr) format.') - # check that the graph is not directed - if adj_matrix.shape[0] != adj_matrix.shape[1]: + if adjacency.shape[0] != adjacency.shape[1]: raise ValueError('The adjacency matrix must be square.') - if (adj_matrix != adj_matrix.T).nnz != 0: - raise ValueError('The graph must not be directed. Please fit a symmetric adjacency matrix.') - graph = NormalizedGraph(adj_matrix, node_weights) + if (adjacency != adjacency.T).nnz != 0: + raise ValueError('The graph cannot be directed. 
Please fit a symmetric adjacency matrix.') + + n_nodes = adjacency.shape[0] + + if type(node_weights) == np.ndarray: + if len(node_weights) != n_nodes: + raise ValueError('The number of node weights must match the number of nodes.') + else: + node_weights_vec = node_weights + elif type(node_weights) == str: + if node_weights == 'degree': + node_weights_vec = adjacency.dot(np.ones(n_nodes)) + elif node_weights == 'uniform': + node_weights_vec = np.ones(n_nodes) + else: + raise ValueError('Unknown distribution of node weights.') + else: + raise TypeError( + 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') + + if np.any(node_weights_vec <= 0): + raise ValueError('All node weights must be positive.') + else: + node_weights_vec = node_weights_vec / np.sum(node_weights_vec) + + graph = NormalizedGraph(adjacency, node_weights_vec) membership = sparse.identity(graph.n_nodes, format='csr') increase = True iteration_count = 0 if self.verbose: - print("Starting with", graph.n_nodes, "nodes") + print("Starting with", graph.n_nodes, "nodes.") while increase: iteration_count += 1 self.algorithm.fit(graph) - if self.algorithm.score_ <= self.tol: + if self.algorithm.score_ <= self.agg_tol: increase = False else: row = np.arange(graph.n_nodes) @@ -412,4 +488,5 @@ def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree"): self.iteration_count_ = iteration_count self.labels_ = membership.indices _, self.labels_ = np.unique(self.labels_, return_inverse=True) + self.aggregate_graph_ = graph.norm_adjacency * adjacency.data.sum() return self diff --git a/sknetwork/clustering/metrics.py b/sknetwork/clustering/metrics.py index 620c25893..50e8b3521 100644 --- a/sknetwork/clustering/metrics.py +++ b/sknetwork/clustering/metrics.py @@ -8,32 +8,36 @@ import numpy as np from itertools import combinations -from scipy import errstate, sqrt, isinf, sparse +from scipy import sparse +from typing import Union -def modularity(adjacency_matrix, partition, resolution=1.0) -> float: +def modularity(adjacency: Union[sparse.csr_matrix, np.ndarray], partition: Union[dict, np.ndarray], + resolution: float = 1) -> float: """ Compute the modularity of a node partition. + + :math:`Q = \\sum_{ij}(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2})\\delta_{ij}` + Parameters ---------- - partition: dict or np.ndarray - The partition of the nodes. - The keys of the dictionary correspond to the nodes and the values to the communities. - adjacency_matrix: scipy.csr_matrix or np.ndarray + partition : dict or np.ndarray + The partition of the nodes. The keys of the dictionary correspond to the nodes and the values to the communities. + adjacency : scipy.csr_matrix or np.ndarray The adjacency matrix of the graph (sparse or dense). - resolution: double, optional - The resolution parameter in the modularity function (default=1.). + resolution : float, optional (default=1.) + The resolution parameter in the modularity function. Returns ------- modularity : float - The modularity. + The modularity. 
""" - if type(adjacency_matrix) == sparse.csr_matrix: - adj_matrix = adjacency_matrix - elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray: - adj_matrix = sparse.csr_matrix(adjacency_matrix) + if type(adjacency) == sparse.csr_matrix: + adj_matrix = adjacency + elif sparse.isspmatrix(adjacency) or type(adjacency) == np.ndarray: + adj_matrix = sparse.csr_matrix(adjacency) else: raise TypeError( "The argument must be a NumPy array or a SciPy Sparse matrix.") @@ -54,84 +58,131 @@ def modularity(adjacency_matrix, partition, resolution=1.0) -> float: data = np.ones(n_nodes) membership = sparse.csr_matrix((data, (row, col))) - pos = ((membership.multiply(norm_adj.dot(membership))).dot(np.ones(membership.shape[1]))).sum() - neg = np.linalg.norm(membership.T.dot(probs)) ** 2 - return float(pos - resolution * neg) + fit = ((membership.multiply(norm_adj.dot(membership))).dot(np.ones(membership.shape[1]))).sum() + diversity = np.linalg.norm(membership.T.dot(probs)) ** 2 + return float(fit - resolution * diversity) + +def bimodularity(biadjacency: sparse.csr_matrix, sample_labels: np.ndarray, feature_labels: np.ndarray, + resolution: float = 1) -> float: + """ + Modularity for bipartite graphs. + + :math:`Q = \\sum_{ij}(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{d_if_j}{w^2})\\delta_{ij}` + + Parameters + ---------- + biadjacency: + matrix of shape n1 x n2 + sample_labels: + partition of the samples, sample_labels[node] = cluster_index + feature_labels: + partition of the features, feature_labels[node] = cluster_index + resolution: + scaling for the second term of the bimodularity -def cocitation_modularity(adjacency_matrix, partition, resolution=1.0) -> float: + Returns + ------- + bimodularity: float + The bimodularity + """ + n_samples, n_features = biadjacency.shape + one_samples, one_features = np.ones(n_samples), np.ones(n_features) + total_weight: float = biadjacency.data.sum() + sample_weights = biadjacency.dot(one_features) / total_weight + features_weights = biadjacency.T.dot(one_samples) / total_weight + + _, sample_labels = np.unique(sample_labels, return_inverse=True) + _, feature_labels = np.unique(feature_labels, return_inverse=True) + n_clusters = max(len(set(sample_labels)), len(set(feature_labels))) + + sample_membership = sparse.csr_matrix((one_samples, (np.arange(n_samples), sample_labels)), + shape=(n_samples, n_clusters)) + feature_membership = sparse.csc_matrix((one_features, (np.arange(n_features), feature_labels)), + shape=(n_features, n_clusters)) + + fit: float = sample_membership.multiply(biadjacency.dot(feature_membership)).data.sum() / total_weight + div: float = (sample_membership.T.dot(sample_weights)).dot(feature_membership.T.dot(features_weights)) + + return fit - resolution * div + + +def cocitation_modularity(adjacency, partition, resolution: float = 1) -> float: """ Compute the modularity of a node partition on the normalized cocitation graph associated to a network without explicit computation of the cocitation graph. + + :math:`Q = \\sum_{ij}(\\dfrac{(AF^{-1}A^T)_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2})\\delta_{ij}` + Parameters ---------- partition: dict or np.ndarray - The partition of the nodes. - The keys of the dictionary correspond to the nodes and the values to the communities. - adjacency_matrix: scipy.csr_matrix or np.ndarray + The partition of the nodes. The keys of the dictionary correspond to the nodes and the values to the communities. 
+ adjacency: scipy.csr_matrix or np.ndarray The adjacency matrix of the graph (sparse or dense). - resolution: double, optional - The resolution parameter in the modularity function (default=1.). + resolution: float (default=1.) + The resolution parameter in the modularity function. Returns ------- - modularity : float - The modularity. + modularity: float + The modularity on the normalized cocitation graph. """ - if type(adjacency_matrix) == sparse.csr_matrix: - adj_matrix = adjacency_matrix - elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray: - adj_matrix = sparse.csr_matrix(adjacency_matrix) + if type(adjacency) == sparse.csr_matrix: + adj_matrix = adjacency + elif sparse.isspmatrix(adjacency) or type(adjacency) == np.ndarray: + adj_matrix = sparse.csr_matrix(adjacency) else: raise TypeError( "The argument must be a NumPy array or a SciPy Sparse matrix.") - n_nodes = adj_matrix.shape[0] - out_degree = adj_matrix.dot(np.ones(adj_matrix.shape[0])) - in_degree = adj_matrix.T.dot(np.ones(adj_matrix.shape[1])) - total_weight = out_degree.sum() + n_samples, n_features = adj_matrix.shape + total_weight = adj_matrix.data.sum() + pou = adj_matrix.dot(np.ones(n_samples)) / total_weight + din = adj_matrix.T.dot(np.ones(n_features)) + + # pseudo inverse square-root in-degree matrix + dhin = sparse.diags(np.sqrt(din), shape=(n_features, n_features), format='csr') + dhin.data = 1 / dhin.data - with errstate(divide='ignore'): - in_degree_sqrt = 1.0 / sqrt(in_degree) - in_degree_sqrt[isinf(in_degree_sqrt)] = 0 - in_degree_sqrt = sparse.spdiags(in_degree_sqrt, [0], adj_matrix.shape[1], adj_matrix.shape[1], format='csr') - normalized_adjacency = (adj_matrix.dot(in_degree_sqrt)).T + normalized_adjacency = (adj_matrix.dot(dhin)).T.tocsr() if type(partition) == dict: - labels = np.array([partition[i] for i in range(n_nodes)]) + labels = np.array([partition[i] for i in range(n_samples)]) elif type(partition) == np.ndarray: labels = partition.copy() else: raise TypeError('The partition must be a dictionary or a NumPy array.') - clusters_indices = set(labels.tolist()) - mod = 0. - for cluster in clusters_indices: - indicator_vector = (labels == cluster) - mod += np.linalg.norm(normalized_adjacency.dot(indicator_vector)) ** 2 - mod -= (resolution / total_weight) * (np.dot(out_degree, indicator_vector)) ** 2 + membership = sparse.csc_matrix((np.ones(n_samples), (np.arange(n_samples), labels)), + shape=(n_samples, labels.max() + 1)) + fit = ((normalized_adjacency.dot(membership)).data ** 2).sum() / total_weight + diversity = np.linalg.norm(membership.T.dot(pou)) ** 2 - return float(mod / total_weight) + return float(fit - resolution * diversity) -def performance(adjacency_matrix: sparse.csr_matrix, labels: np.ndarray) -> float: - """ - The performance is the ratio of the total number of intra-cluster edges plus the total number of inter-cluster +def performance(adjacency: sparse.csr_matrix, labels: np.ndarray) -> float: + """The performance is the ratio of the total number of intra-cluster edges plus the total number of inter-cluster non-edges with the number of potential edges in the graph. + Parameters ---------- - adjacency_matrix: the adjacency matrix of the graph - labels: the cluster indices, labels[node] = index of the cluster of node. + adjacency: + the adjacency matrix of the graph + labels: + the cluster indices, labels[node] = index of the cluster of node. 
Returns ------- - the performance metric + : float + the performance metric """ - n_nodes, m_nodes = adjacency_matrix.shape + n_nodes, m_nodes = adjacency.shape if n_nodes != m_nodes: raise ValueError('The adjacency matrix is not square.') - bool_adj = abs(adjacency_matrix.sign()) + bool_adj = abs(adjacency.sign()) clusters = set(labels.tolist()) cluster_indicator = {cluster: (labels == cluster) for cluster in clusters} @@ -148,34 +199,35 @@ def performance(adjacency_matrix: sparse.csr_matrix, labels: np.ndarray) -> floa return perf / n_nodes**2 -def cocitation_performance(adjacency_matrix: sparse.csr_matrix, labels: np.ndarray) -> float: - """ - The performance of the clustering on the normalized cocitation graph associated to the provided adjacency +def cocitation_performance(adjacency: sparse.csr_matrix, labels: np.ndarray) -> float: + """The performance of the clustering on the normalized cocitation graph associated to the provided adjacency matrix without explicit computation of the graph. + Parameters ---------- - adjacency_matrix: the adjacency matrix of the graph - labels: the cluster indices, labels[node] = index of the cluster of node. + adjacency: + the adjacency matrix of the graph + labels: + the cluster indices, labels[node] = index of the cluster of node. Returns ------- - The performance metric. + : float + The performance metric on the normalized cocitation graph. """ - n_nodes, m_nodes = adjacency_matrix.shape - if n_nodes != m_nodes: + n_samples, n_features = adjacency.shape + if n_samples != n_features: raise ValueError('The adjacency matrix is not square.') - bool_adj = abs(adjacency_matrix.sign()) + bool_adj = abs(adjacency.sign()) clusters = set(labels.tolist()) cluster_indicator = {cluster: (labels == cluster) for cluster in clusters} cluster_sizes = {cluster: cluster_indicator[cluster].sum() for cluster in clusters} - din = bool_adj.T.dot(np.ones(m_nodes)) - with errstate(divide='ignore'): - din_sqrt = 1.0 / sqrt(din) - din_sqrt[isinf(din_sqrt)] = 0 + din = bool_adj.T.dot(np.ones(n_features)) # pseudo inverse square-root in-degree matrix - dhin = sparse.spdiags(din_sqrt, [0], m_nodes, m_nodes, format='csr') + dhin = sparse.diags(np.sqrt(din), shape=(n_features, n_features), format='csr') + dhin.data = 1 / dhin.data norm_backward_adj = (dhin.dot(bool_adj.T)).tocsr() @@ -188,4 +240,4 @@ def cocitation_performance(adjacency_matrix: sparse.csr_matrix, labels: np.ndarr perf -= 2 * cluster_indicator[cluster_i].dot( bool_adj.dot(norm_backward_adj.dot(cluster_indicator[cluster_j]))) - return perf / n_nodes**2 + return perf / n_samples**2 diff --git a/sknetwork/clustering/test/test_bilouvain.py b/sknetwork/clustering/test/test_bilouvain.py new file mode 100644 index 000000000..d31872cb1 --- /dev/null +++ b/sknetwork/clustering/test/test_bilouvain.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""tests for bilouvain.py""" + + +import unittest +from sknetwork.clustering import * +from sknetwork.toy_graphs import * + + +class TestBiLouvainClustering(unittest.TestCase): + + def setUp(self): + self.bilouvain = BiLouvain() + self.bilouvain_high_resolution = BiLouvain(resolution=2) + self.star_wars_graph = star_wars_villains_graph() + + def test_star_wars_graph(self): + labels = self.bilouvain.fit(self.star_wars_graph).sample_labels_ + self.assertEqual(labels.shape, (4,)) + labels = self.bilouvain_high_resolution.fit(self.star_wars_graph).sample_labels_ + self.assertEqual(labels.shape, (4,)) diff --git a/sknetwork/clustering/test/test_louvain.py 
b/sknetwork/clustering/test/test_louvain.py index 08b941fe3..4965bf261 100644 --- a/sknetwork/clustering/test/test_louvain.py +++ b/sknetwork/clustering/test/test_louvain.py @@ -1,31 +1,40 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for louvain.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Quentin Lutz -# -# This file is part of Scikit-network. +"""tests for louvain.py""" + import unittest -from sknetwork.clustering.louvain import Louvain +from sknetwork.clustering import * +from sknetwork.toy_graphs import * from scipy.sparse import identity class TestLouvainClustering(unittest.TestCase): def setUp(self): - self.louvain_basic = Louvain() + self.louvain = Louvain() + self.louvain_high_resolution = Louvain(resolution=2) + self.karate_club_graph = karate_club_graph() def test_unknown_types(self): with self.assertRaises(TypeError): - self.louvain_basic.fit(identity(1)) + self.louvain.fit(identity(1)) with self.assertRaises(TypeError): - self.louvain_basic.fit(identity(2, format='csr'), node_weights=1) + self.louvain.fit(identity(2, format='csr'), node_weights=1) def test_unknown_options(self): with self.assertRaises(ValueError): - self.louvain_basic.fit(identity(2, format='csr'), node_weights='unknown') + self.louvain.fit(identity(2, format='csr'), node_weights='unknown') def test_single_node_graph(self): - self.assertEqual(self.louvain_basic.fit(identity(1, format='csr')).labels_, [0]) + self.assertEqual(self.louvain.fit(identity(1, format='csr')).labels_, [0]) + + def test_karate_club_graph(self): + labels = self.louvain.fit(self.karate_club_graph).labels_ + self.assertEqual(labels.shape, (34,)) + self.assertAlmostEqual(modularity(self.karate_club_graph, labels), 0.42, 2) + labels = self.louvain_high_resolution.fit(self.karate_club_graph).labels_ + self.assertEqual(labels.shape, (34,)) + self.assertAlmostEqual(modularity(self.karate_club_graph, labels), 0.34, 2) + diff --git a/sknetwork/clustering/test/test_metrics.py b/sknetwork/clustering/test/test_metrics.py index 686fc5564..d1f643b47 100644 --- a/sknetwork/clustering/test/test_metrics.py +++ b/sknetwork/clustering/test/test_metrics.py @@ -1,15 +1,10 @@ # -*- coding: utf-8 -*- # test for metrics.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Nathan de Lara -# -# This file is part of Scikit-network. +"""tests for clustering metrics""" import unittest -import numpy as np -from scipy import sparse -from sknetwork.clustering.metrics import modularity, cocitation_modularity, performance, cocitation_performance +from sknetwork.clustering import * +from sknetwork.toy_graphs import * class TestClusteringMetrics(unittest.TestCase): @@ -19,6 +14,7 @@ def setUp(self): [1, 0, 0, 0], [1, 0, 0, 1], [1, 0, 1, 0]])) + self.star_wars_graph = star_wars_villains_graph() self.labels = np.array([0, 1, 0, 0]) self.unique_cluster = np.zeros(4, dtype=int) @@ -26,6 +22,9 @@ def test_modularity(self): self.assertAlmostEqual(modularity(self.graph, self.labels), -0.0312, 3) self.assertAlmostEqual(modularity(self.graph, self.unique_cluster), 0.) + def test_bimodularity(self): + self.assertAlmostEqual(bimodularity(self.graph, self.unique_cluster, self.unique_cluster), 0.) + def test_cocitation_modularity(self): self.assertAlmostEqual(cocitation_modularity(self.graph, self.labels), 0.0521, 3) self.assertAlmostEqual(cocitation_modularity(self.graph, self.unique_cluster), 0.)
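For reference, the clustering workflow exercised by the tests above can be sketched as a short standalone script. This is an illustrative sketch only: it reuses the names that appear in test_louvain.py (karate_club_graph, Louvain, modularity), and the value near 0.42 is the one asserted by the test rather than a guaranteed output.

from sknetwork.toy_graphs import karate_club_graph
from sknetwork.clustering import Louvain, modularity

# toy adjacency matrix used by the tests above (34 nodes)
adjacency = karate_club_graph()

# Louvain with default resolution, as in test_karate_club_graph
louvain = Louvain()
labels = louvain.fit(adjacency).labels_   # one cluster label per node, shape (34,)

# modularity of the resulting partition; the test asserts a value close to 0.42
print(modularity(adjacency, labels))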
diff --git a/sknetwork/embedding/__init__.py b/sknetwork/embedding/__init__.py index e69de29bb..617853707 100644 --- a/sknetwork/embedding/__init__.py +++ b/sknetwork/embedding/__init__.py @@ -0,0 +1,4 @@ +from sknetwork.embedding.gsvd import * +from sknetwork.embedding.spectral import * +from sknetwork.embedding.metrics import * +from sknetwork.embedding.randomized_matrix_factorization import * diff --git a/sknetwork/embedding/gsvd.py b/sknetwork/embedding/gsvd.py new file mode 100755 index 000000000..32990bc97 --- /dev/null +++ b/sknetwork/embedding/gsvd.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu May 31 17:16:22 2018 +@author: Nathan de Lara +""" + +import numpy as np + +from sknetwork.embedding.randomized_matrix_factorization import randomized_svd +from scipy import sparse, linalg +from typing import Union + + +class GSVDEmbedding: + """Generalized Singular Value Decomposition for non-linear dimensionality reduction. + + Parameters + ----------- + embedding_dimension: int, optional + The dimension of the projected subspace (default=2). + + Attributes + ---------- + embedding_ : array, shape = (n_samples, embedding_dimension) + Embedding of the samples (rows of the training matrix) + features_ : array, shape = (n_features, embedding_dimension) + Embedding of the features (columns of the training matrix) + singular_values_ : array, shape = (embedding_dimension) + Singular values of the training matrix + + References + ---------- + - Bonald, De Lara. "Interpretable Graph Embedding using Generalized SVD." + """ + + def __init__(self, embedding_dimension=2): + self.embedding_dimension = embedding_dimension + self.embedding_ = None + self.features_ = None + self.singular_values_ = None + + def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], randomized_decomposition: bool = True, + n_iter='auto', power_iteration_normalizer: Union[str, None] = 'auto', random_state=None) -> 'GSVDEmbedding': + """Fits the model from data in adjacency_matrix. + + Parameters + ---------- + adjacency: array-like, shape = (n, m) + Adjacency matrix, where n = m is the number of nodes for a standard directed or undirected graph, + n is the cardinal of V1 and m is the cardinal of V2 for a bipartite graph. + randomized_decomposition: + whether to use a randomized (and faster) svd method or the standard scipy one. 
+ n_iter: int or ``'auto'`` (default is ``'auto'``) + See :meth:`sknetwork.embedding.randomized_matrix_factorization.randomized_range_finder` + power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None`` + See :meth:`sknetwork.embedding.randomized_matrix_factorization.randomized_range_finder` + random_state: int, RandomState instance or ``None``, optional (default= ``None``) + See :meth:`sknetwork.embedding.randomized_matrix_factorization.randomized_range_finder` + + Returns + ------- + self: :class:`GSVDEmbedding` + """ + if type(adjacency) == sparse.csr_matrix: + adjacency = adjacency + elif type(adjacency) == np.ndarray: + adjacency = sparse.csr_matrix(adjacency) + else: + raise TypeError( + "The argument must be a NumPy array or a SciPy Compressed Sparse Row matrix.") + n_nodes, m_nodes = adjacency.shape + total_weight = adjacency.data.sum() + # out-degree vector + dou = adjacency.dot(np.ones(m_nodes)) + # in-degree vector + din = adjacency.T.dot(np.ones(n_nodes)) + + # pseudo inverse square-root out-degree matrix + dhou = sparse.diags(np.sqrt(dou), shape=(n_nodes, n_nodes), format='csr') + dhou.data = 1 / dhou.data + # pseudo inverse square-root in-degree matrix + dhin = sparse.diags(np.sqrt(din), shape=(m_nodes, m_nodes), format='csr') + dhin.data = 1 / dhin.data + + laplacian = dhou.dot(adjacency.dot(dhin)) + + if randomized_decomposition: + u, sigma, vt = randomized_svd(laplacian, self.embedding_dimension, + n_iter=n_iter, + power_iteration_normalizer=power_iteration_normalizer, + random_state=random_state) + else: + u, sigma, vt = sparse.linalg.svds(laplacian, self.embedding_dimension) + + self.singular_values_ = sigma + self.embedding_ = np.sqrt(total_weight) * dhou.dot(u) * sigma + self.features_ = np.sqrt(total_weight) * dhin.dot(vt.T) + # shift the center of mass + self.embedding_ -= np.ones((n_nodes, 1)).dot(self.embedding_.T.dot(dou)[:, np.newaxis].T) / total_weight + self.features_ -= np.ones((m_nodes, 1)).dot(self.features_.T.dot(din)[:, np.newaxis].T) / total_weight + + return self diff --git a/sknetwork/embedding/metrics.py b/sknetwork/embedding/metrics.py index 831b26488..bb38948af 100644 --- a/sknetwork/embedding/metrics.py +++ b/sknetwork/embedding/metrics.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# coding: utf-8 - +# -*- coding: utf-8 -*- """ Created on Nov 6 2018 @@ -13,23 +12,95 @@ import numpy as np from scipy import sparse +from scipy.stats import hmean -def cosine_modularity(adjacency_matrix, embedding: np.ndarray, return_all: bool=False): +def dot_modularity(adjacency_matrix, embedding: np.ndarray, features=None, resolution=1., weights='degree', + return_all: bool=False): """ - Computes the difference of the weighted average cosine similarity between pairs of neighbors in the graph - and pairs of nodes in the graph. + Difference of the weighted average dot product between embeddings of pairs of neighbors in the graph + (fit term) and pairs of nodes in the graph (diversity term). + + :math:`Q = \\sum_{ij}(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2})x_i^Tx_j` + + This metric is normalized to lie between -1 and 1. + If the embeddings are normalized, this reduces to the cosine modularity.
+ Parameters ---------- adjacency_matrix: sparse.csr_matrix or np.ndarray - the adjacency matrix of the graph + the adjacency matrix of the graph embedding: np.ndarray - the embedding to evaluate, embedding[i] must represent the embedding of node i - return_all: whether to return both means besides their difference, or only the difference + the embedding to evaluate, embedding[i] must represent the embedding of node i + features: None or np.ndarray + For bipartite graphs, features should be the embedding of the second part + resolution: float + scaling for first-order approximation + weights: ``'degree'`` or ``'uniform'`` + prior distribution on the nodes + + return_all: bool (default = ``False``) + whether to return (fit, diversity) or fit - diversity Returns ------- - the difference of means or (the difference of means, mean_neighbors, mean_pairs) + dot_modularity: a float or a tuple of floats. + """ + if type(adjacency_matrix) == sparse.csr_matrix: + adj_matrix = adjacency_matrix + elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray: + adj_matrix = sparse.csr_matrix(adjacency_matrix) + else: + raise TypeError( + "The argument must be a NumPy array or a SciPy Sparse matrix.") + n_nodes, m_nodes = adj_matrix.shape + total_weight: float = adjacency_matrix.data.sum() + + if features is None: + if n_nodes != m_nodes: + raise ValueError('feature cannot be None for non-square adjacency matrices.') + else: + normalization = np.linalg.norm(embedding) ** 2 / np.sqrt(n_nodes * m_nodes) + features = embedding + else: + normalization = np.linalg.norm(embedding.dot(features.T)) / np.sqrt(n_nodes * m_nodes) + + if weights == 'degree': + wou = adj_matrix.dot(np.ones(m_nodes)) / total_weight + win = adj_matrix.T.dot(np.ones(n_nodes)) / total_weight + elif weights == 'uniform': + wou = np.ones(n_nodes) / n_nodes + win = np.ones(m_nodes) / m_nodes + else: + raise ValueError('weights must be degree or uniform.') + + fit = (np.multiply(embedding, adjacency_matrix.dot(features))).sum() / (total_weight * normalization) + diversity = (embedding.T.dot(wou)).dot(features.T.dot(win)) / normalization + + if return_all: + return fit, resolution * diversity + else: + return fit - resolution * diversity + + +def hscore(adjacency_matrix, embedding: np.ndarray, order='second', return_all: bool=False): + """Harmonic mean of fit and diversity with respect to first or second order node similarity. + + Parameters + ---------- + adjacency_matrix: sparse.csr_matrix or np.ndarray + the adjacency matrix of the graph + embedding: np.ndarray + the embedding to evaluate, embedding[i] must represent the embedding of node i + order: \'first\' or \'second\'. + The order of the node similarity metric to use. First-order corresponds to edges weights while second-order + corresponds to the weights of the edges in the normalized cocitation graph. + return_all: bool (default = ``False``) + whether to return (fit, diversity) or hmean(fit, diversity) + + Returns + ------- + hscore: a float or a tuple of floats. 
""" if type(adjacency_matrix) == sparse.csr_matrix: @@ -40,19 +111,33 @@ def cosine_modularity(adjacency_matrix, embedding: np.ndarray, return_all: bool= raise TypeError( "The argument must be a NumPy array or a SciPy Sparse matrix.") n_nodes, m_nodes = adj_matrix.shape - if n_nodes != m_nodes: - raise ValueError("The adjacency matrix must be a square matrix.") - if (adj_matrix != adj_matrix.maximum(adj_matrix.T)).nnz != 0: - raise ValueError("The adjacency matrix is not symmetric.") + if order == 'first' and (n_nodes != m_nodes): + raise ValueError('For fist order similarity, the adjacency matrix must be square.') + total_weight = adj_matrix.data.sum() + # out-degree vector + dou = adj_matrix.dot(np.ones(m_nodes)) + # in-degree vector + din = adj_matrix.T.dot(np.ones(n_nodes)) - total_weight: float = adjacency_matrix.data.sum() - normalized_embedding = embedding / embedding.sum(axis=1)[:, np.newaxis] + pdhou, pdhin = np.zeros(n_nodes), np.zeros(m_nodes) + pdhou[dou.nonzero()] = 1 / np.sqrt(dou[dou.nonzero()]) + pdhin[din.nonzero()] = 1 / np.sqrt(din[din.nonzero()]) - neighbors_mean = (np.multiply(normalized_embedding, adjacency_matrix.dot(normalized_embedding))).sum() - neighbors_mean /= total_weight - pairs_mean = np.linalg.norm(normalized_embedding.mean(axis=0))**2 + normalization = np.linalg.norm(embedding.T * np.sqrt(dou)) ** 2 + if order == 'first': + fit = (np.multiply(embedding, adjacency_matrix.dot(embedding))).sum() + fit /= total_weight * (np.linalg.norm(embedding) ** 2 / n_nodes) + elif order == 'second': + fit = np.linalg.norm(adj_matrix.T.dot(embedding).T * pdhin) ** 2 / normalization + else: + raise ValueError('The similarity order should be \'first\' or \'second\'.') + diversity = (np.linalg.norm(embedding.T.dot(dou))) ** 2 / total_weight + diversity = 1 - diversity / normalization if return_all: - return neighbors_mean - pairs_mean, neighbors_mean, pairs_mean + return fit, diversity else: - return neighbors_mean - pairs_mean + if np.isclose(fit, 0.) or np.isclose(diversity, 0.): + return 0. + else: + return hmean([fit, diversity]) diff --git a/sknetwork/embedding/randomized_matrix_factorization.py b/sknetwork/embedding/randomized_matrix_factorization.py index 06c1d64c6..6f2427744 100644 --- a/sknetwork/embedding/randomized_matrix_factorization.py +++ b/sknetwork/embedding/randomized_matrix_factorization.py @@ -2,30 +2,30 @@ # -*- coding: utf-8 -*- """ Created on Oct 12 2018 - @author: Nathan de Lara - -Part of this code was adapted from the scikit-learn project. +Part of this code was adapted from the scikit-learn project: https://scikit-learn.org/stable/. """ import numpy as np from numpy.random import mtrand from scipy import sparse, linalg from scipy.sparse import linalg as splinalg +from typing import Union def safe_sparse_dot(a, b, dense_output=False): """ - Dot product that handle the sparse matrix case correctly - Uses BLAS GEMM as replacement for numpy.dot where possible + Dot product that handle the sparse matrix case correctly. Uses BLAS GEMM as replacement for numpy.dot where possible to avoid unnecessary copies. + Parameters ---------- - a : array or sparse matrix - b : array or sparse matrix - dense_output : boolean, default False - When False, either ``a`` or ``b`` being sparse will yield sparse - output. When True, output will always be an array. + a: array or sparse matrix + b: array or sparse matrix + dense_output: bool (default= ``False``) + When False, either ``a`` or ``b`` being sparse will yield sparse output. 
+ When True, output will always be an array. + Returns ------- dot_product : array or sparse matrix @@ -42,6 +42,7 @@ def safe_sparse_dot(a, b, dense_output=False): def check_random_state(seed): """Turn seed into a np.random.RandomState instance + Parameters ---------- seed : None | int | instance of RandomState @@ -51,6 +52,7 @@ def check_random_state(seed): Otherwise raise ValueError. """ if seed is None or seed is np.random: + # noinspection PyProtectedMember return mtrand._rand if isinstance(seed, np.integer) or isinstance(seed, int): return np.random.RandomState(seed) @@ -63,44 +65,46 @@ def check_random_state(seed): def randomized_range_finder(matrix, size, n_iter, power_iteration_normalizer='auto', random_state=None, return_all: bool = False) -> [np.ndarray, tuple]: """Computes an orthonormal matrix whose range approximates the range of A. + Parameters ---------- matrix : two dimensional array The input data matrix size : integer Size of the return array - n_iter : integer - Number of power iterations used to stabilize the result - power_iteration_normalizer : 'auto' (default), 'QR', 'LU', 'none' - Whether the power iterations are normalized with step-by-step - QR factorization (the slowest but most accurate), 'none' - (the fastest but numerically unstable when `n_iter` is large, e.g. - typically 5 or larger), or 'LU' factorization (numerically stable - but can lose slightly in accuracy). The 'auto' mode applies no - normalization if `n_iter`<=2 and switches to LU otherwise. - .. versionadded:: 0.18 - random_state : int, RandomState instance or None, optional (default=None) + n_iter : int + Number of power iterations. It can be used to deal with very noisy + problems. When 'auto', it is set to 4, unless ``size`` is small + (< .1 * min(matrix.shape)) in which case ``n_iter`` is set to 7. + This improves precision with few components. + power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None`` + Whether the power iterations are normalized with step-by-step + QR factorization (the slowest but most accurate), ``None`` + (the fastest but numerically unstable when ``n_iter`` is large, e.g. + typically 5 or larger), or ``'LU'`` factorization (numerically stable + but can lose slightly in accuracy). The ``'auto'`` mode applies no + normalization if ``n_iter`` <= 2 and switches to ``'LU'`` otherwise. + random_state: int, RandomState instance or ``None``, optional (default= ``None``) The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState + generator; If ``None``, the random number generator is the RandomState instance used by `np.random`. return_all : if True, returns (range_matrix, random_matrix, random_proj) else returns range_matrix. + Returns ------- range_matrix : 2D array matrix (size x size) projection matrix, the range of which approximates well the range of the input matrix. + Notes ----- Follows Algorithm 4.3 of - Finding structure with randomness: Stochastic algorithms for constructing - approximate matrix decompositions + + Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions Halko, et al., 2009 (arXiv:909) http://arxiv.org/pdf/0909.4061 - An implementation of a randomized algorithm for principal component - analysis - A. Szlam et al. 
2014 """ random_state = check_random_state(random_state) @@ -149,19 +153,19 @@ def svd_flip(u, v, u_based_decision=True): """Sign correction to ensure deterministic output from SVD. Adjusts the columns of u and the rows of v such that the loadings in the columns in u that are largest in absolute value are always positive. + Parameters ---------- - u, v : ndarray - u and v are the output of `linalg.svd` or - `sklearn.utils.extmath.randomized_svd`, with matching inner dimensions - so one can compute `np.dot(u * s, v)`. - u_based_decision : boolean, (default=True) + u, v: np.ndarray + u and v are the output of the svd, with matching inner dimensions so one can compute `np.dot(u * s, v)`. + u_based_decision : bool, (default=True) If True, use the columns of u as the basis for sign flipping. - Otherwise, use the rows of v. The choice of which variable to base the - decision on is generally algorithm dependent. + Otherwise, use the rows of v. + The choice of which variable to base the decision on is generally algorithm dependent. + Returns ------- - u_adjusted, v_adjusted : arrays with the same dimensions as the input. + u_adjusted, v_adjusted: arrays with the same dimensions as the input. """ if u_based_decision: # columns of u, rows of v @@ -180,65 +184,53 @@ def svd_flip(u, v, u_based_decision=True): def randomized_svd(matrix, n_components: int, n_oversamples: int = 10, n_iter='auto', transpose='auto', - power_iteration_normalizer='auto', flip_sign: bool = True, random_state=0): + power_iteration_normalizer: Union[str, None] = 'auto', flip_sign: bool = True, random_state=0): """Computes a truncated randomized SVD + Parameters ---------- matrix : ndarray or sparse matrix Matrix to decompose n_components : int Number of singular values and vectors to extract. - n_oversamples : int (default is 10) + n_oversamples : int (default=10) Additional number of random vectors to sample the range of M so as to ensure proper conditioning. The total number of random vectors used to find the range of M is embedding_dimension + n_oversamples. Smaller number can improve speed but can negatively impact the quality of approximation of singular vectors and singular values. n_iter : int or 'auto' (default is 'auto') - Number of power iterations. It can be used to deal with very noisy - problems. When 'auto', it is set to 4, unless `embedding_dimension` is small - (< .1 * min(X.shape)) `n_iter` in which case is set to 7. - This improves precision with few components. - .. versionchanged:: 0.18 - power_iteration_normalizer : 'auto' (default), 'QR', 'LU', 'none' - Whether the power iterations are normalized with step-by-step - QR factorization (the slowest but most accurate), 'none' - (the fastest but numerically unstable when `n_iter` is large, e.g. - typically 5 or larger), or 'LU' factorization (numerically stable - but can lose slightly in accuracy). The 'auto' mode applies no - normalization if `n_iter`<=2 and switches to LU otherwise. - .. versionadded:: 0.18 + See :meth:`randomized_range_finder` + power_iteration_normalizer : ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None`` + See :meth:`randomized_range_finder` transpose : True, False or 'auto' (default) - Whether the algorithm should be applied to M.T instead of M. The + Whether the algorithm should be applied to ``matrix.T`` instead of ``matrix``. The result should approximately be the same. The 'auto' mode will - trigger the transposition if M.shape[1] > M.shape[0] since this - implementation of randomized SVD tend to be a little faster in that - case. 
- .. versionchanged:: 0.18 - flip_sign : boolean, (True by default) + trigger the transposition if ``matrix.shape[1] > matrix.shape[0]`` since this + implementation of randomized SVD tends to be a little faster in that case. + flip_sign : boolean, (default=True) The output of a singular value decomposition is only unique up to a permutation of the signs of the singular vectors. If `flip_sign` is set to `True`, the sign ambiguity is resolved by making the largest loadings for each component in the left singular vectors positive. random_state : int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + See :meth:`randomized_range_finder` + Notes ----- This algorithm finds a (usually very good) approximate truncated singular value decomposition using randomization to speed up the computations. It is particularly fast on large matrices on which you wish to extract only a small number of components. In order to - obtain further speed up, `n_iter` can be set <=2 (at the cost of + obtain further speed up, ``n_iter`` can be set <=2 (at the cost of loss of precision). + References ---------- * Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061 + (algorithm 5.1) * A randomized algorithm for the decomposition of matrices Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert * An implementation of a randomized algorithm for principal component @@ -294,46 +286,40 @@ def randomized_svd(matrix, n_components: int, n_oversamples: int = 10, n_iter='a def randomized_eig(matrix, n_components: int, which='LM', n_oversamples: int = 10, n_iter='auto', - power_iteration_normalizer='auto', random_state=0): + power_iteration_normalizer: Union[str, None] = 'auto', random_state=0, one_pass: bool = False): """Computes a randomized eigenvalue decomposition. + Parameters ---------- matrix: ndarray or sparse matrix - Matrix to decompose + Matrix to decompose n_components: int - Number of singular values and vectors to extract. - which: which eigenvalues to compute. 'SM' for Smallest Magnitude, - any other entry will result in Smallest Magnitude. - n_oversamples : int (default is 10) - Additional number of random vectors to sample the range of M so as - to ensure proper conditioning. The total number of random vectors - used to find the range of M is embedding_dimension + n_oversamples. Smaller - number can improve speed but can negatively impact the quality of - approximation of singular vectors and singular values. + Number of singular values and vectors to extract. + which: str + which eigenvalues to compute. ``'LM'`` for Largest Magnitude and ``'SM'`` for Smallest Magnitude. + Any other entry will result in Largest Magnitude. + n_oversamples : int (default=10) + Additional number of random vectors to sample the range of ``matrix`` so as + to ensure proper conditioning. The total number of random vectors + used to find the range of ``matrix`` is ``n_components + n_oversamples``. Smaller number can improve speed + but can negatively impact the quality of approximation of singular vectors and singular values. n_iter: int or 'auto' (default is 'auto') - Number of power iterations. 
It can be used to deal with very noisy - problems. When 'auto', it is set to 4, unless `embedding_dimension` is small - (< .1 * min(X.shape)) `n_iter` in which case is set to 7. - This improves precision with few components. - .. versionchanged:: 0.18 - power_iteration_normalizer: 'auto' (default), 'QR', 'LU', 'none' - Whether the power iterations are normalized with step-by-step - QR factorization (the slowest but most accurate), 'none' - (the fastest but numerically unstable when `n_iter` is large, e.g. - typically 5 or larger), or 'LU' factorization (numerically stable - but can lose slightly in accuracy). The 'auto' mode applies no - normalization if `n_iter`<=2 and switches to LU otherwise. - .. versionadded:: 0.18 + See :meth:`randomized_range_finder` + power_iteration_normalizer: ``'auto'`` (default), ``'QR'``, ``'LU'``, ``None`` + See :meth:`randomized_range_finder` random_state: int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. + See :meth:`randomized_range_finder` + one_pass: bool (default=False) + whether to use algorithm 5.6 instead of 5.3. 5.6 requires less access to the original matrix, + while 5.3 is more accurate. Returns ------- - + References + ---------- + Finding structure with randomness: Stochastic algorithms for constructing + approximate matrix decompositions + Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061 """ random_state = check_random_state(random_state) @@ -356,7 +342,10 @@ def randomized_eig(matrix, n_components: int, which='LM', n_oversamples: int = 1 range_matrix, random_matrix, random_proj = randomized_range_finder(matrix, n_random, n_iter, power_iteration_normalizer, random_state, True) - approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix), random_proj.T.dot(range_matrix), None)[0].T + if one_pass: + approx_matrix = np.linalg.lstsq(random_matrix.T.dot(range_matrix), random_proj.T.dot(range_matrix), None)[0].T + else: + approx_matrix = (matrix.dot(range_matrix)).T.dot(range_matrix) eigenvalues, eigenvectors = np.linalg.eig(approx_matrix) diff --git a/sknetwork/embedding/spectral.py b/sknetwork/embedding/spectral.py index 02e920291..df61bc1ac 100644 --- a/sknetwork/embedding/spectral.py +++ b/sknetwork/embedding/spectral.py @@ -1,36 +1,34 @@ #!/usr/bin/env python3 # coding: utf-8 - """ Created on Thu Sep 13 2018 Authors: Thomas Bonald Nathan De Lara - -Spectral embedding by decomposition of the normalized graph Laplacian. 
""" import numpy as np from sknetwork.embedding.randomized_matrix_factorization import randomized_eig -from scipy import sparse, errstate, sqrt, isinf +from scipy import sparse from scipy.sparse import csgraph from scipy.sparse.linalg import eigsh +from typing import Union class SpectralEmbedding: """Weighted spectral embedding of a graph - Attributes + Parameters ---------- embedding_dimension : int, optional - Dimension of the embedding space (default=100) - eigenvalue_normalization : bool, optional - Whether to normalize the embedding by the pseudo-inverse square roots of laplacian eigenvalues - (default=True) - node_weights : {'uniform', 'degree', array of length n_nodes with positive entries}, optional + Dimension of the embedding space + node_weights : {``'uniform'``, ``'degree'``, array of length n_nodes with positive entries}, optional Weights used for the normalization for the laplacian, :math:`W^{-1/2} L W^{-1/2}` + + Attributes + ---------- embedding_ : array, shape = (n_nodes, embedding_dimension) Embedding matrix of the nodes eigenvalues_ : array, shape = (embedding_dimension) @@ -42,57 +40,58 @@ class SpectralEmbedding: * Laplacian Eigenmaps for Dimensionality Reduction and Data Representation, M. Belkin, P. Niyogi """ - def __init__(self, embedding_dimension: int = 2, node_weights='degree', eigenvalue_normalization: bool = True): + def __init__(self, embedding_dimension: int = 2, node_weights='degree'): self.embedding_dimension = embedding_dimension self.node_weights = node_weights - self.eigenvalue_normalization = eigenvalue_normalization self.embedding_ = None self.eigenvalues_ = None - def fit(self, adjacency_matrix, node_weights=None, randomized_decomposition: bool = True): + def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], node_weights=None, + randomized_decomposition: bool = True) -> 'SpectralEmbedding': """Fits the model from data in adjacency_matrix Parameters ---------- - adjacency_matrix : Scipy csr matrix or numpy ndarray + adjacency : array-like, shape = (n, n) Adjacency matrix of the graph - randomized_decomposition: whether to use a randomized (and faster) decomposition method or - the standard scipy one. - node_weights : {'uniform', 'degree', array of length n_nodes with positive entries} + randomized_decomposition: bool (default=True) + whether to use a randomized (and faster) decomposition method or the standard scipy one. 
+ node_weights : {``'uniform'``, ``'degree'``, array of length n_nodes with positive entries} Node weights + + Returns + ------- + self: :class:`SpectralEmbedding` """ - if type(adjacency_matrix) == sparse.csr_matrix: - adj_matrix = adjacency_matrix - elif sparse.isspmatrix(adjacency_matrix) or type(adjacency_matrix) == np.ndarray: - adj_matrix = sparse.csr_matrix(adjacency_matrix) + if type(adjacency) == sparse.csr_matrix: + adjacency = adjacency + elif sparse.isspmatrix(adjacency) or type(adjacency) == np.ndarray: + adjacency = sparse.csr_matrix(adjacency) else: raise TypeError( "The argument must be a NumPy array or a SciPy Sparse matrix.") - n_nodes, m_nodes = adj_matrix.shape + n_nodes, m_nodes = adjacency.shape if n_nodes != m_nodes: raise ValueError("The adjacency matrix must be a square matrix.") - if csgraph.connected_components(adj_matrix, directed=False)[0] > 1: + if csgraph.connected_components(adjacency, directed=False)[0] > 1: raise ValueError("The graph must be connected.") - if (adj_matrix != adj_matrix.maximum(adj_matrix.T)).nnz != 0: + if (adjacency != adjacency.maximum(adjacency.T)).nnz != 0: raise ValueError("The adjacency matrix is not symmetric.") # builds standard laplacian - degrees = adj_matrix.dot(np.ones(n_nodes)) + degrees = adjacency.dot(np.ones(n_nodes)) degree_matrix = sparse.diags(degrees, format='csr') - laplacian = degree_matrix - adj_matrix + laplacian = degree_matrix - adjacency # applies normalization by node weights if node_weights is None: node_weights = self.node_weights if type(node_weights) == str: if node_weights == 'uniform': - weight_matrix = sparse.identity(n_nodes) + weights = np.ones(n_nodes) elif node_weights == 'degree': - with errstate(divide='ignore'): - degrees_inv_sqrt = 1.0 / sqrt(degrees) - degrees_inv_sqrt[isinf(degrees_inv_sqrt)] = 0 - weight_matrix = sparse.diags(degrees_inv_sqrt, format='csr') + weights = degrees else: raise ValueError('Unknown weighting policy. 
Try \'degree\' or \'uniform\'.') else: @@ -101,11 +100,10 @@ def fit(self, adjacency_matrix, node_weights=None, randomized_decomposition: boo elif min(self.node_weights) < 0: raise ValueError('node_weights must be positive.') else: - with errstate(divide='ignore'): - weights_inv_sqrt = 1.0 / sqrt(self.node_weights) - weights_inv_sqrt[isinf(weights_inv_sqrt)] = 0 - weight_matrix = sparse.diags(weights_inv_sqrt, format='csr') + weights = self.node_weights + weight_matrix = sparse.diags(np.sqrt(weights), format='csr') + weight_matrix.data = 1 / weight_matrix.data laplacian = weight_matrix.dot(laplacian.dot(weight_matrix)) # spectral decomposition @@ -116,10 +114,5 @@ def fit(self, adjacency_matrix, node_weights=None, randomized_decomposition: boo eigenvalues, eigenvectors = eigsh(laplacian, n_components, which='SM') self.eigenvalues_ = eigenvalues[1:] - self.embedding_ = np.array(weight_matrix.dot(eigenvectors[:, 1:])) - if self.eigenvalue_normalization: - eigenvalues_inv_sqrt = 1.0 / sqrt(eigenvalues[1:]) - self.embedding_ = eigenvalues_inv_sqrt * self.embedding_ - return self diff --git a/sknetwork/embedding/svd.py b/sknetwork/embedding/svd.py deleted file mode 100755 index fcefd6ed7..000000000 --- a/sknetwork/embedding/svd.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Thu May 31 17:16:22 2018 - -@author: Nathan de Lara -""" - -import numpy as np - -from sknetwork.embedding.randomized_matrix_factorization import randomized_svd -from scipy import sparse, errstate, sqrt, isinf, linalg - - -def normalized_adjacency(adjacency_matrix): - """Normalized adjacency matrix :math:`(D_{in}^{+})^{1/2} A (D_{out}^{+})^{1/2}`. - Parameters - ---------- - adjacency_matrix: sparse.csr_matrix or np.ndarray - - Returns - ------- - The normalized adjacency matrix. - """ - - if type(adjacency_matrix) == sparse.csr_matrix: - adj_matrix = adjacency_matrix - elif type(adjacency_matrix) == np.ndarray: - adj_matrix = sparse.csr_matrix(adjacency_matrix) - else: - raise TypeError( - "The argument must be a NumPy array or a SciPy Compressed Sparse Row matrix.") - n_nodes, m_nodes = adj_matrix.shape - - # out-degree vector - dou = adj_matrix.dot(np.ones(n_nodes)) - # in-degree vector - din = adj_matrix.T.dot(np.ones(m_nodes)) - - with errstate(divide='ignore'): - dou_sqrt = 1.0 / sqrt(dou) - din_sqrt = 1.0 / sqrt(din) - dou_sqrt[isinf(dou_sqrt)] = 0 - din_sqrt[isinf(din_sqrt)] = 0 - # pseudo inverse square-root out-degree matrix - dhou = sparse.spdiags(dou_sqrt, [0], n_nodes, n_nodes, format='csr') - # pseudo inverse square-root in-degree matrix - dhin = sparse.spdiags(din_sqrt, [0], m_nodes, m_nodes, format='csr') - - return dhou.dot(adj_matrix.dot(dhin)) - - -class ForwardBackwardEmbedding: - """Forward and Backward embeddings for non-linear dimensionality reduction. - - Parameters - ----------- - embedding_dimension: int, optional - The dimension of the projected subspace (default=2). - - Attributes - ---------- - embedding_ : array, shape = (n_samples, embedding_dimension) - Forward embedding of the training matrix. - backward_embedding_ : array, shape = (n_samples, embedding_dimension) - Backward embedding of the training matrix. - singular_values_ : array, shape = (embedding_dimension) - Singular values of the training matrix - - References - ---------- - - Bonald, De Lara. "The Forward-Backward Embedding of Directed Graphs." 
- """ - - def __init__(self, embedding_dimension=2): - self.embedding_dimension = embedding_dimension - self.embedding_ = None - self.backward_embedding_ = None - self.singular_values_ = None - - def fit(self, adjacency_matrix, randomized_decomposition: bool = True, tol=1e-6, n_iter='auto', - power_iteration_normalizer='auto', random_state=None): - """Fits the model from data in adjacency_matrix. - - Parameters - ---------- - adjacency_matrix: array-like, shape = (n, m) - Adjacency matrix, where n = m = |V| for a standard graph, - n = |V1|, m = |V2| for a bipartite graph. - randomized_decomposition: whether to use a randomized (and faster) svd method or - the standard scipy one. - tol: float, optional - Tolerance for pseudo-inverse of singular values (default=1e-6). - n_iter: int or 'auto' (default is 'auto') - Number of power iterations. It can be used to deal with very noisy - problems. When 'auto', it is set to 4, unless `embedding_dimension` is small - (< .1 * min(X.shape)) `n_iter` in which case is set to 7. - This improves precision with few components. - power_iteration_normalizer: 'auto' (default), 'QR', 'LU', 'none' - Whether the power iterations are normalized with step-by-step - QR factorization (the slowest but most accurate), 'none' - (the fastest but numerically unstable when `n_iter` is large, e.g. - typically 5 or larger), or 'LU' factorization (numerically stable - but can lose slightly in accuracy). The 'auto' mode applies no - normalization if `n_iter`<=2 and switches to LU otherwise. - random_state: int, RandomState instance or None, optional (default=None) - The seed of the pseudo random number generator to use when shuffling - the data. If int, random_state is the seed used by the random number - generator; If RandomState instance, random_state is the random number - generator; If None, the random number generator is the RandomState - instance used by `np.random`. 
- - Returns - ------- - self - - """ - if type(adjacency_matrix) == sparse.csr_matrix: - adj_matrix = adjacency_matrix - elif type(adjacency_matrix) == np.ndarray: - adj_matrix = sparse.csr_matrix(adjacency_matrix) - else: - raise TypeError( - "The argument must be a NumPy array or a SciPy Compressed Sparse Row matrix.") - n_nodes, m_nodes = adj_matrix.shape - - # out-degree vector - dou = adj_matrix.dot(np.ones(n_nodes)) - # in-degree vector - din = adj_matrix.T.dot(np.ones(m_nodes)) - - with errstate(divide='ignore'): - dou_sqrt = 1.0 / sqrt(dou) - din_sqrt = 1.0 / sqrt(din) - dou_sqrt[isinf(dou_sqrt)] = 0 - din_sqrt[isinf(din_sqrt)] = 0 - # pseudo inverse square-root out-degree matrix - dhou = sparse.spdiags(dou_sqrt, [0], n_nodes, n_nodes, format='csr') - # pseudo inverse square-root in-degree matrix - dhin = sparse.spdiags(din_sqrt, [0], m_nodes, m_nodes, format='csr') - - laplacian = dhou.dot(adj_matrix.dot(dhin)) - - if randomized_decomposition: - u, sigma, vt = randomized_svd(laplacian, self.embedding_dimension, - n_iter=n_iter, - power_iteration_normalizer=power_iteration_normalizer, - random_state=random_state) - else: - u, sigma, vt = linalg.svds(laplacian, self.embedding_dimension) - - self.singular_values_ = sigma - - gamma = 1 - sigma ** 2 - gamma_sqrt = np.diag(np.piecewise(gamma, [gamma > tol, gamma <= tol], [lambda x: 1 / np.sqrt(x), 0])) - self.embedding_ = dhou.dot(u).dot(gamma_sqrt) - self.backward_embedding_ = dhin.dot(vt.T).dot(gamma_sqrt) - - return self diff --git a/sknetwork/embedding/tests/test_embeddings.py b/sknetwork/embedding/tests/test_embeddings.py index c553c6dd9..699d29367 100644 --- a/sknetwork/embedding/tests/test_embeddings.py +++ b/sknetwork/embedding/tests/test_embeddings.py @@ -1,13 +1,9 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for metrics.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Nathan de Lara -# -# This file is part of Scikit-network. 
+"""tests for embeddings""" import unittest -from sknetwork.embedding.svd import ForwardBackwardEmbedding +from sknetwork.embedding.gsvd import GSVDEmbedding from sknetwork.embedding.spectral import SpectralEmbedding from scipy import sparse import numpy as np @@ -20,13 +16,22 @@ def setUp(self): [1, 0, 0, 0], [1, 0, 0, 1], [1, 0, 1, 0]])) + self.bipartite = sparse.csr_matrix(np.array([[1, 0, 1], + [1, 0, 0], + [1, 1, 1], + [0, 1, 1]])) - def test_forwardbackward(self): - fb = ForwardBackwardEmbedding(2) - fb.fit(self.graph) - self.assertTrue(fb.embedding_.shape == (4, 2)) - self.assertTrue(fb.backward_embedding_.shape == (4, 2)) - self.assertTrue(type(fb.singular_values_) == np.ndarray and len(fb.singular_values_) == 2) + def test_gsvd(self): + gsvd = GSVDEmbedding(2) + gsvd.fit(self.graph) + self.assertTrue(gsvd.embedding_.shape == (4, 2)) + self.assertTrue(gsvd.features_.shape == (4, 2)) + self.assertTrue(type(gsvd.singular_values_) == np.ndarray and len(gsvd.singular_values_) == 2) + + gsvd.fit(self.bipartite) + self.assertTrue(gsvd.embedding_.shape == (4, 2)) + self.assertTrue(gsvd.features_.shape == (3, 2)) + self.assertTrue(type(gsvd.singular_values_) == np.ndarray and len(gsvd.singular_values_) == 2) def test_spectral(self): sp = SpectralEmbedding(2) diff --git a/sknetwork/embedding/tests/test_metrics.py b/sknetwork/embedding/tests/test_metrics.py index 102332bf8..26d6a841f 100644 --- a/sknetwork/embedding/tests/test_metrics.py +++ b/sknetwork/embedding/tests/test_metrics.py @@ -1,15 +1,12 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for metrics.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Nathan de Lara -# -# This file is part of Scikit-network. +"""tests for embeddings metrics""" import unittest import numpy as np from scipy import sparse -from sknetwork.embedding.metrics import cosine_modularity +from sknetwork.embedding.metrics import dot_modularity, hscore +from sknetwork import star_wars_villains_graph class TestClusteringMetrics(unittest.TestCase): @@ -19,7 +16,23 @@ def setUp(self): [1, 0, 0, 0], [1, 0, 0, 1], [1, 0, 1, 0]])) + self.bipartite = star_wars_villains_graph() self.embedding = np.ones((4, 2)) + self.features = np.ones((3, 2)) - def test_cosine_modularity(self): - self.assertAlmostEqual(cosine_modularity(self.graph, self.embedding), 0.) + def test_dot_modularity(self): + self.assertAlmostEqual(dot_modularity(self.graph, self.embedding), 0.) + fit, diversity = dot_modularity(self.graph, self.embedding, return_all=True) + self.assertAlmostEqual(fit, 1.) + self.assertAlmostEqual(diversity, 1.) + + self.assertAlmostEqual(dot_modularity(self.bipartite, self.embedding, self.features), 0.) + fit, diversity = dot_modularity(self.bipartite, self.embedding, self.features, return_all=True) + self.assertAlmostEqual(fit, 1.) + self.assertAlmostEqual(diversity, 1.) + + def test_hscore(self): + self.assertAlmostEqual(hscore(self.graph, self.embedding), 0.) + fit, diversity = hscore(self.graph, self.embedding, return_all=True) + self.assertAlmostEqual(fit, 1.) + self.assertAlmostEqual(diversity, 0.) 
diff --git a/sknetwork/embedding/tests/test_randomized_matrix_factorization.py b/sknetwork/embedding/tests/test_randomized_matrix_factorization.py index ef025c0c8..3940856c7 100644 --- a/sknetwork/embedding/tests/test_randomized_matrix_factorization.py +++ b/sknetwork/embedding/tests/test_randomized_matrix_factorization.py @@ -1,10 +1,6 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for metrics.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Nathan de Lara -# -# This file is part of Scikit-network. +"""tests for randomized matrix factorization""" import unittest import numpy as np diff --git a/sknetwork/hierarchy/__init__.py b/sknetwork/hierarchy/__init__.py index e69de29bb..acef6c2e1 100644 --- a/sknetwork/hierarchy/__init__.py +++ b/sknetwork/hierarchy/__init__.py @@ -0,0 +1,2 @@ +from sknetwork.hierarchy.paris import * +from sknetwork.hierarchy.metrics import * diff --git a/sknetwork/hierarchy/metrics.py b/sknetwork/hierarchy/metrics.py index eea85de18..eec029bc7 100644 --- a/sknetwork/hierarchy/metrics.py +++ b/sknetwork/hierarchy/metrics.py @@ -1,137 +1,184 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# metrics.py - functions for computing metrics on hierarchy -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Bertrand Charpentier -# -# This file is part of Scikit-network. -# -# NetworkX is distributed under a BSD license; see LICENSE.txt for more information. """ -Metrics for hierarchy +Created on March 2019 +@author: Thomas Bonald """ import numpy as np -import networkx as nx - -_AFFINITY = {'unitary', 'weighted'} -_LINKAGE = {'classic'} -_ADMISSIBLE_FUNCTION = {'dasgupta'} - - -def hierarchical_cost(graph, dendrogram, affinity='weighted', linkage='classic', g=lambda a, b: a + b, check=True): - """Compute the hierarchichal cost of an undirected graph hierarchy given a linkage and a admissible function g. - The graph can be weighted or unweighted - - Parameters - ---------- - graph : NetworkX graph - An undirected graph. - dendrogram : numpy array - A dendrogram - affinity : string (default: 'weighted') - The affinity can be either 'weighted' or 'unitary'. - Value 'weighted' takes the attribute 'weight' on the edges. - Value 'unitary' consider that the edges have a weight equal to 1 - linkage : string, optional (default: 'modular') - The parameter linkage can be 'single', 'average', 'complete' or 'modular'. - - =============== ======================================== - Value Linkage - =============== ======================================== - 'single' max wij - 'average' min wij - 'complete' n^2/w*wij/(|i||j|) - 'modular' w wij/(wi wj) - =============== ======================================== - - g : function, optional (default: lambda a, b: a + b) - The function g must be admissible. The classic choice is the dasgupta function. - check : bool, optional (default: True) - If True, reorder the node labels and check that the edges have the - 'weight' attribute corresponding to the given affinity. - - Returns - ------- - dendrogram : numpy array - dendrogram. - - Raises - ------ - ValueError - If the affinity or the linkage is not known. - KeyError - If all the edges do not have the 'weight' attribute with the 'weighted' affinity. 
- - Notes - ----- - This is a classic implementation of the hierarchical cost function - - See Also - -------- - laplacian_matrix +from scipy import sparse +from sknetwork.hierarchy import AggregateGraph +from typing import Union + + +def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, + node_weights: Union[str, np.ndarray] = 'uniform', normalized: bool = True) -> float: + """Dasgupta's cost of a hierarchy (cost metric) + + Parameters + ---------- + adjacency : + Adjacency matrix of the graph. + dendrogram : + Each row contains the two merged nodes, the height in the dendrogram, and the size of the corresponding cluster + node_weights : + Vector of node weights. Default = 'uniform', weight 1 for each node. + normalized: + If true, normalized by the number of nodes of the graph. + + Returns + ------- + cost : float + Dasgupta's cost of the hierarchy. + Normalized by the number of nodes to get a value between 0 and 1. + + References + ---------- + S. Dasgupta. A cost function for similarity-based hierarchical clustering. + In Proceedings of ACM symposium on Theory of Computing, 2016. + """ - if affinity not in _AFFINITY: - raise ValueError("Unknown affinity type %s." - "Valid options are %s" % (affinity, _AFFINITY)) + if type(adjacency) != sparse.csr_matrix: + raise TypeError('The adjacency matrix must be in a scipy compressed sparse row (csr) format.') + # check that the graph is not directed + if adjacency.shape[0] != adjacency.shape[1]: + raise ValueError('The adjacency matrix must be square.') + if (adjacency != adjacency.T).nnz != 0: + raise ValueError('The graph cannot be directed. Please fit a symmetric adjacency matrix.') + + n_nodes = adjacency.shape[0] + + if type(node_weights) == np.ndarray: + if len(node_weights) != n_nodes: + raise ValueError('The number of node weights must match the number of nodes.') + else: + node_weights_vec = node_weights + elif type(node_weights) == str: + if node_weights == 'degree': + node_weights_vec = adjacency.dot(np.ones(n_nodes)) + elif node_weights == 'uniform': + node_weights_vec = np.ones(n_nodes) + else: + raise ValueError('Unknown distribution of node weights.') + else: + raise TypeError( + 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') - if linkage not in _LINKAGE: - raise ValueError("Unknown linkage type %s."
- "Valid options are %s" % (linkage, _LINKAGE)) + if np.any(node_weights_vec <= 0): + raise ValueError('All node weights must be positive.') + else: + node_weights_vec = node_weights_vec / np.sum(node_weights_vec) - graph_copy = graph.copy() + aggregate_graph = AggregateGraph(adjacency, node_weights_vec) - if check: + height = np.zeros(n_nodes - 1) + edge_sampling = np.zeros(n_nodes - 1) + cluster_weight = np.zeros(n_nodes - 1) + for t in range(n_nodes - 1): + node1 = int(dendrogram[t][0]) + node2 = int(dendrogram[t][1]) + if node1 >= n_nodes and height[node1 - n_nodes] == dendrogram[t][2]: + edge_sampling[t] = edge_sampling[node1 - n_nodes] + edge_sampling[node1 - n_nodes] = 0 + elif node2 >= n_nodes and height[node2 - n_nodes] == dendrogram[t][2]: + edge_sampling[t] = edge_sampling[node2 - n_nodes] + edge_sampling[node2 - n_nodes] = 0 + height[t] = dendrogram[t][2] + edge_sampling[t] += 2 * aggregate_graph.graph[node1][node2] + cluster_weight[t] = aggregate_graph.cluster_probs[node1] + aggregate_graph.cluster_probs[node2] + aggregate_graph.merge(node1, node2) + + cost: float = (edge_sampling * cluster_weight).sum() + if not normalized: + cost *= node_weights_vec.sum() + return cost - graph_copy = nx.convert_node_labels_to_integers(graph_copy) - if affinity == 'unitary': - for e in graph_copy.edges: - graph_copy.add_edge(e[0], e[1], weight=1) +def tree_sampling_divergence(adj_matrix: sparse.csr_matrix, dendrogram: np.ndarray, + node_weights: Union[str, np.ndarray] = 'degree', normalized: bool = True) -> float: + """Tree sampling divergence of a hierarchy (quality metric) - n_edges = len(list(graph_copy.edges())) - n_weighted_edges = len(nx.get_edge_attributes(graph_copy, 'weight')) - if affinity == 'weighted' and not n_weighted_edges == n_edges: - raise KeyError("%s edges among %s do not have the attribute/key \'weigth\'." - % (n_edges - n_weighted_edges, n_edges)) + Parameters + ---------- + adj_matrix : + Adjacency matrix of the graph. + dendrogram : + Each row contains the two merged nodes, the height in the dendrogram, and the size of the corresponding cluster + node_weights : + Vector of node weights. Default = 'degree', weight of each node in the graph. + normalized: + If true, normalized by the mutual information of the graph. - if linkage == 'classic': - cost = classic_linkage_hierarchical_cost(graph_copy, dendrogram, g) - else: - cost = 0. + Returns + ------- + quality : float + The tree sampling divergence of the hierarchy (quality metric). + Normalized by the mutual information to get a value between 0 and 1. - return cost + References + ---------- + T. Bonald, B. Charpentier (2018), Learning Graph Representations by Dendrograms, https://arxiv.org/abs/1807.05087 + """ -def classic_linkage_hierarchical_cost(graph, dendrogram, g): - n_nodes = np.shape(dendrogram)[0] + 1 + if type(adj_matrix) != sparse.csr_matrix: + raise TypeError('The adjacency matrix must be in a scipy compressed sparse row (csr) format.') + # check that the graph is not directed + if adj_matrix.shape[0] != adj_matrix.shape[1]: + raise ValueError('The adjacency matrix must be square.') + if (adj_matrix != adj_matrix.T).nnz != 0: + raise ValueError('The graph cannot be directed. 
Please fit a symmetric adjacency matrix.') + + n_nodes = adj_matrix.shape[0] + + if type(node_weights) == np.ndarray: + if len(node_weights) != n_nodes: + raise ValueError('The number of node weights must match the number of nodes.') + else: + node_weights_vec = node_weights + elif type(node_weights) == str: + if node_weights == 'degree': + node_weights_vec = adj_matrix.dot(np.ones(n_nodes)) + elif node_weights == 'uniform': + node_weights_vec = np.ones(n_nodes) + else: + raise ValueError('Unknown distribution of node weights.') + else: + raise TypeError( + 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') - cost = 0 + if np.any(node_weights_vec <= 0): + raise ValueError('All node weights must be positive.') + else: + node_weights_vec = node_weights_vec / np.sum(node_weights_vec) - u = n_nodes + aggregate_graph = AggregateGraph(adj_matrix, node_weights_vec) - cluster_size = {t: 1 for t in range(n_nodes)} + height = np.zeros(n_nodes - 1) + edge_sampling = np.zeros(n_nodes - 1) + node_sampling = np.zeros(n_nodes - 1) for t in range(n_nodes - 1): - a = int(dendrogram[t][0]) - b = int(dendrogram[t][1]) - - linkage = graph[a][b]['weight'] - cost += linkage * g(cluster_size[a], cluster_size[b]) - - graph.add_node(u) - neighbors_a = list(graph.neighbors(a)) - neighbors_b = list(graph.neighbors(b)) - for v in neighbors_a: - graph.add_edge(u, v, weight=graph[a][v]['weight']) - for v in neighbors_b: - if graph.has_edge(u, v): - graph[u][v]['weight'] += graph[b][v]['weight'] - else: - graph.add_edge(u, v, weight=graph[b][v]['weight']) - graph.remove_node(a) - graph.remove_node(b) - cluster_size[u] = cluster_size.pop(a) + cluster_size.pop(b) - u += 1 - - return cost + node1 = int(dendrogram[t][0]) + node2 = int(dendrogram[t][1]) + if node1 >= n_nodes and height[node1 - n_nodes] == dendrogram[t][2]: + edge_sampling[t] = edge_sampling[node1 - n_nodes] + edge_sampling[node1 - n_nodes] = 0 + node_sampling[t] = node_sampling[node1 - n_nodes] + elif node2 >= n_nodes and height[node2 - n_nodes] == dendrogram[t][2]: + edge_sampling[t] = edge_sampling[node2 - n_nodes] + edge_sampling[node2 - n_nodes] = 0 + node_sampling[t] = node_sampling[node2 - n_nodes] + edge_sampling[t] += 2 * aggregate_graph.graph[node1][node2] + node_sampling[t] += aggregate_graph.cluster_probs[node1] * aggregate_graph.cluster_probs[node2] + height[t] = dendrogram[t][2] + aggregate_graph.merge(node1, node2) + + index = np.where(edge_sampling)[0] + quality: float = np.sum(edge_sampling[index] * np.log(edge_sampling[index] / node_sampling[index])) + if normalized: + inv_node_weights = sparse.diags(1 / node_weights_vec, shape=(n_nodes, n_nodes), format='csr') + sampling_ratio = inv_node_weights.dot(adj_matrix.dot(inv_node_weights)) / adj_matrix.data.sum() + mutual_information = np.sum(adj_matrix.data / adj_matrix.data.sum() * np.log(2 * sampling_ratio.data)) + quality /= mutual_information + return quality diff --git a/sknetwork/hierarchy/paris.py b/sknetwork/hierarchy/paris.py index dc25e2457..9cee3d6df 100644 --- a/sknetwork/hierarchy/paris.py +++ b/sknetwork/hierarchy/paris.py @@ -1,151 +1,106 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# paris.py - agglomerative clustering algorithm -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Thomas Bonald -# Copyright 2018 Bertrand Charpentier -# -# This file is part of Scikit-network. -# -# NetworkX is distributed under a BSD license; see LICENSE.txt for more information. 
- -try: - # noinspection PyPackageRequirements - from numba import njit -except ImportError: - def njit(func): - return func +""" +Created on March 2019 +@author: Thomas Bonald +@author: Bertrand Charpentier +""" + import numpy as np from scipy import sparse +from typing import Union -class SimilarityGraph: +class AggregateGraph: """ - A class of similarity graph suitable for aggregation. + A class of graph suitable for aggregation. Each node represents a cluster. Attributes ---------- - current_node: int - current node index - neighbor_sim: np.ndarray of lists of tuples - list of (neighbor, similarity) for each node - active_nodes: set - set of active nodes - node_sizes: np.ndarray(dtype=int) - vector of node sizes - + graph : dict[dict] + Dictionary of dictionary of edge weights. + next_cluster : int + Index of the next cluster (resulting from aggregation). + cluster_sizes : dict + Dictionary of cluster sizes. + cluster_probs : dict + Dictionary of cluster probabilities. """ - def __init__(self, adj_matrix, node_weights='degree'): + def __init__(self, adjacency: sparse.csr_matrix, node_probs: np.ndarray): """ Parameters ---------- - adj_matrix: scipy.csr_matrix - adjacency matrix of the graph - node_weights: Union[str,np.ndarray(dtype=float)] - vector of node weights + adjacency : + Adjacency matrix of the graph. + node_probs : + Distribution of node weights. """ - n_nodes = adj_matrix.shape[0] - if type(node_weights) == np.ndarray: - if len(node_weights) != n_nodes: - raise ValueError('The number of node weights must match the number of nodes.') - if np.any(node_weights <= 0): - raise ValueError('All node weights must be positive.') - else: - node_weights_vec = node_weights - elif type(node_weights) == str: - if node_weights == 'degree': - node_weights_vec = adj_matrix.dot(np.ones(n_nodes)) - elif node_weights == 'uniform': - node_weights_vec = np.ones(n_nodes) / n_nodes - else: - raise ValueError('Unknown distribution type.') - else: + n_nodes = adjacency.shape[0] + total_weight = adjacency.data.sum() - raise TypeError( - 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') - - self.current_node = n_nodes - # arrays have size 2 * n_nodes - 1 for the future nodes (resulting from merges) - self.neighbor_sim = np.empty((2 * n_nodes - 1,), dtype=object) - inv_weights_diag = sparse.spdiags(1 / node_weights_vec, [0], n_nodes, n_nodes, format='csr') - sim_matrix = inv_weights_diag.dot(adj_matrix.dot(inv_weights_diag)) + self.next_cluster = n_nodes + self.graph = {} for node in range(n_nodes): - start = sim_matrix.indptr[node] - end = sim_matrix.indptr[node + 1] - self.neighbor_sim[node] = [(sim_matrix.indices[i], sim_matrix.data[i]) for i in range(start, end) if - sim_matrix.indices[i] != node] - self.active_nodes = set(range(n_nodes)) - self.node_sizes = np.zeros(2 * n_nodes - 1, dtype=int) - self.node_sizes[:n_nodes] = np.ones(n_nodes, dtype=int) - self.node_weights = np.zeros(2 * n_nodes - 1, dtype=float) - self.node_weights[:n_nodes] = node_weights_vec - - def merge(self, nodes): - """ - Merges two nodes. 
+ # normalize so that the total weight is equal to 1 + # remove self-loops + self.graph[node] = {adjacency.indices[i]: adjacency.data[i] / total_weight for i in + range(adjacency.indptr[node], adjacency.indptr[node + 1]) + if adjacency.indices[i] != node} + self.cluster_sizes = {node: 1 for node in range(n_nodes)} + self.cluster_probs = {node: node_probs[node] for node in range(n_nodes)} + + def merge(self, node1: int, node2: int) -> 'AggregateGraph': + """Merges two nodes. Parameters ---------- - nodes: tuple - The two nodes + node1, node2 : + The two nodes to merge. + Returns ------- - The aggregated graph + self: :class:`AggregateGraph` + The aggregated graph (without self-loop). """ - first_node = nodes[0] - second_node = nodes[1] - first_weight = self.node_weights[first_node] - second_weight = self.node_weights[second_node] - new_weight = first_weight + second_weight - first_list = self.neighbor_sim[first_node] - second_list = self.neighbor_sim[second_node] - new_list = [] - while first_list and second_list: - if first_list[-1][0] > second_list[-1][0]: - node, sim = first_list.pop() - if node != second_node: - new_sim = sim * first_weight / new_weight - new_list.append((node, new_sim)) - self.neighbor_sim[node].append((self.current_node, new_sim)) - elif first_list[-1][0] < second_list[-1][0]: - node, sim = second_list.pop() - if node != first_node: - new_sim = sim * second_weight / new_weight - new_list.append((node, new_sim)) - self.neighbor_sim[node].append((self.current_node, new_sim)) - else: - node, first_sim = first_list.pop() - _, second_sim = second_list.pop() - # weighted sum of similarities - new_sim = (first_sim * first_weight + second_sim * second_weight) / new_weight - new_list.append((node, new_sim)) - self.neighbor_sim[node].append((self.current_node, new_sim)) - while first_list: - node, sim = first_list.pop() - if node != second_node: - new_sim = sim * first_weight / new_weight - new_list.append((node, new_sim)) - self.neighbor_sim[node].append((self.current_node, new_sim)) - while second_list: - node, sim = second_list.pop() - if node != first_node: - new_sim = sim * second_weight / new_weight - new_list.append((node, new_sim)) - self.neighbor_sim[node].append((self.current_node, new_sim)) - self.neighbor_sim[self.current_node] = new_list - self.active_nodes -= set(nodes) - self.active_nodes.add(self.current_node) - self.node_sizes[self.current_node] = self.node_sizes[[nodes]].sum() - self.node_sizes[[nodes]] = (0, 0) - self.node_weights[self.current_node] = self.node_weights[[nodes]].sum() - self.current_node += 1 + new_node = self.next_cluster + self.graph[new_node] = {} + common_neighbors = set(self.graph[node1]) & set(self.graph[node2]) - {node1, node2} + for node in common_neighbors: + self.graph[new_node][node] = self.graph[node1][node] + self.graph[node2][node] + self.graph[node][new_node] = self.graph[node].pop(node1) + self.graph[node].pop(node2) + node1_neighbors = set(self.graph[node1]) - set(self.graph[node2]) - {node2} + for node in node1_neighbors: + self.graph[new_node][node] = self.graph[node1][node] + self.graph[node][new_node] = self.graph[node].pop(node1) + node2_neighbors = set(self.graph[node2]) - set(self.graph[node1]) - {node1} + for node in node2_neighbors: + self.graph[new_node][node] = self.graph[node2][node] + self.graph[node][new_node] = self.graph[node].pop(node2) + del self.graph[node1] + del self.graph[node2] + self.cluster_sizes[new_node] = self.cluster_sizes.pop(node1) + self.cluster_sizes.pop(node2) + self.cluster_probs[new_node] = 
self.cluster_probs.pop(node1) + self.cluster_probs.pop(node2) + self.next_cluster += 1 return self -def reorder_dendrogram(dendrogram: np.ndarray): +def reorder_dendrogram(dendrogram: np.ndarray) -> np.ndarray: + """ + Get the dendrogram in increasing order of height. + + Parameters + ---------- + dendrogram: + Original dendrogram. + + Returns + ------- + dendrogram: np.ndarray + Reordered dendrogram. + """ n_nodes = np.shape(dendrogram)[0] + 1 order = np.zeros((2, n_nodes - 1), float) order[0] = np.arange(n_nodes - 1) @@ -159,13 +114,39 @@ def reorder_dendrogram(dendrogram: np.ndarray): class Paris: - """ - Agglomerative algorithm. + """Agglomerative algorithm. Attributes ---------- - dendrogram_: numpy array of shape (n_nodes - 1) x 4 - dendrogram of the nodes + dendrogram_ : numpy array of shape (n_nodes - 1, 4) + Dendrogram. + + Examples + -------- + >>> import numpy as np + >>> from scipy import sparse + + >>> # House graph + >>> row = np.array([0, 0, 1, 1, 2, 3]) + >>> col = np.array([1, 4, 2, 4, 3, 4]) + >>> adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(5, 5)) + >>> adjacency = adjacency + adjacency.T + + >>> paris = Paris() + >>> paris.fit(adjacency).predict() + array([1, 1, 0, 0, 1]) + + Notes + ----- + Each row of the dendrogram = i, j, height, size of cluster i + j. + + The similarity between clusters i,j is :math:`\\dfrac{w_{ij}}{w_i w_j}` where + + * :math:`w_{ij}` is the weight of edge i,j, if any, and 0 otherwise, + + * :math:`w_{i}` is the weight of cluster i + + * :math:`w_{j}` is the weight of cluster j See Also -------- @@ -175,56 +156,81 @@ class Paris: ---------- T. Bonald, B. Charpentier, A. Galland, A. Hollocou (2018). Hierarchical Graph Clustering using Node Pair Sampling. - KDD Workshop, 2018. + Workshop on Mining and Learning with Graphs. + https://arxiv.org/abs/1806.01664 + """ def __init__(self): self.dendrogram_ = None - def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree", reorder=False): + def fit(self, adjacency: sparse.csr_matrix, node_weights: Union[str, np.ndarray] = 'degree', reorder: bool = True): """ - Agglomerative clustering using the nearest neighbor chain + Agglomerative clustering using the nearest neighbor chain. Parameters ---------- - adj_matrix: scipy.csr_matrix - adjacency matrix of the graph to cluster - node_weights: np.ndarray(dtype=float) - node weights to be used in the linkage - reorder: boolean - reorder the dendrogram in increasing order of heights + adjacency : + Adjacency matrix of the graph to cluster. + node_weights : + Node weights used in the linkage. + reorder : + If True, reorder the dendrogram in increasing order of heights. Returns ------- - self + self: :class:`Paris` """ - if type(adj_matrix) != sparse.csr_matrix: + if type(adjacency) != sparse.csr_matrix: raise TypeError('The adjacency matrix must be in a scipy compressed sparse row (csr) format.') - # check that the graph is not directed - if adj_matrix.shape[0] != adj_matrix.shape[1]: + if adjacency.shape[0] != adjacency.shape[1]: raise ValueError('The adjacency matrix must be square.') - if (adj_matrix != adj_matrix.T).nnz != 0: + if adjacency.shape[0] <= 1: + raise ValueError('The graph must contain at least two nodes.') + if (adjacency != adjacency.T).nnz != 0: raise ValueError('The graph cannot be directed. 
Please fit a symmetric adjacency matrix.') - sim_graph = SimilarityGraph(adj_matrix, node_weights) + n_nodes = adjacency.shape[0] + + if type(node_weights) == np.ndarray: + if len(node_weights) != n_nodes: + raise ValueError('The number of node weights must match the number of nodes.') + else: + node_probs = node_weights + elif type(node_weights) == str: + if node_weights == 'degree': + node_probs = adjacency.dot(np.ones(n_nodes)) + elif node_weights == 'uniform': + node_probs = np.ones(n_nodes) + else: + raise ValueError('Unknown distribution of node weights.') + else: + raise TypeError( + 'Node weights must be a known distribution ("degree" or "uniform" string) or a custom NumPy array.') + + if np.any(node_probs <= 0): + raise ValueError('All node weights must be positive.') + else: + node_probs = node_probs / np.sum(node_probs) + + aggregate_graph = AggregateGraph(adjacency, node_probs) connected_components = [] dendrogram = [] - while len(sim_graph.active_nodes) > 0: + while len(aggregate_graph.cluster_sizes) > 0: node = None - for node in sim_graph.active_nodes: + for node in aggregate_graph.cluster_sizes: break chain = [node] while chain: node = chain.pop() - # filter active nodes - sim_graph.neighbor_sim[node] = [element for element in sim_graph.neighbor_sim[node] if - (sim_graph.node_sizes[element[0]] > 0)] - if sim_graph.neighbor_sim[node]: + if aggregate_graph.graph[node]: max_sim = -float("inf") nearest_neighbor = None - for neighbor, sim in sim_graph.neighbor_sim[node]: + for neighbor in aggregate_graph.graph[node]: + sim = aggregate_graph.graph[node][neighbor] / aggregate_graph.cluster_probs[node] / \ + aggregate_graph.cluster_probs[neighbor] if sim > max_sim: nearest_neighbor = neighbor max_sim = sim @@ -234,8 +240,9 @@ def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree", reorder=Fals nearest_neighbor_last = chain.pop() if nearest_neighbor_last == nearest_neighbor: dendrogram.append([node, nearest_neighbor, 1. 
/ max_sim, - sim_graph.node_sizes[node] + sim_graph.node_sizes[nearest_neighbor]]) - sim_graph.merge((node, nearest_neighbor)) + aggregate_graph.cluster_sizes[node] + + aggregate_graph.cluster_sizes[nearest_neighbor]]) + aggregate_graph.merge(node, nearest_neighbor) else: chain.append(nearest_neighbor_last) chain.append(node) @@ -244,16 +251,15 @@ def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree", reorder=Fals chain.append(node) chain.append(nearest_neighbor) else: - connected_components.append((node, sim_graph.node_sizes[node])) - sim_graph.active_nodes -= {node} - sim_graph.node_sizes[node] = 0 + connected_components.append((node, aggregate_graph.cluster_sizes[node])) + del aggregate_graph.cluster_sizes[node] - node, node_size = connected_components.pop() - for next_node, next_node_size in connected_components: - node_size += next_node_size - dendrogram.append([node, next_node, float("inf"), node_size]) - node = sim_graph.current_node - sim_graph.current_node += 1 + node, cluster_size = connected_components.pop() + for next_node, next_cluster_size in connected_components: + cluster_size += next_cluster_size + dendrogram.append([node, next_node, float("inf"), cluster_size]) + node = aggregate_graph.next_cluster + aggregate_graph.next_cluster += 1 dendrogram = np.array(dendrogram) if reorder: @@ -262,3 +268,37 @@ def fit(self, adj_matrix: sparse.csr_matrix, node_weights="degree", reorder=Fals self.dendrogram_ = dendrogram return self + + def predict(self, n_clusters: int = 2, sorted_clusters: bool = False) -> np.ndarray: + """ + Extract the clustering with given number of clusters from the dendrogram. + + Parameters + ---------- + n_clusters : + Number of clusters. + sorted_clusters : + If True, sort labels in decreasing order of cluster size. + + Returns + ------- + labels : np.ndarray + Cluster index of each node. + """ + if self.dendrogram_ is None: + raise ValueError("First fit data using the fit method.") + n_nodes = np.shape(self.dendrogram_)[0] + 1 + if n_clusters < 1 or n_clusters > n_nodes: + raise ValueError("The number of clusters must be between 1 and the number of nodes.") + + labels = np.zeros(n_nodes, dtype=int) + clusters = {node: [node] for node in range(n_nodes)} + for t in range(n_nodes - n_clusters): + clusters[n_nodes + t] = clusters.pop(int(self.dendrogram_[t][0])) + \ + clusters.pop(int(self.dendrogram_[t][1])) + clusters = list(clusters.values()) + if sorted_clusters: + clusters = sorted(clusters, key=len, reverse=True) + for label, cluster in enumerate(clusters): + labels[cluster] = label + return labels diff --git a/sknetwork/hierarchy/tests/test_metrics.py b/sknetwork/hierarchy/tests/test_metrics.py index 7635f1b5f..9856a6099 100644 --- a/sknetwork/hierarchy/tests/test_metrics.py +++ b/sknetwork/hierarchy/tests/test_metrics.py @@ -1,27 +1,27 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for metrics.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Bertrand Charpentier -# -# This file is part of Scikit-network. -# -# NetworkX is distributed under a BSD license; see LICENSE.txt for more information. 
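For reference, a short sketch of the new Paris.predict method (illustration only, not taken from the patch; it uses the toy graphs introduced later in this diff):

from sknetwork.hierarchy import Paris
from sknetwork.toy_graphs import karate_club_graph

paris = Paris()
paris.fit(karate_club_graph())

# cut the dendrogram into two clusters; with sorted_clusters=True the largest cluster gets label 0
labels = paris.predict(n_clusters=2, sorted_clusters=True)
# labels has one entry per node (34 here), with values in {0, 1}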
+""" +Created on March 2019 +@author: Thomas Bonald +""" import unittest -from sknetwork.hierarchy.paris import Paris -#from sknetwork.hierarchy.metrics import hierarchical_cost +from sknetwork.hierarchy import Paris, tree_sampling_divergence, dasgupta_cost +from sknetwork.toy_graphs import karate_club_graph class TestMetrics(unittest.TestCase): def setUp(self): self.paris = Paris() + self.karate_club_graph = karate_club_graph() - #def test_unknown_options(self): - #with self.assertRaises(ValueError): - #hierarchical_cost(self.weighted_graph, self.d_w, affinity='unknown') + def test_karate_club_graph(self): + adjacency = self.karate_club_graph + dendrogram = self.paris.fit(adjacency).dendrogram_ + tsd = tree_sampling_divergence(adjacency, dendrogram, normalized=True) + self.assertAlmostEqual(tsd, .65, 2) + dc = dasgupta_cost(adjacency, dendrogram, normalized=True) + self.assertAlmostEqual(dc, .33, 2) - #with self.assertRaises(ValueError): - #hierarchical_cost(self.weighted_graph, self.dendrogram, linkage='unknown') diff --git a/sknetwork/hierarchy/tests/test_paris.py b/sknetwork/hierarchy/tests/test_paris.py index 0613a0480..02c16547b 100644 --- a/sknetwork/hierarchy/tests/test_paris.py +++ b/sknetwork/hierarchy/tests/test_paris.py @@ -1,20 +1,23 @@ +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -# test for louvain.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Thomas Bonald -# -# This file is part of Scikit-network. +""" +Created on March 2019 +@author: Thomas Bonald +""" import unittest -from sknetwork.hierarchy.paris import Paris +import numpy as np from scipy.sparse import identity +from sknetwork.hierarchy import Paris +from sknetwork.toy_graphs import house_graph, karate_club_graph class TestParis(unittest.TestCase): def setUp(self): self.paris = Paris() + self.house_graph = house_graph() + self.karate_club_graph = karate_club_graph() def test_unknown_types(self): with self.assertRaises(TypeError): @@ -27,5 +30,14 @@ def test_unknown_options(self): with self.assertRaises(ValueError): self.paris.fit(identity(2, format='csr'), node_weights='unknown') - def test_single_node_graph(self): - self.assertEqual(self.paris.fit(identity(1, format='csr')).dendrogram_.shape[0], 0) + def test_house_graph(self): + self.paris.fit(self.house_graph) + self.assertEqual(self.paris.dendrogram_.shape[0], 4) + labels = self.paris.predict(sorted_clusters=True) + self.assertTrue(np.array_equal(labels, np.array([0, 0, 1, 1, 0]))) + + def test_karate_club_graph(self): + self.paris.fit(self.karate_club_graph) + self.assertEqual(self.paris.dendrogram_.shape[0], 33) + labels = self.paris.predict() + self.assertEqual(np.max(labels), 1) diff --git a/sknetwork/loader/__init__.py b/sknetwork/loader/__init__.py index e69de29bb..6f74000bc 100644 --- a/sknetwork/loader/__init__.py +++ b/sknetwork/loader/__init__.py @@ -0,0 +1 @@ +from sknetwork.loader.parser import * diff --git a/sknetwork/loader/parser.py b/sknetwork/loader/parser.py index d0569f3fb..d80dfa734 100644 --- a/sknetwork/loader/parser.py +++ b/sknetwork/loader/parser.py @@ -2,11 +2,12 @@ # -*- coding: utf-8 -*- """ Created on Dec 5, 2018 -@author: Quentin Lutz +@author: Quentin Lutz , Nathan de Lara """ -from scipy import sparse import numpy as np +from scipy import sparse +from typing import Union class Parser: @@ -14,23 +15,34 @@ class Parser: A collection of parsers to import datasets into a SciPy format """ + @staticmethod - def parse_tsv(path, directed=False, weighted=False, labeled=False, offset=1): + def parse_tsv(path, 
directed: bool = False, bipartite: bool = False, weighted: bool = False, labeled: bool = False, + offset: int = 1) -> Union[sparse.csr_matrix, tuple]: """ A parser for Tabulation-Separated Values (TSV) datasets. Parameters ---------- - path : str, the path to the dataset in TSV format - directed : bool, ensures the adjacency matrix is symmetric - weighted : bool, retrieves the weights in the third field of the file, raises an error if no such field exists - labeled : bool, retrieves the names given to the nodes and renumbers them. Returns an additional array - offset : int, renumbers the nodes (useful if the nodes are indexed from a given value other than 0) + path : str + the path to the dataset in TSV format + directed : bool + ensures the adjacency matrix is symmetric + bipartite : bool + if True, returns the biadjacency matrix of shape (n1, n2) + weighted : bool + retrieves the weights in the third field of the file, raises an error if no such field exists + labeled : bool + retrieves the names given to the nodes and renumbers them. Returns an additional array + offset : int + renumbers the nodes (useful if the nodes are indexed from a given value other than 0) Returns ------- - adj_matrix : csr_matrix, the adjacency_matrix of the graph - labels : numpy.array, optional, an array such that labels[k] is the label given to the k-th node + adj_matrix : csr_matrix + the adjacency_matrix of the graph + labels : numpy.array + optional, an array such that labels[k] is the label given to the k-th node """ parsed_file = np.loadtxt(path, dtype=str, unpack=True, comments='%') @@ -43,6 +55,7 @@ def parse_tsv(path, directed=False, weighted=False, labeled=False, offset=1): row = new_nodes[:n_edges] col = new_nodes[n_edges:] else: + labels = None row = parsed_file[0].astype(int) - offset * np.ones(n_edges, dtype=int) col = parsed_file[1].astype(int) - offset * np.ones(n_edges, dtype=int) if weighted: @@ -51,12 +64,21 @@ def parse_tsv(path, directed=False, weighted=False, labeled=False, offset=1): else: data = parsed_file[2].astype(float) else: - data = np.ones(n_edges, dtype=int) - adj_matrix = sparse.csr_matrix((data, (row, col))) - n_nodes = max(adj_matrix.shape) - adj_matrix.resize((n_nodes, n_nodes)) - if not directed: - adj_matrix += adj_matrix.transpose() + data = np.ones(n_edges, dtype=bool) + + n_nodes = max(max(row), max(col)) + 1 + if n_nodes < 2 * 10e9: + dtype = np.int32 + else: + dtype = np.int64 + + if bipartite: + adj_matrix = sparse.csr_matrix((data, (row, col)), dtype=dtype) + else: + adj_matrix = sparse.csr_matrix((data, (row, col)), shape=(n_nodes, n_nodes), dtype=dtype) + if not directed: + adj_matrix += adj_matrix.transpose() if labeled: return adj_matrix, labels - return adj_matrix + else: + return adj_matrix diff --git a/sknetwork/toy_graphs/__init__.py b/sknetwork/toy_graphs/__init__.py index e69de29bb..866fd1552 100644 --- a/sknetwork/toy_graphs/__init__.py +++ b/sknetwork/toy_graphs/__init__.py @@ -0,0 +1 @@ +from sknetwork.toy_graphs.graph_data import * diff --git a/sknetwork/toy_graphs/graph_data.py b/sknetwork/toy_graphs/graph_data.py index bd2c52845..4e71833bf 100644 --- a/sknetwork/toy_graphs/graph_data.py +++ b/sknetwork/toy_graphs/graph_data.py @@ -3,41 +3,82 @@ """ Created on Nov 29, 2018 @author: Quentin Lutz +@author: Nathan de Lara +@author: Thomas Bonald """ import numpy as np from scipy import sparse -class GraphConstants: +def house_graph(): """ - This class is meant to make a few toy graphs easily available to the user. 
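For reference, a usage sketch of parse_tsv above (illustration only, not taken from the patch; the file names are hypothetical):

from sknetwork.loader import Parser

# plain undirected edge list: one "source<TAB>target" pair per line
adjacency = Parser.parse_tsv('edges.tsv', directed=False)

# weighted edge list with node names in the first two fields and weights in the third
adjacency, labels = Parser.parse_tsv('weighted_edges.tsv', weighted=True, labeled=True)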
+ House graph + + 5 nodes, 6 edges + + Returns + ------- + adjacency: sparse.csr_matrix + Adjacency matrix of the graph. + """ + row = np.array([0, 0, 1, 1, 2, 3]) + col = np.array([1, 4, 2, 4, 3, 4]) + adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(5, 5)) + return adjacency + adjacency.T + + +def karate_club_graph(): + """ + Zachary's Karate Club Graph + + Data file from: http://vlado.fmf.uni-lj.si/pub/networks/data/Ucinet/UciData.htm + + 34 nodes, 78 edges + + Returns + ------- + adjacency: sparse.csr_matrix + Adjacency matrix of the graph. + """ + row = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, + 3, 4, 4, 5, 5, 5, 6, 8, 8, 8, 9, 13, 14, 14, 15, 15, 18, + 18, 19, 20, 20, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 26, 26, + 27, 28, 28, 29, 29, 30, 30, 31, 31, 32]) + col = np.array( + [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 2, + 3, 7, 13, 17, 19, 21, 30, 3, 7, 8, 9, 13, 27, 28, 32, 7, 12, + 13, 6, 10, 6, 10, 16, 16, 30, 32, 33, 33, 33, 32, 33, 32, 33, 32, + 33, 33, 32, 33, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 31, 29, 33, + 33, 31, 33, 32, 33, 32, 33, 32, 33, 33]) + adjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col)), shape=(34, 34)) + return adjacency + adjacency.T + + +def star_wars_villains_graph(return_labels=False): + """ + Bipartite graph connecting some Star Wars villains to the movies in which they appear.\n + 7 nodes (4 villains, 3 movies), 8 edges + + Parameters + ---------- + return_labels: bool + whether to return the labels of the nodes as dictionaries. + + Returns + ------- + biadjacency: sparse.csr_matrix + Biadjacency matrix of the graph. """ + row = np.array([0, 0, 1, 2, 2, 2, 3, 3]) + col = np.array([0, 2, 0, 0, 1, 2, 1, 2]) + biadjacency = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col))) - @staticmethod - def karate_club_graph(): - """ - Zachary's Karate Club Graph - Data file from: http://vlado.fmf.uni-lj.si/pub/networks/data/Ucinet/UciData.htm - 34 nodes, 78 edges - - Returns - ------- - The adjacency matrix of Zachary's Karate Club Graph in SciPy CSR format - """ - row = np.array( - [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, - 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, - 33, 0, 4, 5, 0, 0, 3, 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, 33, - 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, - 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, 14, 15, 18, 19, 20, - 22, 23, 26, 27, 28, 29, 30, 31, 32]) - col = np.array( - [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, - 12, 13, 13, 13, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21, 21, 22, 22, 23, - 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, - 30, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 33, 33, 33, 33, 33]) - karate_club = sparse.csr_matrix((np.ones(len(row), dtype=int), (row, col))) - return karate_club + if return_labels: + row_labels = {0: 'Jabba', 1: 'Greedo', 2: 'Vador', 3: 'Boba'} + 
col_labels = {0: 'A_New_Hope', 1: 'The_Empire_Strikes_Back', 2: 'Return_Of_The_Jedi'} + return biadjacency, row_labels, col_labels + else: + return biadjacency diff --git a/sknetwork/toy_graphs/test/test_graph_imports.py b/sknetwork/toy_graphs/test/test_graph_imports.py index 311d0c95b..8f8b0125a 100644 --- a/sknetwork/toy_graphs/test/test_graph_imports.py +++ b/sknetwork/toy_graphs/test/test_graph_imports.py @@ -1,14 +1,9 @@ # -*- coding: utf-8 -*- # test for graph_data.py -# -# Copyright 2018 Scikit-network Developers. -# Copyright 2018 Quentin Lutz -# -# This file is part of Scikit-network. +# authors: Quentin Lutz , Nathan de Lara import unittest -from sknetwork.toy_graphs.graph_data import GraphConstants - +from sknetwork.toy_graphs.graph_data import karate_club_graph, star_wars_villains_graph class TestGraphImport(unittest.TestCase): @@ -16,5 +11,8 @@ def setUp(self): pass def test_available(self): - graph = GraphConstants.karate_club_graph() + graph = karate_club_graph() self.assertEqual(graph.shape[0], 34) + + graph = star_wars_villains_graph() + self.assertEqual(graph.shape, (4, 3))
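Finally, the new toy-graph helpers can be used as follows (illustration only, not taken from the patch):

from sknetwork.toy_graphs import house_graph, karate_club_graph, star_wars_villains_graph

house = house_graph()          # 5 x 5 adjacency matrix, 6 edges
karate = karate_club_graph()   # 34 x 34 adjacency matrix, 78 edges

# bipartite graph: 4 villains x 3 movies, with optional label dictionaries
biadjacency, villains, movies = star_wars_villains_graph(return_labels=True)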