From dc424d26525ff6f93b3aaacd2f00ea685cfb19df Mon Sep 17 00:00:00 2001
From: andreasmardt <andreas.mardt@fu-berlin.de>
Date: Wed, 24 Nov 2021 19:01:12 +0100
Subject: [PATCH 1/6] updated hyperparameter syt

---
 SynaptotagminC2A.ipynb | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/SynaptotagminC2A.ipynb b/SynaptotagminC2A.ipynb
index 0dccd13..5f6430f 100644
--- a/SynaptotagminC2A.ipynb
+++ b/SynaptotagminC2A.ipynb
@@ -33,9 +33,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "stride = 2\n",
+    "stride = 1\n",
     "\n",
-    "tau = 10//stride \n",
+    "tau = 100//stride \n",
     "\n",
     "output_sizes = [8,8]\n",
     "number_subsystems = len(output_sizes)\n",
@@ -46,7 +46,7 @@
     "batch_size = 20000\n",
     "# Which trajectory points percentage is used as validation and testing, the rest is for training\n",
     "valid_ratio = 0.3\n",
-    "test_ratio = 0.2\n",
+    "test_ratio = 0.0001\n",
     "# How many hidden layers the network chi has\n",
     "network_depth = 3\n",
     "\n",
@@ -387,7 +387,7 @@
     "N_trajs = len(dataset.trajectories)\n",
     "indexes_traj = np.arange(N_trajs)\n",
     "n_val = int(N_trajs * percentage)\n",
-    "msmlags=np.array([1,2,4,6,10,15,20,25])\n",
+    "msmlags=np.array([1,2,4,6,10,15,20,25])*10\n",
     "for run in range(runs):\n",
     "    for tau_i in msmlags:\n",
     "        np.random.shuffle(indexes_traj)\n",
@@ -418,7 +418,7 @@
    "outputs": [],
    "source": [
     "axes, fig = plot_protein_its(its_reorder, msmlags, ylog=True, multiple_runs=True, percent=0.9)\n",
-    "x_ticks = np.array([1,5,10,20,40])\n",
+    "x_ticks = np.array([1,5,10,20,40])*10\n",
     "x_ticks_labels = x_ticks*stride # for estimating the right units!\n",
     "y_ticks = np.array([1000,10000, 100000])/stride\n",
     "y_ticks_labels = y_ticks*stride/1000\n",
@@ -436,13 +436,22 @@
     "    ax.set_yticklabels(y_ticks_labels, fontsize=14)\n",
     "    ax.tick_params(direction='out', length=6, width=2, colors='k',\n",
     "                   grid_color='k', grid_alpha=0.5)\n",
-    "    ax.set_xlim(1,25)\n",
+    "    ax.set_xlim(10,250)\n",
     "    ax.set_ylim(0.01*1000, 200*1000)\n",
     "    # fig.savefig('./Syt_its.pdf', bbox_inches='tight')\n",
     "\n",
     "plt.show()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ivampnet.save_params('./Syt_params')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,

From 1018be6b0129367193c28b06f5cbaf555feac117 Mon Sep 17 00:00:00 2001
From: Tim Hempel <thempel@zedat.fu-berlin.de>
Date: Fri, 1 Apr 2022 16:32:06 +0200
Subject: [PATCH 2/6] add biorxiv link

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 19ffd7d..589c4b1 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 Codebase for the iVAMPnets estimator and model which includes the classes for constructing the masks for toymodels and real protein applications.
 The implemented methods allow to decompose a possible high dimensional system in its weakly coupled or independent subsystems. Thereby, the downstream estimation of the kinetic models is much more data efficient than estimating a global kinetic model which might not be feasible. The whole pipeline is an end-to-end deep learning framework which allows to define your own network architectures for the kinetics estimation of each subsystem. 
-The data for the synaptotagmin C2A system is available upon request. The code is designed to reproduce the results of our paper "A deep learning framework for the decomposition of macromolecules into independent VAMPnets" (Link will be added) and is based on the deeptime package (see https://deeptime-ml.github.io/latest/index.html). 
+The data for the synaptotagmin C2A system is available upon request. The code is designed to reproduce the results of our paper "Deep learning to decompose macromolecules into independent Markovian domains" (https://www.biorxiv.org/content/10.1101/2022.03.30.486366v1) and is based on the deeptime package (see https://deeptime-ml.github.io/latest/index.html). 
 
 The code includes:
 1. (ivampnets.py) The definition of the ivampnets estimator class, which allows to fit a given model to simulation data. The definition of the ivampnets model class - the resulting model - which can then be used to estimate transition matrices, implied timescales, eigenfunctions, etc.

From 6291587db6e3b2813f5489e0db8959630363645e Mon Sep 17 00:00:00 2001
From: Tim Hempel <thempel@zedat.fu-berlin.de>
Date: Thu, 19 May 2022 13:06:22 +0200
Subject: [PATCH 3/6] add installation instructions

---
 README.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/README.md b/README.md
index 589c4b1..988cb6c 100644
--- a/README.md
+++ b/README.md
@@ -29,3 +29,32 @@ tensorboard=2.6.0
 h5py=1.10.4
 ```
 
+## Installation instructions
+
+The software dependencies can be installed with anaconda / miniconda. If you do not have miniconda or anaconda, please follow the instructions here: https://conda.io/miniconda.html
+
+The following command can be used to create a new conda environment and install all dependencies for the ivampnets scripts. 
+```bash
+conda create -n ivampnets pytorch=1.8.0 deeptime=0.2.9 numpy=1.19.5 matplotlib=3.1.3 jupyter h5py -c conda-forge
+```
+The new conda environment can be activated with
+```bash
+conda activate ivampnets
+```
+
+
+If you already use conda and Jupyter with several environments, you can register this environment's Python kernel with Jupyter (run inside the activated environment) via
+```bash
+python -m ipykernel install --user --name ivampnets
+```
+This repository, including the Python scripts and Jupyter notebooks, can be downloaded with
+```bash
+git clone https://github.com/markovmodel/ivampnets.git
+```
+
+The following command will start the jupyter notebook server:
+```bash
+jupyter notebook
+```
+
+Your browser should pop up pointing to a list of notebooks once you navigate into the repository directory. If it's the wrong browser, add for example `--browser=firefox` or copy and paste the URL into the browser of your choice.

From d31f6b9809e49034be0378f4819e0a4c225c5b9d Mon Sep 17 00:00:00 2001
From: Tim Hempel <thempel@zedat.fu-berlin.de>
Date: Thu, 19 May 2022 13:10:07 +0200
Subject: [PATCH 4/6] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 988cb6c..eb3341a 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ The code includes:
 5. (10Cube.ipynb) Notebook to reproduce the results for the 10-Cube example.
 6. (SynaptotagminC2A.ipynb) Notebook to reproduce the results for a protein example. The data of the synaptotagmin C2A domain is available upon request.
 
-The code was executed using the following package versions:
+The code was executed using the following package versions on a linux computer (debian bullseye):
 
 ```
 python=3.6 or higher
@@ -58,3 +58,5 @@ jupyter notebook
 ```
 
 Your browser should pop up pointing to a list of notebooks once you navigate into the repository directory. If it's the wrong browser, add for example `--browser=firefox` or copy and paste the URL into the browser of your choice.
+
+The typical install time ranges from 5 minutes for existing conda users to 20 minutes if conda has to be set up from scratch.

From 6a3421bdd123f62fb4fc60e433c2b0b65c078164 Mon Sep 17 00:00:00 2001
From: Tim Hempel <thempel@zedat.fu-berlin.de>
Date: Thu, 19 May 2022 13:13:48 +0200
Subject: [PATCH 5/6] Update README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index eb3341a..54fc626 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,9 @@ The code includes:
 1. (ivampnets.py) The definition of the ivampnets estimator class, which allows to fit a given model to simulation data. The definition of the ivampnets model class - the resulting model - which can then be used to estimate transition matrices, implied timescales, eigenfunctions, etc.
 2. (masks.py) The definition of the mask modules, which can be used to give the modeler an intuition which part of the global system is assigned to which subsystem.
 3. (examples.py) Helper functions to generate the data for the toy systems and plot some results.
-4. (Toymodel_2Systems.ipynb) Notebook to reproduce the results for a simple truly independent 2D system. 
-5. (10Cube.ipynb) Notebook to reproduce the results for the 10-Cube example.
-6. (SynaptotagminC2A.ipynb) Notebook to reproduce the results for a protein example. The data of the synaptotagmin C2A domain is available upon request.
+4. (Toymodel_2Systems.ipynb) Notebook to reproduce the results for a simple truly independent 2D system. Typical runtime (CPU): 2 min
+5. (10Cube.ipynb) Notebook to reproduce the results for the 10-Cube example. Typical runtime (CPU): 5 min
+6. (SynaptotagminC2A.ipynb) Notebook to reproduce the results for a protein example. The data of the synaptotagmin C2A domain is available upon request. Typical runtime (GPU, CUDA): 1.5 hours
 
 The code was executed using the following package versions on a linux computer (debian bullseye):
 

From 049feb2f5256d4270507b6a1a7f7dec79ad7f652 Mon Sep 17 00:00:00 2001
From: thempel <thempel@zedat.fu-berlin.de>
Date: Thu, 19 May 2022 13:40:22 +0200
Subject: [PATCH 6/6] add infos

---
 10Cube.ipynb            | 33 ++++++++++++++++++++++----
 SynaptotagminC2A.ipynb  | 51 +++++++++++++++++++++++++++++++++++++++--
 Toymodel_2Systems.ipynb | 39 ++++++++++++++++++++++++++++---
 3 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/10Cube.ipynb b/10Cube.ipynb
index b3393d6..24d5175 100644
--- a/10Cube.ipynb
+++ b/10Cube.ipynb
@@ -111,7 +111,10 @@
    "outputs": [],
    "source": [
     "angles = np.pi / 4 * np.ones(number_subsystems//2)\n",
+    "# training data with 100000 steps\n",
     "hidden_state_traj, observable_traj = toymodel.generate_traj(100000, angles=angles, dim_noise=dim_noise)\n",
+    "\n",
+    "# validation data with 10000 steps\n",
     "hidden_state_traj_valid, observable_traj_valid = toymodel.generate_traj(10000, angles=angles, dim_noise=dim_noise)"
    ]
   },
@@ -266,7 +269,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorboard_installed = True\n",
+    "tensorboard_installed = False\n",
     "if tensorboard_installed:\n",
     "    from torch.utils.tensorboard import SummaryWriter\n",
     "    writer = SummaryWriter('./runs/Cube10/')\n",
@@ -301,6 +304,16 @@
     "                     lam_trace=0, start_mask=0, end_trace=0, tb_writer=writer, clip=False).fetch_model()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95548c29",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# execution time (on cpu): ~ 4.5 min"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "3984d128",
@@ -341,6 +354,16 @@
     "plot_mask(mask, vmax=0.5, skip=2)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c0d74aeb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reproduces Fig. 4c (or a permutation with respect to ivampnet state assignments)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "143348c9",
@@ -396,12 +419,14 @@
    "id": "90807a5c",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# reproduces Fig. 4d"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -415,7 +440,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
diff --git a/SynaptotagminC2A.ipynb b/SynaptotagminC2A.ipynb
index 5f6430f..86d67f6 100644
--- a/SynaptotagminC2A.ipynb
+++ b/SynaptotagminC2A.ipynb
@@ -3,6 +3,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "42847660",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,6 +23,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "43117557",
    "metadata": {},
    "source": [
     "### Hyperparameters"
@@ -30,6 +32,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "6f87cbf8",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -78,6 +81,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d10b4cf1",
    "metadata": {},
    "source": [
     "### Load data"
@@ -86,9 +90,12 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "588c7ec9",
    "metadata": {},
    "outputs": [],
    "source": [
+    "# data set has a total length of 184 µs with a 1 ns resolution (total of 184000 frames)\n",
+    "\n",
     "data_trajs = []\n",
     "hdf5_names = []\n",
     "loaded_data_stride = 100\n",
@@ -106,6 +113,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "51a0e9fd",
    "metadata": {},
    "source": [
     "### Define dataset"
@@ -114,6 +122,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "f45ee7fb",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -125,6 +134,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "22c7b97e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -136,6 +146,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "9d6acabf",
    "metadata": {},
    "source": [
     "### Define networks"
@@ -144,6 +155,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "634d4b2b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -180,6 +192,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ea28cbbb",
    "metadata": {},
    "source": [
     "### Create iVAMPnets estimator"
@@ -188,6 +201,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "05aabb2d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -197,6 +211,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "0325caf5",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -205,6 +220,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "967887b4",
    "metadata": {},
    "source": [
     "### Plot mask before training"
@@ -213,6 +229,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "dba6b102",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -222,6 +239,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "f03dd31d",
    "metadata": {},
    "source": [
     "### Create data loader"
@@ -230,6 +248,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "06a08456",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -241,6 +260,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c8a820a0",
    "metadata": {},
    "source": [
     "### Create a tensorboard writer to observe performance during training"
@@ -249,10 +269,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "7038f8fc",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorboard_installed = True\n",
+    "tensorboard_installed = False\n",
     "if tensorboard_installed:\n",
     "    from torch.utils.tensorboard import SummaryWriter\n",
     "    writer = SummaryWriter(log_dir='./runs/Syt/')\n",
@@ -264,6 +285,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "229a10ff",
    "metadata": {},
    "source": [
     "### Fit the model on the training data"
@@ -272,6 +294,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "bfaff88e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -290,6 +313,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "474445e3",
    "metadata": {},
    "source": [
     "### Plot training and validation scores"
@@ -298,6 +322,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "8a16599e",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -310,6 +335,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2e65c886",
    "metadata": {},
    "source": [
     "### Plot the mask after training"
@@ -318,6 +344,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "66a61989",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -327,6 +354,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "8437a367",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -336,6 +364,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "f01dfcd4",
    "metadata": {},
    "source": [
     "### Finally train without noise"
@@ -344,6 +373,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "92c01b2a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -357,6 +387,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "ddc0b3ab",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -369,6 +400,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "29e2875c",
    "metadata": {},
    "source": [
     "### Estimate implied timescales"
@@ -377,6 +409,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "036bd72b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -400,6 +433,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "0e16ad8b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -414,6 +448,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "57c8d5a3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -446,6 +481,17 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "9df5595e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reproduces Fig. 5b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e6ec4ad",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -455,6 +501,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "f51b21f6",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -476,7 +523,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.10"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
diff --git a/Toymodel_2Systems.ipynb b/Toymodel_2Systems.ipynb
index 30fbdce..3e37187 100644
--- a/Toymodel_2Systems.ipynb
+++ b/Toymodel_2Systems.ipynb
@@ -110,7 +110,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# training data with 100000 steps\n",
     "hidden_state_traj, observable_traj = toymodel.generate_traj(100000)\n",
+    "\n",
+    "# validation data with 10000 steps\n",
     "hidden_state_traj_valid, observable_traj_valid = toymodel.generate_traj(10000)"
    ]
   },
@@ -293,7 +296,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tensorboard_installed = True\n",
+    "tensorboard_installed = False\n",
     "if tensorboard_installed:\n",
     "    from torch.utils.tensorboard import SummaryWriter\n",
     "    writer = SummaryWriter('./runs/Toy2/')\n",
@@ -323,6 +326,16 @@
     "    print('The model does not seem to be converged to an independent solution!')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "edcbc766",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# execution time (cpu): ~ 30 sec"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "375d23fc",
@@ -363,6 +376,16 @@
     "plot_mask(mask)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ed8483a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reproduces Fig. 3b (or permutation of it)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "f0935fa5",
@@ -442,11 +465,21 @@
     "from examples import plot_eigfuncs\n",
     "plot_eigfuncs(model, val_data)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d8049c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reproduces Fig. 3c (possibly with permutation of state assignments)"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -460,7 +493,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,