From 868327c2bbe58fc502fb563b37c06e20827eff81 Mon Sep 17 00:00:00 2001 From: AlexanderKroll <74175710+AlexanderKroll@users.noreply.github.com> Date: Thu, 20 Apr 2023 16:18:47 +0200 Subject: [PATCH] Updating after manuscript revision --- ...rison of kcat predictions-checkpoint.ipynb | 585 ---- .../01 - Comparison of kcat predictions.ipynb | 2 +- ... and reaction information-checkpoint.ipynb | 818 +++++- ... and plotting the results-checkpoint.ipynb | 2018 ------------- ...ting additional ML models-checkpoint.ipynb | 690 +++++ ... and plotting the results-checkpoint.ipynb | 2603 +++++++++++++++++ .../05 - Comparison to ENKIE-checkpoint.ipynb | 720 +++++ ...with enzyme and reaction information.ipynb | 816 +++++- ... - Testing additional input features.ipynb | 84 +- ...- Analyzing and plotting the results.ipynb | 2589 ---------------- .../03 - Testing additional ML models.ipynb | 690 +++++ ...- Analyzing and plotting the results.ipynb | 2603 +++++++++++++++++ .../01 - Data preprocessing-checkpoint.ipynb | 67 +- ...nd enzyme representations-checkpoint.ipynb | 118 +- .../01 - Data preprocessing.ipynb | 52 +- ...gerprints and enzyme representations.ipynb | 114 +- 16 files changed, 8971 insertions(+), 5598 deletions(-) delete mode 100644 code/DLKcat_comparison/.ipynb_checkpoints/01 - Comparison of kcat predictions-checkpoint.ipynb delete mode 100644 code/model_fitting/.ipynb_checkpoints/03 - Analyzing and plotting the results-checkpoint.ipynb create mode 100644 code/model_fitting/.ipynb_checkpoints/03 - Testing additional ML models-checkpoint.ipynb create mode 100644 code/model_fitting/.ipynb_checkpoints/04 - Analyzing and plotting the results-checkpoint.ipynb create mode 100644 code/model_fitting/.ipynb_checkpoints/05 - Comparison to ENKIE-checkpoint.ipynb delete mode 100644 code/model_fitting/03 - Analyzing and plotting the results.ipynb create mode 100644 code/model_fitting/03 - Testing additional ML models.ipynb create mode 100644 code/model_fitting/04 - Analyzing and plotting the results.ipynb diff --git a/code/DLKcat_comparison/.ipynb_checkpoints/01 - Comparison of kcat predictions-checkpoint.ipynb b/code/DLKcat_comparison/.ipynb_checkpoints/01 - Comparison of kcat predictions-checkpoint.ipynb deleted file mode 100644 index 5461f97..0000000 --- a/code/DLKcat_comparison/.ipynb_checkpoints/01 - Comparison of kcat predictions-checkpoint.ipynb +++ /dev/null @@ -1,585 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### We used the python code from the GitHub repository \"https://github.com/SysBioChalmers/DLKcat\" to reproduce the DLKcat model and to make predictions for their test set:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import os\n", - "from os.path import join\n", - "from sklearn.metrics import mean_squared_error, r2_score\n", - "from scipy import stats\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.pyplot import figure\n", - "import matplotlib as mpl\n", - "import json\n", - "\n", - "from Bio import pairwise2\n", - "from Bio.Emboss.Applications import NeedleCommandline\n", - "\n", - "import pickle\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def load_tensor(file_name, dtype):\n", - " return [dtype(d).to(device) for d in np.load(file_name + '.npy', allow_pickle=True)]\n", - "\n", - "\n", - "def load_pickle(file_name):\n", - " with open(file_name, 'rb') as f:\n", - " return pickle.load(f)\n", - "\n", - "def shuffle_dataset(dataset, seed):\n", - " np.random.seed(seed)\n", - " np.random.shuffle(dataset)\n", - " return dataset\n", - "\n", - "def split_dataset(dataset, ratio):\n", - " n = int(ratio * len(dataset))\n", - " dataset_1, dataset_2 = dataset[:n], dataset[n:]\n", - " return dataset_1, dataset_2\n", - "\n", - "def calculate_identity(fasta_file_1, fasta_file_2):\n", - " needle_cline = NeedleCommandline(asequence = fasta_file_1, bsequence = fasta_file_2,\n", - " gapopen=10, gapextend=0.5, filter = True)\n", - "\n", - " out = needle_cline()[0]\n", - " out = out[out.find(\"Identity\"):]\n", - " out = out[:out.find(\"\\n\")]\n", - " percent = float(out[out.find(\"(\")+1 :out.find(\")\")-1].replace(\" \", \"\"))\n", - " return(percent)\n", - "\n", - "device = \"cpu\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Loading results from DLKcat prediction" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - " | y_true | \n", - "y_pred | \n", - "Sequence | \n", - "
---|---|---|---|
0 | \n", - "-2.207608 | \n", - "-0.071899 | \n", - "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", - "
1 | \n", - "-3.657577 | \n", - "-2.707640 | \n", - "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", - "
2 | \n", - "0.949390 | \n", - "0.831021 | \n", - "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", - "
3 | \n", - "1.672098 | \n", - "1.513026 | \n", - "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", - "
4 | \n", - "-1.790485 | \n", - "-2.830310 | \n", - "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "
1679 | \n", - "-1.920819 | \n", - "-0.281649 | \n", - "MNYPAEPFRIKSVETVSMIPRDERLKKMQEAGYNTFLLNSKDIYID... | \n", - "
1680 | \n", - "2.740363 | \n", - "0.945056 | \n", - "MIEADYLVIGAGIAGASTGYWLSAHGRVVVLEREAQPGYHSTGRSA... | \n", - "
1681 | \n", - "1.198657 | \n", - "1.115256 | \n", - "MNLREKYGEWGLILGATEGVGKAFCEKIAAGGMNVVMVGRREEKLN... | \n", - "
1682 | \n", - "0.740363 | \n", - "0.917627 | \n", - "MALLSQAGGSYTVVPSGVCSKAGTKAVVSGGVRNLDVLRMKEAFGS... | \n", - "
1683 | \n", - "1.501059 | \n", - "1.663697 | \n", - "MDFYYLPGSAPCRAVQMTAAAVGVELNLKLTNLMAGEHMKPEFLKI... | \n", - "
1684 rows × 3 columns
\n", - "\n", - " | y_true | \n", - "y_pred | \n", - "Sequence | \n", - "max_ident | \n", - "
---|---|---|---|---|
0 | \n", - "-2.207608 | \n", - "-0.071899 | \n", - "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", - "22.8 | \n", - "
1 | \n", - "-3.657577 | \n", - "-2.707640 | \n", - "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", - "100.0 | \n", - "
2 | \n", - "0.949390 | \n", - "0.831021 | \n", - "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", - "100.0 | \n", - "
3 | \n", - "1.672098 | \n", - "1.513026 | \n", - "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", - "100.0 | \n", - "
4 | \n", - "-1.790485 | \n", - "-2.830310 | \n", - "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", - "99.4 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
1679 | \n", - "-1.920819 | \n", - "-0.281649 | \n", - "MNYPAEPFRIKSVETVSMIPRDERLKKMQEAGYNTFLLNSKDIYID... | \n", - "100.0 | \n", - "
1680 | \n", - "2.740363 | \n", - "0.945056 | \n", - "MIEADYLVIGAGIAGASTGYWLSAHGRVVVLEREAQPGYHSTGRSA... | \n", - "100.0 | \n", - "
1681 | \n", - "1.198657 | \n", - "1.115256 | \n", - "MNLREKYGEWGLILGATEGVGKAFCEKIAAGGMNVVMVGRREEKLN... | \n", - "100.0 | \n", - "
1682 | \n", - "0.740363 | \n", - "0.917627 | \n", - "MALLSQAGGSYTVVPSGVCSKAGTKAVVSGGVRNLDVLRMKEAFGS... | \n", - "99.8 | \n", - "
1683 | \n", - "1.501059 | \n", - "1.663697 | \n", - "MDFYYLPGSAPCRAVQMTAAAVGVELNLKLTNLMAGEHMKPEFLKI... | \n", - "100.0 | \n", - "
1684 rows × 4 columns
\n", - "\n", - " | Reaction ID | \n", - "Sequence ID | \n", - "kcat_values | \n", - "Uniprot IDs | \n", - "from_BRENDA | \n", - "from_Sabio | \n", - "from_Uniprot | \n", - "checked | \n", - "Sequence | \n", - "substrates | \n", - "... | \n", - "difference_fp | \n", - "ESM1b | \n", - "ESM1b_ts | \n", - "geomean_kcat | \n", - "frac_of_max_UID | \n", - "frac_of_max_RID | \n", - "frac_of_max_EC | \n", - "y_true | \n", - "y_pred | \n", - "max_ident | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "Reaction_3207 | \n", - "Sequence_2150 | \n", - "[219] | \n", - "[B9W4V6] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", - "{InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C7H5NO4/c9... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", - "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", - "2.340444 | \n", - "0.665653 | \n", - "1.000000 | \n", - "0.114660 | \n", - "2.340444 | \n", - "1.082393 | \n", - "20.8 | \n", - "
1 | \n", - "Reaction_3629 | \n", - "Sequence_3212 | \n", - "[0.92] | \n", - "[Q0PC20] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", - "{InChI=1S/C10H13N5O3/c1-4-6(16)7(17)10(18-4)15... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", - "[0.13206507, -0.10826899, -0.31126085, 0.95038... | \n", - "-0.036212 | \n", - "0.340741 | \n", - "1.000000 | \n", - "0.090196 | \n", - "-0.036212 | \n", - "0.370715 | \n", - "35.3 | \n", - "
2 | \n", - "Reaction_375 | \n", - "Sequence_26 | \n", - "[21.0] | \n", - "[Q0GYU4] | \n", - "[0] | \n", - "[1] | \n", - "[0] | \n", - "[False] | \n", - "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", - "{InChI=1S/p+1, InChI=1S/C4H8O2/c1-3(5)4(2)6/h3... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", - "[0.3617253, 0.8765441, -1.0668296, 1.5401511, ... | \n", - "1.322219 | \n", - "0.175000 | \n", - "0.147887 | \n", - "1.000000 | \n", - "1.322219 | \n", - "-0.119795 | \n", - "40.1 | \n", - "
3 | \n", - "Reaction_4312 | \n", - "Sequence_3788 | \n", - "[4.4] | \n", - "[Q8ZNC4] | \n", - "[0] | \n", - "[0] | \n", - "[1] | \n", - "[False] | \n", - "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", - "{InChI=1S/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", - "[0.7798445, -0.7589981, -0.2779501, 0.2643281,... | \n", - "0.643453 | \n", - "1.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.643453 | \n", - "1.030066 | \n", - "25.9 | \n", - "
4 | \n", - "Reaction_2115 | \n", - "Sequence_712 | \n", - "[4.5] | \n", - "[P53602] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", - "{InChI=1S/C6H14O10P2/c1-6(9,4-5(7)8)2-3-15-18(... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", - "[-0.6100984, -0.054886594, -0.09893316, 0.2822... | \n", - "0.653213 | \n", - "1.000000 | \n", - "0.849057 | \n", - "0.112500 | \n", - "0.653213 | \n", - "0.755961 | \n", - "49.3 | \n", - "
5 rows × 27 columns
\n", - "\n", - " | y_true | \n", - "y_pred | \n", - "Sequence | \n", - "max_ident | \n", - "
---|---|---|---|---|
0 | \n", - "-2.207608 | \n", - "-0.071899 | \n", - "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", - "22.8 | \n", - "
1 | \n", - "-3.657577 | \n", - "-2.707640 | \n", - "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", - "100.0 | \n", - "
2 | \n", - "0.949390 | \n", - "0.831021 | \n", - "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", - "100.0 | \n", - "
3 | \n", - "1.672098 | \n", - "1.513026 | \n", - "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", - "100.0 | \n", - "
4 | \n", - "-1.790485 | \n", - "-2.830310 | \n", - "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", - "99.4 | \n", - "
\n", - " | Reaction ID | \n", - "Sequence ID | \n", - "kcat_values | \n", - "Uniprot IDs | \n", - "from_BRENDA | \n", - "from_Sabio | \n", - "from_Uniprot | \n", - "checked | \n", - "Sequence | \n", - "substrates | \n", - "products | \n", - "MW_frac | \n", - "max_kcat_for_UID | \n", - "max_kcat_for_RID | \n", - "ECs | \n", - "max_kcat_for_EC | \n", - "structural_fp | \n", - "difference_fp | \n", - "ESM1b | \n", - "ESM1b_ts | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "Reaction_0 | \n", - "Sequence_309 | \n", - "[2.8, 0.05, 0.11, 205.0, 2.3, 134.0, 360.0] | \n", - "[P20932, P20932, P20932, P20932, P20932, P2093... | \n", - "[0, 0, 0, 0, 0, 0, 0] | \n", - "[1, 1, 1, 1, 1, 1, 1] | \n", - "[0, 0, 0, 0, 0, 0, 0] | \n", - "[False, False, False, False, False, False, False] | \n", - "MSQNLFNVEDYRKLRQKRLPKMVYDYLEGGAEDEYGVKHNRDVFQQ... | \n", - "{InChI=1S/C8H8O3/c9-7(8(10)11)6-4-2-1-3-5-6/h1... | \n", - "{InChI=1S/C17H23N4O9P/c1-7-3-9-10(4-8(7)2)21(1... | \n", - "1.0 | \n", - "360.0 | \n", - "360.0 | \n", - "[1.1.99.31] | \n", - "550.0 | \n", - "1100110100001000000000110111010001000001111010... | \n", - "[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0... | \n", - "[0.13688426, 0.20014146, -0.20241867, 0.083636... | \n", - "[0.08041849, -0.05214988, -0.7103536, 0.786840... | \n", - "
1 | \n", - "Reaction_1 | \n", - "Sequence_309 | \n", - "[1.2, 3.4, 0.61, 0.07] | \n", - "[P20932, P20932, P20932, P20932] | \n", - "[0, 0, 0, 0] | \n", - "[1, 1, 1, 1] | \n", - "[0, 0, 0, 0] | \n", - "[False, False, False, False] | \n", - "MSQNLFNVEDYRKLRQKRLPKMVYDYLEGGAEDEYGVKHNRDVFQQ... | \n", - "{InChI=1S/C17H23N4O9P/c1-7-3-9-10(4-8(7)2)21(1... | \n", - "{InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C17H21N4O9... | \n", - "1.0 | \n", - "360.0 | \n", - "3.4 | \n", - "[1.1.99.31] | \n", - "550.0 | \n", - "1100010100000001010000110110000001000001111000... | \n", - "[0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0,... | \n", - "[0.13688426, 0.20014146, -0.20241867, 0.083636... | \n", - "[0.08041849, -0.05214988, -0.7103536, 0.786840... | \n", - "
\n", - " | Reaction | \n", - "Sequence | \n", - "kcats | \n", - "
---|---|---|---|
0 | \n", - "Reaction_0 | \n", - "Sequence_309 | \n", - "[2.8, 0.05, 0.11, 205.0, 2.3, 134.0, 360.0] | \n", - "
1 | \n", - "Reaction_1 | \n", - "Sequence_309 | \n", - "[1.2, 3.4, 0.61, 0.07] | \n", - "
2 | \n", - "Reaction_2 | \n", - "Sequence_3142 | \n", - "[6.18, 14.5, 11.58, 13.12, 11.9, 13.98, 14.08,... | \n", - "
3 | \n", - "Reaction_4 | \n", - "Sequence_3263 | \n", - "[57.1, 19.6, 5.96, 13.6, 26.4, 14.0, 41.1, 11.... | \n", - "
4 | \n", - "Reaction_5 | \n", - "Sequence_2101 | \n", - "[2.98, 0.87] | \n", - "
\n", - " | Uniprot ID | \n", - "kcat | \n", - "Substrates | \n", - "Products | \n", - "PMID | \n", - "substrate_IDs | \n", - "product_IDs | \n", - "checked | \n", - "#UIDs | \n", - "complete | \n", - "from BRENDA | \n", - "from Uniprot | \n", - "from Sabio | \n", - "EC | \n", - "ORGANISM | \n", - "BRENDA reaction ID | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "P20932 | \n", - "2.8 | \n", - "(S)-Mandelate;Riboflavin-5-phosphate | \n", - "Reduced FMN;alpha-Oxo-benzeneacetic acid | \n", - "15311930 | \n", - "[C01984, C00061] | \n", - "[C02137, C01847] | \n", - "False | \n", - "1 | \n", - "True | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
1 | \n", - "P20932 | \n", - "0.05 | \n", - "(S)-Mandelate;Riboflavin-5-phosphate | \n", - "Reduced FMN;alpha-Oxo-benzeneacetic acid | \n", - "15311930 | \n", - "[C01984, C00061] | \n", - "[C02137, C01847] | \n", - "False | \n", - "1 | \n", - "True | \n", - "0 | \n", - "0 | \n", - "1 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "
\n", + " | Reaction ID | \n", + "Sequence ID | \n", + "kcat_values | \n", + "Uniprot IDs | \n", + "from_BRENDA | \n", + "from_Sabio | \n", + "from_Uniprot | \n", + "checked | \n", + "Sequence | \n", + "substrates | \n", + "... | \n", + "ESM1b | \n", + "ESM1b_ts | \n", + "geomean_kcat | \n", + "frac_of_max_UID | \n", + "frac_of_max_RID | \n", + "frac_of_max_EC | \n", + "DRFP | \n", + "y_true | \n", + "y_pred | \n", + "max_ident | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Reaction_3207 | \n", + "Sequence_2150 | \n", + "[219] | \n", + "[B9W4V6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", + "{InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C7H5NO4/c9... | \n", + "... | \n", + "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", + "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", + "2.340444 | \n", + "0.665653 | \n", + "1.000000 | \n", + "0.114660 | \n", + "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "2.340444 | \n", + "0.781544 | \n", + "20.8 | \n", + "
1 | \n", + "Reaction_3629 | \n", + "Sequence_3212 | \n", + "[0.92] | \n", + "[Q0PC20] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", + "{InChI=1S/C10H13N5O3/c1-4-6(16)7(17)10(18-4)15... | \n", + "... | \n", + "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", + "[0.13206507, -0.10826899, -0.31126085, 0.95038... | \n", + "-0.036212 | \n", + "0.340741 | \n", + "1.000000 | \n", + "0.090196 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "-0.036212 | \n", + "0.537214 | \n", + "35.3 | \n", + "
2 | \n", + "Reaction_375 | \n", + "Sequence_26 | \n", + "[21.0] | \n", + "[Q0GYU4] | \n", + "[0] | \n", + "[1] | \n", + "[0] | \n", + "[False] | \n", + "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", + "{InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-1... | \n", + "... | \n", + "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", + "[0.3617253, 0.8765441, -1.0668296, 1.5401511, ... | \n", + "1.322219 | \n", + "0.175000 | \n", + "0.147887 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... | \n", + "1.322219 | \n", + "0.927225 | \n", + "40.1 | \n", + "
3 | \n", + "Reaction_4312 | \n", + "Sequence_3788 | \n", + "[4.4] | \n", + "[Q8ZNC4] | \n", + "[0] | \n", + "[0] | \n", + "[1] | \n", + "[False] | \n", + "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", + "{InChI=1S/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,... | \n", + "... | \n", + "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", + "[0.7798445, -0.7589981, -0.2779501, 0.2643281,... | \n", + "0.643453 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.643453 | \n", + "0.959929 | \n", + "25.9 | \n", + "
4 | \n", + "Reaction_2115 | \n", + "Sequence_712 | \n", + "[4.5] | \n", + "[P53602] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", + "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", + "... | \n", + "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", + "[-0.6100984, -0.054886594, -0.09893316, 0.2822... | \n", + "0.653213 | \n", + "1.000000 | \n", + "0.849057 | \n", + "0.112500 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.653213 | \n", + "0.933098 | \n", + "49.3 | \n", + "
5 rows × 28 columns
\n", + "\n", + " | Reaction ID | \n", + "Sequence ID | \n", + "kcat_values | \n", + "Uniprot IDs | \n", + "from_BRENDA | \n", + "from_Sabio | \n", + "from_Uniprot | \n", + "checked | \n", + "Sequence | \n", + "substrates | \n", + "... | \n", + "ESM1b_ts | \n", + "geomean_kcat | \n", + "frac_of_max_UID | \n", + "frac_of_max_RID | \n", + "frac_of_max_EC | \n", + "DRFP | \n", + "y_true | \n", + "y_pred | \n", + "max_ident | \n", + "sim_pred | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Reaction_3207 | \n", + "Sequence_2150 | \n", + "[219] | \n", + "[B9W4V6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", + "{InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C7H5NO4/c9... | \n", + "... | \n", + "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", + "2.340444 | \n", + "0.665653 | \n", + "1.0 | \n", + "0.11466 | \n", + "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "2.340444 | \n", + "0.781544 | \n", + "20.8 | \n", + "2.024332 | \n", + "
1 rows × 29 columns
\n", + "\n", + " | y_true | \n", + "y_pred | \n", + "Sequence | \n", + "max_ident | \n", + "sim_pred | \n", + "sim_pred_1 | \n", + "sim_pred_3 | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "-2.207608 | \n", + "-0.071899 | \n", + "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", + "22.8 | \n", + "-1.486273 | \n", + "-2.275724 | \n", + "-1.486273 | \n", + "
1 | \n", + "-3.657577 | \n", + "-2.707640 | \n", + "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", + "100.0 | \n", + "-2.369079 | \n", + "-2.221849 | \n", + "-2.369079 | \n", + "
2 | \n", + "0.949390 | \n", + "0.831021 | \n", + "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", + "100.0 | \n", + "0.946618 | \n", + "1.230449 | \n", + "0.455934 | \n", + "
3 | \n", + "1.672098 | \n", + "1.513026 | \n", + "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", + "100.0 | \n", + "1.045579 | \n", + "1.672098 | \n", + "1.045579 | \n", + "
4 | \n", + "-1.790485 | \n", + "-2.830310 | \n", + "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", + "99.4 | \n", + "-1.733113 | \n", + "0.995635 | \n", + "-1.733113 | \n", + "
\n", + " | Reaction | \n", + "Sequence | \n", + "kcats | \n", + "
---|---|---|---|
0 | \n", + "Reaction_0 | \n", + "Sequence_309 | \n", + "[2.8, 0.05, 0.11, 205.0, 2.3, 134.0, 360.0] | \n", + "
1 | \n", + "Reaction_1 | \n", + "Sequence_309 | \n", + "[1.2, 3.4, 0.61, 0.07] | \n", + "
2 | \n", + "Reaction_2 | \n", + "Sequence_3142 | \n", + "[6.18, 14.5, 11.58, 13.12, 11.9, 13.98, 14.08,... | \n", + "
3 | \n", + "Reaction_4 | \n", + "Sequence_3263 | \n", + "[57.1, 19.6, 5.96, 13.6, 26.4, 14.0, 41.1, 11.... | \n", + "
4 | \n", + "Reaction_5 | \n", + "Sequence_2101 | \n", + "[2.98, 0.87] | \n", + "
\n", + " | Reaction ID | \n", + "Sequence ID | \n", + "kcat_values | \n", + "Uniprot IDs | \n", + "from_BRENDA | \n", + "from_Sabio | \n", + "from_Uniprot | \n", + "checked | \n", + "Sequence | \n", + "substrates | \n", + "... | \n", + "ESM1b | \n", + "ESM1b_ts | \n", + "geomean_kcat | \n", + "frac_of_max_UID | \n", + "frac_of_max_RID | \n", + "frac_of_max_EC | \n", + "DRFP | \n", + "y_true | \n", + "y_pred | \n", + "y_pred_ENKIE | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Reaction_3207 | \n", + "Sequence_2150 | \n", + "[219] | \n", + "[B9W4V6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", + "{InChI=1S/H2O2/c1-2/h1-2H, InChI=1S/C7H5NO4/c9... | \n", + "... | \n", + "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", + "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", + "2.340444 | \n", + "0.665653 | \n", + "1.000000 | \n", + "0.114660 | \n", + "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "2.34044 | \n", + "0.78154 | \n", + "0.057496 | \n", + "
1 | \n", + "Reaction_3629 | \n", + "Sequence_3212 | \n", + "[0.92] | \n", + "[Q0PC20] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", + "{InChI=1S/C10H13N5O3/c1-4-6(16)7(17)10(18-4)15... | \n", + "... | \n", + "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", + "[0.13206507, -0.10826899, -0.31126085, 0.95038... | \n", + "-0.036212 | \n", + "0.340741 | \n", + "1.000000 | \n", + "0.090196 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "-0.03621 | \n", + "0.53721 | \n", + "0.740980 | \n", + "
2 | \n", + "Reaction_375 | \n", + "Sequence_26 | \n", + "[21.0] | \n", + "[Q0GYU4] | \n", + "[0] | \n", + "[1] | \n", + "[0] | \n", + "[False] | \n", + "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", + "{InChI=1S/p+1, InChI=1S/C4H8O2/c1-3(5)4(2)6/h3... | \n", + "... | \n", + "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", + "[0.3617253, 0.8765441, -1.0668296, 1.5401511, ... | \n", + "1.322219 | \n", + "0.175000 | \n", + "0.147887 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... | \n", + "1.32222 | \n", + "0.92722 | \n", + "1.153964 | \n", + "
3 | \n", + "Reaction_4312 | \n", + "Sequence_3788 | \n", + "[4.4] | \n", + "[Q8ZNC4] | \n", + "[0] | \n", + "[0] | \n", + "[1] | \n", + "[False] | \n", + "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", + "{InChI=1S/p+1, InChI=1S/C6H14N2O2/c7-4-2-1-3-5... | \n", + "... | \n", + "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", + "[0.7798445, -0.7589981, -0.2779501, 0.2643281,... | \n", + "0.643453 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.64345 | \n", + "0.95993 | \n", + "0.984732 | \n", + "
4 | \n", + "Reaction_2115 | \n", + "Sequence_712 | \n", + "[4.5] | \n", + "[P53602] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", + "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", + "... | \n", + "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", + "[-0.6100984, -0.054886594, -0.09893316, 0.2822... | \n", + "0.653213 | \n", + "1.000000 | \n", + "0.849057 | \n", + "0.112500 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.65321 | \n", + "0.93310 | \n", + "0.971932 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
845 | \n", + "Reaction_3029 | \n", + "Sequence_1106 | \n", + "[1.14] | \n", + "[Q8PDQ6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MSLAQLEHALQHDLQRLAHGGEPWVRPRVHPAGHVYDVVIVGAGQS... | \n", + "{InChI=1S/p+1, InChI=1S/C5H4N4O3/c10-3-1-2(7-4... | \n", + "... | \n", + "[0.07993014, 0.11095398, -0.0057218825, -0.049... | \n", + "[0.7720898, 0.107216544, -0.4964384, 0.1257281... | \n", + "0.056905 | \n", + "1.000000 | \n", + "1.000000 | \n", + "0.027143 | \n", + "[0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... | \n", + "0.05690 | \n", + "0.90836 | \n", + "0.207524 | \n", + "
846 | \n", + "Reaction_3310 | \n", + "Sequence_455 | \n", + "[5.8, 5.9, 4.8] | \n", + "[C7P8V7, C7P8V7, C7P8V7] | \n", + "[1, 1, 1] | \n", + "[0, 0, 0] | \n", + "[0, 0, 0] | \n", + "[False, False, False] | \n", + "MILFFEYAIASGFEDEGILEEGKMMFNTLLNQFLEIDNVTSLIHKD... | \n", + "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", + "... | \n", + "[0.15469086, 0.08214222, 0.006613599, 0.003951... | \n", + "[-1.1673366, -0.3592899, 0.034033816, -0.01010... | \n", + "0.738507 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.73851 | \n", + "0.98534 | \n", + "0.587646 | \n", + "
847 | \n", + "Reaction_1253 | \n", + "Sequence_1211 | \n", + "[3.3333] | \n", + "[O33289] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[True] | \n", + "MTERPRDCRPVVRRARTSDVPAIKQLVDTYAGKILLEKNLVTLYEA... | \n", + "{InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,... | \n", + "... | \n", + "[0.095282555, 0.077073924, 0.1310218, -0.01710... | \n", + "[-0.6074202, 0.69103533, -0.38513482, 0.311095... | \n", + "0.522874 | \n", + "1.000000 | \n", + "1.000000 | \n", + "4.273462 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.52287 | \n", + "0.82624 | \n", + "0.315951 | \n", + "
848 | \n", + "Reaction_1626 | \n", + "Sequence_783 | \n", + "[18.9] | \n", + "[P0AEP7] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[True] | \n", + "MAKMRAVDAAMYVLEKEGITTAFGVPGAAINPFYSAMRKHGGIRHI... | \n", + "{InChI=1S/C2H2O3/c3-1-2(4)5/h1H,(H,4,5)} | \n", + "... | \n", + "[0.07920394, 0.22367033, 0.120473295, 0.001293... | \n", + "[0.80772597, -0.8157556, -0.43358412, 0.318514... | \n", + "1.276462 | \n", + "1.000000 | \n", + "1.000000 | \n", + "0.959391 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "1.27646 | \n", + "1.26334 | \n", + "0.946862 | \n", + "
849 | \n", + "Reaction_898 | \n", + "Sequence_3700 | \n", + "[800.0] | \n", + "[P00387] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[True] | \n", + "MGAQLSTLGHMVLFPVWFLYSLLMKLFQRSTPAITLESPDIKYPLR... | \n", + "{InChI=1S/6CN.Fe/c6*1-2;/q;;;;;;-3, InChI=1S/C... | \n", + "... | \n", + "[-0.055920795, 0.26620504, 0.008486553, -0.058... | \n", + "[-0.21385467, -0.02001476, -0.59286845, 1.5044... | \n", + "2.903090 | \n", + "1.000000 | \n", + "0.727273 | \n", + "0.644641 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... | \n", + "2.90309 | \n", + "2.17199 | \n", + "2.896796 | \n", + "
825 rows × 28 columns
\n", + "\n", - " | Reaction ID | \n", - "Sequence ID | \n", - "kcat_values | \n", - "Uniprot IDs | \n", - "from_BRENDA | \n", - "from_Sabio | \n", - "from_Uniprot | \n", - "checked | \n", - "Sequence | \n", - "substrates | \n", - "... | \n", - "structural_fp | \n", - "difference_fp | \n", - "ESM1b | \n", - "geomean_kcat | \n", - "frac_of_max_UID | \n", - "frac_of_max_RID | \n", - "frac_of_max_EC | \n", - "y_true | \n", - "y_pred | \n", - "max_ident | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "Reaction_3207 | \n", - "Sequence_2150 | \n", - "[219] | \n", - "[B9W4V6] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", - "{InChI=1S/C7H5NO4/c9-8(10)5-1-2-6-7(3-5)12-4-1... | \n", - "... | \n", - "1100100000000000000000000000000001000001001000... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", - "2.340444 | \n", - "0.665653 | \n", - "1.000000 | \n", - "0.114660 | \n", - "2.340444 | \n", - "1.082393 | \n", - "20.8 | \n", - "
1 | \n", - "Reaction_3629 | \n", - "Sequence_3212 | \n", - "[0.92] | \n", - "[Q0PC20] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", - "{InChI=1S/C10H13N5O3/c1-4-6(16)7(17)10(18-4)15... | \n", - "... | \n", - "1100100100000000000000100010010001000001001100... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", - "-0.036212 | \n", - "0.340741 | \n", - "1.000000 | \n", - "0.090196 | \n", - "-0.036212 | \n", - "0.370715 | \n", - "35.3 | \n", - "
2 | \n", - "Reaction_375 | \n", - "Sequence_26 | \n", - "[21.0] | \n", - "[Q0GYU4] | \n", - "[0] | \n", - "[1] | \n", - "[0] | \n", - "[False] | \n", - "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", - "{InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-1... | \n", - "... | \n", - "1100111100000001001000110110010001001111111100... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", - "1.322219 | \n", - "0.175000 | \n", - "0.147887 | \n", - "1.000000 | \n", - "1.322219 | \n", - "-0.119795 | \n", - "40.1 | \n", - "
3 | \n", - "Reaction_4312 | \n", - "Sequence_3788 | \n", - "[4.4] | \n", - "[Q8ZNC4] | \n", - "[0] | \n", - "[0] | \n", - "[1] | \n", - "[False] | \n", - "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", - "{InChI=1S/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,... | \n", - "... | \n", - "0000000000000000000000000000000001000001001000... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", - "0.643453 | \n", - "1.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.643453 | \n", - "1.042994 | \n", - "25.9 | \n", - "
4 | \n", - "Reaction_2115 | \n", - "Sequence_712 | \n", - "[4.5] | \n", - "[P53602] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", - "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", - "... | \n", - "1100110100000000000000110110010001000001111100... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", - "0.653213 | \n", - "1.000000 | \n", - "0.849057 | \n", - "0.112500 | \n", - "0.653213 | \n", - "0.755961 | \n", - "49.3 | \n", - "
5 rows × 26 columns
\n", - "\n", - " | Reaction ID | \n", - "Sequence ID | \n", - "kcat_values | \n", - "Uniprot IDs | \n", - "from_BRENDA | \n", - "from_Sabio | \n", - "from_Uniprot | \n", - "checked | \n", - "Sequence | \n", - "substrates | \n", - "... | \n", - "difference_fp | \n", - "ESM1b | \n", - "geomean_kcat | \n", - "frac_of_max_UID | \n", - "frac_of_max_RID | \n", - "frac_of_max_EC | \n", - "y_true | \n", - "y_pred | \n", - "max_ident | \n", - "sim_pred | \n", - "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", - "Reaction_3207 | \n", - "Sequence_2150 | \n", - "[219] | \n", - "[B9W4V6] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", - "{InChI=1S/C7H5NO4/c9-8(10)5-1-2-6-7(3-5)12-4-1... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", - "2.340444 | \n", - "0.665653 | \n", - "1.000000 | \n", - "0.114660 | \n", - "2.340444 | \n", - "1.082393 | \n", - "20.8 | \n", - "2.024332 | \n", - "
1 | \n", - "Reaction_3629 | \n", - "Sequence_3212 | \n", - "[0.92] | \n", - "[Q0PC20] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", - "{InChI=1S/C10H13N5O3/c1-4-6(16)7(17)10(18-4)15... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", - "-0.036212 | \n", - "0.340741 | \n", - "1.000000 | \n", - "0.090196 | \n", - "-0.036212 | \n", - "0.370715 | \n", - "35.3 | \n", - "0.188301 | \n", - "
2 | \n", - "Reaction_375 | \n", - "Sequence_26 | \n", - "[21.0] | \n", - "[Q0GYU4] | \n", - "[0] | \n", - "[1] | \n", - "[0] | \n", - "[False] | \n", - "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", - "{InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-1... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", - "1.322219 | \n", - "0.175000 | \n", - "0.147887 | \n", - "1.000000 | \n", - "1.322219 | \n", - "-0.119795 | \n", - "40.1 | \n", - "1.910943 | \n", - "
3 | \n", - "Reaction_4312 | \n", - "Sequence_3788 | \n", - "[4.4] | \n", - "[Q8ZNC4] | \n", - "[0] | \n", - "[0] | \n", - "[1] | \n", - "[False] | \n", - "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", - "{InChI=1S/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", - "0.643453 | \n", - "1.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.643453 | \n", - "1.042994 | \n", - "25.9 | \n", - "0.817796 | \n", - "
4 | \n", - "Reaction_2115 | \n", - "Sequence_712 | \n", - "[4.5] | \n", - "[P53602] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", - "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", - "0.653213 | \n", - "1.000000 | \n", - "0.849057 | \n", - "0.112500 | \n", - "0.653213 | \n", - "0.755961 | \n", - "49.3 | \n", - "0.815944 | \n", - "
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "
845 | \n", - "Reaction_3029 | \n", - "Sequence_1106 | \n", - "[1.14] | \n", - "[Q8PDQ6] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[False] | \n", - "MSLAQLEHALQHDLQRLAHGGEPWVRPRVHPAGHVYDVVIVGAGQS... | \n", - "{InChI=1S/O2/c1-2, InChI=1S/C21H30N7O17P3/c22-... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.07993014, 0.11095398, -0.0057218825, -0.049... | \n", - "0.056905 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.027143 | \n", - "0.056905 | \n", - "0.823211 | \n", - "21.6 | \n", - "1.452899 | \n", - "
846 | \n", - "Reaction_3310 | \n", - "Sequence_455 | \n", - "[5.8, 5.9, 4.8] | \n", - "[C7P8V7, C7P8V7, C7P8V7] | \n", - "[1, 1, 1] | \n", - "[0, 0, 0] | \n", - "[0, 0, 0] | \n", - "[False, False, False] | \n", - "MILFFEYAIASGFEDEGILEEGKMMFNTLLNQFLEIDNVTSLIHKD... | \n", - "{InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.15469086, 0.08214222, 0.006613599, 0.003951... | \n", - "0.738507 | \n", - "1.000000 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.738507 | \n", - "1.022113 | \n", - "21.1 | \n", - "0.696922 | \n", - "
847 | \n", - "Reaction_1253 | \n", - "Sequence_1211 | \n", - "[3.3333] | \n", - "[O33289] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[True] | \n", - "MTERPRDCRPVVRRARTSDVPAIKQLVDTYAGKILLEKNLVTLYEA... | \n", - "{InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 0.0,... | \n", - "[0.095282555, 0.077073924, 0.1310218, -0.01710... | \n", - "0.522874 | \n", - "1.000000 | \n", - "1.000000 | \n", - "4.273462 | \n", - "0.522874 | \n", - "0.741099 | \n", - "22.8 | \n", - "1.854782 | \n", - "
848 | \n", - "Reaction_1626 | \n", - "Sequence_783 | \n", - "[18.9] | \n", - "[P0AEP7] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[True] | \n", - "MAKMRAVDAAMYVLEKEGITTAFGVPGAAINPFYSAMRKHGGIRHI... | \n", - "{InChI=1S/C2H2O3/c3-1-2(4)5/h1H,(H,4,5)} | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[0.07920394, 0.22367033, 0.120473295, 0.001293... | \n", - "1.276462 | \n", - "1.000000 | \n", - "1.000000 | \n", - "0.959391 | \n", - "1.276462 | \n", - "1.353278 | \n", - "28.9 | \n", - "1.545336 | \n", - "
849 | \n", - "Reaction_898 | \n", - "Sequence_3700 | \n", - "[800.0] | \n", - "[P00387] | \n", - "[1] | \n", - "[0] | \n", - "[0] | \n", - "[True] | \n", - "MGAQLSTLGHMVLFPVWFLYSLLMKLFQRSTPAITLESPDIKYPLR... | \n", - "{InChI=1S/6CN.Fe/c6*1-2;/q;;;;;;-3, InChI=1S/C... | \n", - "... | \n", - "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", - "[-0.055920795, 0.26620504, 0.008486553, -0.058... | \n", - "2.903090 | \n", - "1.000000 | \n", - "0.727273 | \n", - "0.644641 | \n", - "2.903090 | \n", - "2.413414 | \n", - "87.0 | \n", - "2.706351 | \n", - "
850 rows × 27 columns
\n", - "\n", - " | y_true | \n", - "y_pred | \n", - "Sequence | \n", - "max_ident | \n", - "sim_pred | \n", - "
---|---|---|---|---|---|
0 | \n", - "-2.207608 | \n", - "-0.071899 | \n", - "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", - "22.8 | \n", - "-1.486273 | \n", - "
1 | \n", - "-3.657577 | \n", - "-2.707640 | \n", - "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", - "100.0 | \n", - "-2.369079 | \n", - "
2 | \n", - "0.949390 | \n", - "0.831021 | \n", - "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", - "100.0 | \n", - "0.946618 | \n", - "
3 | \n", - "1.672098 | \n", - "1.513026 | \n", - "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", - "100.0 | \n", - "1.045579 | \n", - "
4 | \n", - "-1.790485 | \n", - "-2.830310 | \n", - "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", - "99.4 | \n", - "-1.733113 | \n", - "
\n", - " | Uniprot ID | \n", - "Organism | \n", - "
---|---|---|
0 | \n", - "F7YTI3 | \n", - "Pseudothermotoga thermarum DSM 5069 | \n", - "
1 | \n", - "O05306 | \n", - "Mycobacterium tuberculosis (strain ATCC 25618 ... | \n", - "
2 | \n", - "A8IKD2 | \n", - "Azorhizobium caulinodans (strain ATCC 43989 / ... | \n", - "
3 | \n", - "Q8PDQ6 | \n", - "Xanthomonas campestris pv. campestris (strain ... | \n", - "
4 | \n", - "Q6XL56 | \n", - "Fusobacterium nucleatum | \n", - "
\n", - " | Reaction | \n", - "Sequence | \n", - "kcats | \n", - "
---|---|---|---|
0 | \n", - "Reaction_0 | \n", - "Sequence_309 | \n", - "[2.8, 0.05, 0.11, 205.0, 2.3, 134.0, 360.0] | \n", - "
1 | \n", - "Reaction_1 | \n", - "Sequence_309 | \n", - "[1.2, 3.4, 0.61, 0.07] | \n", - "
2 | \n", - "Reaction_2 | \n", - "Sequence_3142 | \n", - "[6.18, 14.5, 11.58, 13.12, 11.9, 13.98, 14.08,... | \n", - "
3 | \n", - "Reaction_4 | \n", - "Sequence_3263 | \n", - "[57.1, 19.6, 5.96, 13.6, 26.4, 14.0, 41.1, 11.... | \n", - "
4 | \n", - "Reaction_5 | \n", - "Sequence_2101 | \n", - "[2.98, 0.87] | \n", - "
\n", + " | Reaction ID | \n", + "Sequence ID | \n", + "kcat_values | \n", + "Uniprot IDs | \n", + "from_BRENDA | \n", + "from_Sabio | \n", + "from_Uniprot | \n", + "checked | \n", + "Sequence | \n", + "substrates | \n", + "... | \n", + "ESM1b | \n", + "ESM1b_ts | \n", + "geomean_kcat | \n", + "frac_of_max_UID | \n", + "frac_of_max_RID | \n", + "frac_of_max_EC | \n", + "DRFP | \n", + "y_true | \n", + "y_pred | \n", + "max_ident | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Reaction_3207 | \n", + "Sequence_2150 | \n", + "[219] | \n", + "[B9W4V6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", + "{InChI=1S/C7H5NO4/c9-8(10)5-1-2-6-7(3-5)12-4-1... | \n", + "... | \n", + "[0.020693962, 0.16804111, 0.0377352, 0.1768811... | \n", + "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", + "2.340444 | \n", + "0.665653 | \n", + "1.000000 | \n", + "0.114660 | \n", + "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "2.340444 | \n", + "0.781544 | \n", + "20.8 | \n", + "
1 | \n", + "Reaction_3629 | \n", + "Sequence_3212 | \n", + "[0.92] | \n", + "[Q0PC20] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MMKIAILGAMSEEITPLLETLKDYTKIEHANNTYYFAKYKNHELVL... | \n", + "{InChI=1S/H2O/h1H2, InChI=1S/C10H13N5O3/c1-4-6... | \n", + "... | \n", + "[0.07429815, 0.14984865, -0.08539086, 0.098546... | \n", + "[0.13206507, -0.10826899, -0.31126085, 0.95038... | \n", + "-0.036212 | \n", + "0.340741 | \n", + "1.000000 | \n", + "0.090196 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "-0.036212 | \n", + "0.537214 | \n", + "35.3 | \n", + "
2 | \n", + "Reaction_375 | \n", + "Sequence_26 | \n", + "[21.0] | \n", + "[Q0GYU4] | \n", + "[0] | \n", + "[1] | \n", + "[0] | \n", + "[False] | \n", + "MASKTYTLNTGAKIPAVGFGTFANEGAKGETYAAVTKALDVGYRHL... | \n", + "{InChI=1S/C4H8O2/c1-3(5)4(2)6/h3,5H,1-2H3, InC... | \n", + "... | \n", + "[-0.0272103, 0.2500836, 0.08181338, 0.03990136... | \n", + "[0.3617253, 0.8765441, -1.0668296, 1.5401511, ... | \n", + "1.322219 | \n", + "0.175000 | \n", + "0.147887 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ... | \n", + "1.322219 | \n", + "0.927225 | \n", + "40.1 | \n", + "
3 | \n", + "Reaction_4312 | \n", + "Sequence_3788 | \n", + "[4.4] | \n", + "[Q8ZNC4] | \n", + "[0] | \n", + "[0] | \n", + "[1] | \n", + "[False] | \n", + "MTDSIMQNYNQLREQVINGDRRFQHKDGHLCFEGVDLDALARQYPT... | \n", + "{InChI=1S/p+1, InChI=1S/C6H14N2O2/c7-4-2-1-3-5... | \n", + "... | \n", + "[0.079942256, 0.23130149, -0.012637342, 0.0787... | \n", + "[0.7798445, -0.7589981, -0.2779501, 0.2643281,... | \n", + "0.643453 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.643453 | \n", + "0.959929 | \n", + "25.9 | \n", + "
4 | \n", + "Reaction_2115 | \n", + "Sequence_712 | \n", + "[4.5] | \n", + "[P53602] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MASEKPLAAVTCTAPVNIAVIKYWGKRDEELVLPINSSLSVTLHQD... | \n", + "{InChI=1S/C6H14O10P2/c1-6(9,4-5(7)8)2-3-15-18(... | \n", + "... | \n", + "[0.086191244, 0.21010432, 0.1960825, -0.041225... | \n", + "[-0.6100984, -0.054886594, -0.09893316, 0.2822... | \n", + "0.653213 | \n", + "1.000000 | \n", + "0.849057 | \n", + "0.112500 | \n", + "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "0.653213 | \n", + "0.933098 | \n", + "49.3 | \n", + "
5 rows × 28 columns
\n", + "\n", + " | Reaction ID | \n", + "Sequence ID | \n", + "kcat_values | \n", + "Uniprot IDs | \n", + "from_BRENDA | \n", + "from_Sabio | \n", + "from_Uniprot | \n", + "checked | \n", + "Sequence | \n", + "substrates | \n", + "... | \n", + "ESM1b_ts | \n", + "geomean_kcat | \n", + "frac_of_max_UID | \n", + "frac_of_max_RID | \n", + "frac_of_max_EC | \n", + "DRFP | \n", + "y_true | \n", + "y_pred | \n", + "max_ident | \n", + "sim_pred | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "Reaction_3207 | \n", + "Sequence_2150 | \n", + "[219] | \n", + "[B9W4V6] | \n", + "[1] | \n", + "[0] | \n", + "[0] | \n", + "[False] | \n", + "MKYFPLFPTLVFAARVVAFPAYASLAGLSQQELDAIIPTLEAREPG... | \n", + "{InChI=1S/C7H5NO4/c9-8(10)5-1-2-6-7(3-5)12-4-1... | \n", + "... | \n", + "[0.83155197, 0.08632717, -0.42143562, 0.419359... | \n", + "2.340444 | \n", + "0.665653 | \n", + "1.0 | \n", + "0.11466 | \n", + "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", + "2.340444 | \n", + "0.781544 | \n", + "20.8 | \n", + "2.024332 | \n", + "
1 rows × 29 columns
\n", + "\n", + " | y_true | \n", + "y_pred | \n", + "Sequence | \n", + "max_ident | \n", + "sim_pred | \n", + "sim_pred_1 | \n", + "sim_pred_3 | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "-2.207608 | \n", + "-0.071899 | \n", + "MSAIDCIITAAGLSSRMGQWKMMLPWEQGTILDTSIKNALQFCSRI... | \n", + "22.8 | \n", + "-1.486273 | \n", + "-2.275724 | \n", + "-1.486273 | \n", + "
1 | \n", + "-3.657577 | \n", + "-2.707640 | \n", + "MKEFYLTVEQIGDSIFERYIDSNGRERTREVEYKPSLFAHCPESQA... | \n", + "100.0 | \n", + "-2.369079 | \n", + "-2.221849 | \n", + "-2.369079 | \n", + "
2 | \n", + "0.949390 | \n", + "0.831021 | \n", + "MSPSKMNATVGSTSEVEQKIRQELALSDEVTTIRRNAPAAVLYEDG... | \n", + "100.0 | \n", + "0.946618 | \n", + "1.230449 | \n", + "0.455934 | \n", + "
3 | \n", + "1.672098 | \n", + "1.513026 | \n", + "MKNVGFIGWRGMVGSVLMQRMVEERDFDAIRPVFFSTSQLGQAAPS... | \n", + "100.0 | \n", + "1.045579 | \n", + "1.672098 | \n", + "1.045579 | \n", + "
4 | \n", + "-1.790485 | \n", + "-2.830310 | \n", + "MATSTETISSLAQPFVHLENPINSPLVKETIRPRNDTTITPPPTQW... | \n", + "99.4 | \n", + "-1.733113 | \n", + "0.995635 | \n", + "-1.733113 | \n", + "
\n", + " | Reaction | \n", + "Sequence | \n", + "kcats | \n", + "
---|---|---|---|
0 | \n", + "Reaction_0 | \n", + "Sequence_309 | \n", + "[2.8, 0.05, 0.11, 205.0, 2.3, 134.0, 360.0] | \n", + "
1 | \n", + "Reaction_1 | \n", + "Sequence_309 | \n", + "[1.2, 3.4, 0.61, 0.07] | \n", + "
2 | \n", + "Reaction_2 | \n", + "Sequence_3142 | \n", + "[6.18, 14.5, 11.58, 13.12, 11.9, 13.98, 14.08,... | \n", + "
3 | \n", + "Reaction_4 | \n", + "Sequence_3263 | \n", + "[57.1, 19.6, 5.96, 13.6, 26.4, 14.0, 41.1, 11.... | \n", + "
4 | \n", + "Reaction_5 | \n", + "Sequence_2101 | \n", + "[2.98, 0.87] | \n", + "