From cc686df34fa94895403b0249f01cfc5e0f0c3038 Mon Sep 17 00:00:00 2001 From: nkempynck Date: Thu, 28 Nov 2024 15:26:37 +0100 Subject: [PATCH 1/2] msecosinelog function multiplier parameter --- docs/tutorials/model_training_and_eval.ipynb | 205 ++++--------------- src/crested/tl/losses/_cosinemse_log.py | 10 +- 2 files changed, 44 insertions(+), 171 deletions(-) diff --git a/docs/tutorials/model_training_and_eval.ipynb b/docs/tutorials/model_training_and_eval.ipynb index 3a130f0..1f06143 100644 --- a/docs/tutorials/model_training_and_eval.ipynb +++ b/docs/tutorials/model_training_and_eval.ipynb @@ -53,11 +53,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-15 13:47:25.650703: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-11-15 13:47:25.687972: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-28 15:24:43.441628: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-11-28 15:24:43.480900: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-11-15 13:47:28.235522: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", - "\u001b[32m2024-11-15 13:47:36.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mcrested.tl\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m25\u001b[0m - \u001b[33m\u001b[1mmodiscolite is not installed, 'crested.tl.modisco' module will not be available.\u001b[0m\n" + "2024-11-28 15:24:45.799208: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], @@ -112,27 +111,15 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2024-11-15T13:43:03.892523+0100 INFO Extracting values from 19 bigWig files...\n" + "2024-11-28T15:25:02.622578+0100 INFO Extracting values from 19 bigWig files...\n" ] - }, - { - "data": { - "text/plain": [ - "AnnData object with n_obs × n_vars = 19 × 546993\n", - " obs: 'file_path'\n", - " var: 'chr', 'start', 'end'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -156,173 +143,55 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose the chromosomes for the validation and test sets\n", + "crested.pp.train_val_test_split(\n", + " adata, strategy=\"chr\", val_chroms=[\"chr8\", \"chr10\"], test_chroms=[\"chr9\", \"chr18\"]\n", + ")\n", + "\n", + "# Alternatively, We can split randomly on the regions\n", + "# crested.pp.train_val_test_split(\n", + "# adata, strategy=\"region\", val_size=0.1, test_size=0.1, random_state=42\n", + "# )\n", + "\n", + "print(adata.var[\"split\"].value_counts())\n", + "adata.var" + ] + }, + { + "cell_type": "code", + "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "split\n", - "train 440993\n", - "val 56064\n", - "test 49936\n", - "Name: count, dtype: int64\n" + "chr1:9458485-9458985\n" ] }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chrstartendsplit
region
chr1:3093998-3096112chr130939983096112train
chr1:3094663-3096777chr130946633096777train
chr1:3111367-3113481chr131113673113481train
chr1:3112727-3114841chr131127273114841train
chr1:3118939-3121053chr131189393121053train
...............
chrX:169878506-169880620chrX169878506169880620train
chrX:169879374-169881488chrX169879374169881488train
chrX:169924670-169926784chrX169924670169926784train
chrX:169947743-169949857chrX169947743169949857train
chrX:169950171-169952285chrX169950171169952285train
\n", - "

546993 rows × 4 columns

\n", - "
" - ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAAESCAYAAABaTzw3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3eklEQVR4nO3deZhWZfk48HtYZhhgQBZZZFURBBU1NAXMpVQQc7kswSVZNU1BcQW+aW4lapqkKaY/BfyKuOZSpt9wwUoxEwStDMqVRKNEQVJZ5Pn94TVvvDAD78AMc4DP57re6+I923s/N2ebc59znqKUUgoAAAAAAIAMq1PbAQAAAAAAAGyIggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5tXb3D+4evXqWLhwYZSVlUVRUdHm/nkAAAAAACBDUkrxySefxA477BB16lT+HMZmL2gsXLgwOnTosLl/FgAAAAAAyLAFCxZE+/btKx2/2QsaZWVlEfFlYE2aNNncPw8AAAAAAGTI0qVLo0OHDrn6QWU2e0Gj/DVTTZo0UdAAAAAAAAAiIjbYTYVOwQEAAAAAgMxT0AAAAAAAADJPQQMAAAAAAMg8BQ0AAAAAACDzqlTQWLVqVVx88cWx4447Rmlpaey0005xxRVXxOrVq2sqPgAAAAAAgKhXlYmvueaauPXWW2PKlCmx2267xcsvvxzDhg2Lpk2bxjnnnFNTMQIAAAAAANu4KhU0Zs6cGcccc0wceeSRERHRuXPnmDZtWrz88ss1EhwAAAAAAEBEFV85dcABB8TTTz8d8+fPj4iIuXPnxu9///sYMGBApfMsX748li5dmvcBAAAAAACoiio9oTFmzJhYsmRJ7LrrrlG3bt344osv4kc/+lGceOKJlc4zfvz4uPzyyzc5UICa1nns47Udwmb19tVH1nYIAAAAAFCwKj2hcd9998Xdd98d99xzT8yePTumTJkS1113XUyZMqXSecaNGxdLlizJfRYsWLDJQQMAAAAAANuWKj2hceGFF8bYsWPjhBNOiIiIPfbYI955550YP358DBkypMJ5SkpKoqSkZNMjBQAAAAAAtllVekLj008/jTp18mepW7durF69ulqDAgAAAAAAWFOVntA46qij4kc/+lF07Ngxdtttt3jllVfiJz/5SQwfPrym4gMAAAAAAKhaQeOmm26KSy65JM4888xYtGhR7LDDDnH66afHD37wg5qKDwAAAAAAoGoFjbKyspgwYUJMmDChhsIBAAAAAABYV5X60AAAAAAAAKgNChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB59Wo7AAAAAACoLZ3HPl7bIWxWb1995EbPK1dAbfOEBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZF692g4AgC1P57GP13YIm9XbVx9Z2yEAAAAAbPM8oQEAAAAAAGSeJzQAAAAAAKgV3gJBVXhCAwAAAAAAyDwFDQAAAAAAIPMUNAAAAAAAgMyrch8a7733XowZMyaeeOKJ+Oyzz6Jr165xxx13RK9evWoiPgAA2GjexwsAALD1qFJB46OPPoq+ffvGIYccEk888US0atUq3njjjdhuu+1qKDwAYFvhwjMAAACwPlUqaFxzzTXRoUOHmDRpUm5Y586dqzsmANhquEhPTbBeAQAAsC2qUh8ajz32WOyzzz5x/PHHR6tWrWLvvfeO22+/fb3zLF++PJYuXZr3AQAAAAAAqIoqFTTefPPNmDhxYuyyyy7xf//3f3HGGWfE2WefHXfddVel84wfPz6aNm2a+3To0GGTgwYAAAAAALYtVXrl1OrVq2OfffaJq666KiIi9t577/jzn/8cEydOjMGDB1c4z7hx4+K8887LfV+6dKmiBgAAAACw1fKaWKgZVSpotG3bNnr06JE3rHv37vHQQw9VOk9JSUmUlJRsXHRQCQcFAAAAAIBtS5VeOdW3b9+YN29e3rD58+dHp06dqjUoAAAAAACANVWpoHHuuefGiy++GFdddVX8/e9/j3vuuSduu+22OOuss2oqPgAAAAAAgKoVNPbdd994+OGHY9q0abH77rvHlVdeGRMmTIiTTz65puIDAAAAAACoWh8aERHf/OY345vf/GZNxAIAAAAAAFChKj2hAQAAAAAAUBsUNAAAAAAAgMxT0AAAAAAAADKvyn1oAAAAANSGzmMfr+0QNqu3rz6ytkMAgExR0ICtnBN+AAAAAGBr4JVTAAAAAABA5nlCAwAA8FQnAACQeZ7QAAAAAAAAMk9BAwAAAAAAyDwFDQAAAAAAIPMUNAAAAAAAgMxT0AAAAAAAADJPQQMAAAAAAMg8BQ0AAAAAACDzFDQAAAAAAIDMU9AAAAAAAAAyT0EDAAAAAADIvHq1HQD/1Xns47Udwmb19tVH1nYIAAAAAABsITyhAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDm1avtAAAAALYkncc+XtshbFZvX31kbYcAAAAR4QkNAAAAAABgC6CgAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZF692g4AAAAAtmWdxz5e2yFsVm9ffWRthwAAbKE8oQEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmbVJBY/z48VFUVBSjR4+upnAAAAAAAADWtdEFjT/+8Y9x2223Rc+ePaszHgAAAAAAgHVsVEFj2bJlcfLJJ8ftt98ezZo1q+6YAAAAAAAA8mxUQeOss86KI488Mg499NANTrt8+fJYunRp3gcAAAAAAKAq6lV1hnvvvTdmz54df/zjHwuafvz48XH55ZdXOTAAAAAAAIByVXpCY8GCBXHOOefE3XffHQ0aNChonnHjxsWSJUtynwULFmxUoAAAAAAAwLarSk9ozJo1KxYtWhS9evXKDfviiy/it7/9bfzsZz+L5cuXR926dfPmKSkpiZKSkuqJFgAAAAAA2CZVqaDxjW98I1577bW8YcOGDYtdd901xowZs04xAwAAAAAAoDpUqaBRVlYWu+++e96wRo0aRYsWLdYZDgAAAAAAUF2q1IcGAAAAAABAbajSExoVmTFjRjWEAQAAAAAAUDlPaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5lWpoDF+/PjYd999o6ysLFq1ahXHHntszJs3r6ZiAwAAAAAAiIgqFjSee+65OOuss+LFF1+M6dOnx6pVq+Lwww+P//znPzUVHwAAAAAAQNSrysRPPvlk3vdJkyZFq1atYtasWXHggQdWa2AAAAAAAADlqlTQWNuSJUsiIqJ58+aVTrN8+fJYvnx57vvSpUs35ScBAAAAAIBt0EZ3Cp5SivPOOy8OOOCA2H333Sudbvz48dG0adPcp0OHDhv7kwAAAAAAwDZqowsaI0eOjFdffTWmTZu23unGjRsXS5YsyX0WLFiwsT8JAAAAAABsozbqlVOjRo2Kxx57LH77299G+/bt1zttSUlJlJSUbFRwAAAAAAAAEVUsaKSUYtSoUfHwww/HjBkzYscdd6ypuAAAAAAAAHKqVNA466yz4p577olHH300ysrK4oMPPoiIiKZNm0ZpaWmNBAgAAAAAAFClPjQmTpwYS5YsiYMPPjjatm2b+9x33301FR8AAAAAAEDVXzkFAAAAAACwuVXpCQ0AAAAAAIDaoKABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOZtVEHjlltuiR133DEaNGgQvXr1it/97nfVHRcAAAAAAEBOlQsa9913X4wePTq+//3vxyuvvBJf+9rX4ogjjoh33323JuIDAAAAAACoekHjJz/5SYwYMSJOPfXU6N69e0yYMCE6dOgQEydOrIn4AAAAAAAAol5VJl6xYkXMmjUrxo4dmzf88MMPjxdeeKHCeZYvXx7Lly/PfV+yZElERCxdurSqsW71Vi//tLZD2Kw2ZR2Qq8LJVeHkqnByVTi5KpxcFU6uCidXhZOrwskV1c06VTi5oiZYrwonV4WTq8LJFRH/zUtKab3TFaUNTbGGhQsXRrt27eL555+PPn365IZfddVVMWXKlJg3b94681x22WVx+eWXF/oTAAAAAADANmjBggXRvn37SsdX6QmNckVFRXnfU0rrDCs3bty4OO+883LfV69eHYsXL44WLVpUOg+bz9KlS6NDhw6xYMGCaNKkSW2Hk2lyVTi5KpxcFU6uCidXhZOrwslV4eSqcHJVOLkqnFwVRp4KJ1eFk6vCyVXh5KpwclU4ucqelFJ88sknscMOO6x3uioVNFq2bBl169aNDz74IG/4okWLonXr1hXOU1JSEiUlJXnDtttuu6r8LJtBkyZNbLwFkqvCyVXh5KpwclU4uSqcXBVOrgonV4WTq8LJVeHkqjDyVDi5KpxcFU6uCidXhZOrwslVtjRt2nSD01SpU/Di4uLo1atXTJ8+PW/49OnT815BBQAAAAAAUJ2q/Mqp8847L0455ZTYZ599onfv3nHbbbfFu+++G2eccUZNxAcAAAAAAFD1gsagQYPiww8/jCuuuCLef//92H333ePXv/51dOrUqSbio4aVlJTEpZdeus5rwViXXBVOrgonV4WTq8LJVeHkqnByVTi5KpxcFU6uCidXhZGnwslV4eSqcHJVOLkqnFwVTq62XEUppVTbQQAAAAAAAKxPlfrQAAAAAAAAqA0KGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggZsohkzZkRRUVF8/PHHtR0KAJBBb7/9dhQVFcWcOXNqOxRq0MEHHxyjR4+u7TCgQp07d44JEybkvhcVFcUjjzxSa/FsbSZPnhzbbbdd7vtll10We+21V63FA1uzoUOHxrHHHlvty7Udb1v8/27ZFDS2Ei+88ELUrVs3+vfvX/A82+If10OHDo2ioqJ1PlXJ29aoshOCxYsXx6hRo6Jbt27RsGHD6NixY5x99tmxZMmSgpd99NFHR8eOHaNBgwbRtm3bOOWUU2LhwoW58Wuuh5dddlmF/z9rft5+++1qaHH1Wt8J1cEHH7xOG0444YS8aSpr67333lvpOrvmZ0tVHXl75JFHYvLkyRvM0YwZM2q+QVWwqW3fkMcffzz222+/KC0tjZYtW8Zxxx2XG1e+zbVq1So++eSTvPn22muvuOyyyyqMpaSkJLp27RpXXXVVfPHFF1WKp6Zt6I+amTNnxte//vVo1KhRbLfddnHwwQfHZ599ljfNs88+GwMGDIgWLVpEw4YNo0ePHnH++efHe++9t1Vuh5uasy15+9tY1bGebclq6uLBpqqOfeb6lG//Z5xxxjrjzjzzzCgqKoqhQ4dGRMQvfvGLuPLKK6vtt6uqfP9e/mnWrFkceOCB8dxzz0VExFFHHRWHHnpohfPOnDkzioqKYvbs2bnl1KtXL95777286d5///2oV69erZyTLViwIEaMGBE77LBDFBcXR6dOneKcc86JDz/8MDdNIcetlFLcdtttsd9++0Xjxo1ju+22i3322ScmTJgQn3766WZtU3UpJDdre//99+OII47YjFHWrjWP5fXr14+ddtopLrjggvjPf/5T26Fl1qJFi+L000+Pjh07RklJSbRp0yb69esXM2fOrO3QthibksO1i5BZVBPt2xr30YUq9Di9vut45efm3bt3X2fc/fffH0VFRdG5c+e84StWrIhrr7029txzz2jYsGG0bNky+vbtG5MmTYqVK1duarNqRSG5/PrXvx5PP/30Zo6M6qKgsZW48847Y9SoUfH73/8+3n333Wpd9ooVK6p1ebWtf//+8f777+d9pk2bVtthZdLChQtj4cKFcd1118Vrr70WkydPjieffDJGjBhR8DIOOeSQuP/++2PevHnx0EMPxRtvvBHf/va3K5z2ggsuyPt/ad++fVxxxRV5wzp06FBdzdtsTjvttLw2/PznP19nmkmTJq2zXh577LHx05/+NG9YRdNurQrJW0TEoEGD8qbr3bv3OvP26dNnM0e/aQpte0UeeuihOOWUU2LYsGExd+7ceP755+Okk05aZ7pPPvkkrrvuuoJjmTdvXpx99tlx8cUXFzRfVsycOTP69+8fhx9+eLz00kvxxz/+MUaOHBl16vz3FOjnP/95HHroodGmTZt46KGH4i9/+UvceuutsWTJkrj++uu3ue2wkJyV2xq3v41RlZxR/TZln1mIDh06xL333ptXoPr8889j2rRp0bFjx9yw5s2bR1lZ2Ub9RkopVq1atcmxRkQ89dRT8f7778dzzz0XTZo0iQEDBsRbb70VI0aMiGeeeSbeeeeddea58847Y6+99oqvfOUruWE77LBD3HXXXXnTTZkyJdq1a1ctcVbFm2++Gfvss0/Mnz8/pk2bFn//+9/j1ltvjaeffjp69+4dixcvzk27oePWKaecEqNHj45jjjkmnn322ZgzZ05ccskl8eijj8ZvfvObzd62TVWV3KypTZs2UVJSspmjrV3lfwe++eab8cMf/jBuueWWuOCCC2o7rMz61re+FXPnzo0pU6bE/Pnz47HHHouDDz640nWKdW3tOayJ9m1t++iqKOQ43bx58w0up1GjRrFo0aJ1Ckt33nln3nlLxJfX+/r16xdXX311fPe7340XXnghXnrppTjrrLPipptuij//+c+b1qhaUkguDzzwwGjRokUtREe1SGzxli1blsrKytJf//rXNGjQoHT55Zfnxi1evDiddNJJqWXLlqlBgwapS5cu6c4770wppRQReZ+DDjoopZTSkCFD0jHHHJOuuuqq1LZt29SpU6eUUkqvvvpqOuSQQ1KDBg1S8+bN02mnnZY++eSTzd3cTVLetspERLr99tvTsccem0pLS1OXLl3So48+mjfN448/nnbZZZfUoEGDdPDBB6dJkyaliEgfffRRbpoHH3ww9ejRIxUXF6dOnTql6667roZaVD02lJc13X///am4uDitXLlyo37r0UcfTUVFRWnFihUppZTeeuutFBHplVdeWWfaTp06pRtuuGGjfmdzWl/+DjrooHTOOeesd/6ISA8//HBBv1WVabOupvJWyLy1bVPbXpmVK1emdu3apf/3//5fpdOUb3MXXnhhaty4cfrnP/+ZG7fnnnumSy+9dL2xHHrooWn//fffqPhqyvryud9++6WLL7640nkXLFiQiouL0+jRoyscv+a+vdzWsB1uSs5S2rK3v421KTkr3+6mTZuWevfunUpKSlKPHj3Ss88+WzPB1oD1tf/6669Pu+++e2rYsGFq3759+t73vpd3jjhp0qTUtGnT9Mtf/jJ17do1lZaWpm9961tp2bJlafLkyalTp05pu+22SyNHjkyrVq3KzdepU6d0xRVXpBNPPDE1atQotW3bNt144415v13T61x5u/fYY490991354ZPnTo17bHHHumYY45JQ4YMqTCWzz//PF144YWpffv2qbi4OHXp0iW3f3722WdTRKQnn3wy9erVK9WvXz8988wz6fPPP0+jRo1K22+/fSopKUl9+/ZNL730UkGxVnRO9Y9//CNFRLr11lvTypUrU+vWrdNll12WN99//vOfVFZWlm666aa85Vx88cVpl112yZu2W7du6ZJLLkkRkd56660Cs7jp+vfvn9q3b58+/fTTvOHvv/9+atiwYTrjjDNSShs+bt13330pItIjjzyyzm+sXr06ffzxxzXTgBpUaG7WPq9eez/+/PPPpz333DOVlJSkXr16pYcffnid9WnGjBlp3333TcXFxalNmzZpzJgxG/03weZW0T7s1FNPTW3atEnt2rVLEydOzBs3a9asFBHpjTfeSCkVvp8rd+mll6Y999wz3Xrrral9+/aptLQ0ffvb367wvCKLPvrooxQRacaMGZVOU8jfzn/605/SgAEDUllZWWrcuHE64IAD0t///veU0pfnraNGjUpNmzZNzZs3TxdddFEaPHhwwX+XZl0hObz00ktThw4dUnFxcWrbtm0aNWpUSunLfdna12uypibaV+g+unx7/vGPf5zatGmTmjdvns4888zcNYaUvrwWdsopp6TtttsulZaWpv79+6f58+fnLXPSpEmpQ4cOqbS0NB177LHpuuuuq3A73lwKOU6v7/pJSv/dF40cOTKdeuqpueELFixIJSUlaezYsblrfCmldM0116Q6deqk2bNnr7OsFStWpGXLllVL2za3QnK59v9v+Xp12WWXpe233z6VlZWl7373u2n58uWbOXoK4baxrcB9990X3bp1i27dusV3vvOdmDRpUqSUIiLikksuib/85S/xxBNPxOuvvx4TJ06Mli1bRkTESy+9FBH/vYvrF7/4RW6ZTz/9dLz++usxffr0+NWvfhWffvpp9O/fP5o1axZ//OMf44EHHoinnnoqRo4cufkbXMMuv/zyGDhwYLz66qsxYMCAOPnkk3N3GCxYsCCOO+64GDBgQMyZMydOPfXUGDt2bN78s2bNioEDB8YJJ5wQr732Wlx22WVxySWXxOTJk2uhNdVvyZIl0aRJk6hXr16V5128eHFMnTo1+vTpE/Xr16+B6LJp6tSp0bJly9htt93iggsuWOc1P1RsW87bxrZ99uzZ8d5770WdOnVi7733jrZt28YRRxxR4Z01J554YnTp0iWuuOKKKsVWWlq6xTx6vGjRovjDH/4QrVq1ij59+kTr1q3joIMOit///ve5aR544IFYsWJFXHTRRRUuY8336G4LCskZ+aqSswsvvDDOP//8eOWVV6JPnz5x9NFHr/eVMFuKOnXqxI033hh/+tOfYsqUKfHMM8+ss019+umnceONN8a9994bTz75ZMyYMSOOO+64+PWvfx2//vWv43//93/jtttuiwcffDBvvh//+MfRs2fPmD17dowbNy7OPffcmD59et40m+N4MWzYsJg0aVLu+5133hnDhw9f7zyDBw+Oe++9N2688cZ4/fXX49Zbb43GjRvnTXPRRRfF+PHj4/XXX4+ePXvGRRddFA899FBMmTIlZs+eHV26dIl+/fpt9N2uDRs2jIiIlStXRr169WLw4MExefLk3N8KEf/dD5588sl58x599NHx0Ucf5dbl3//+97F48eI46qijNiqWjbV48eL4v//7vzjzzDOjtLQ0b1ybNm3i5JNPjvvuuy+vTWta87g1derU6NatWxxzzDHrTFdUVBRNmzat/gbUoE3NTblPPvkkjjrqqNhjjz1i9uzZceWVV8aYMWPypnnvvfdiwIABse+++8bcuXNj4sSJcccdd8QPf/jDam/X5lK+bpxwwgkxderUvHH33HNP9O7dO3baaaeIKGw/t7a///3vcf/998cvf/nLePLJJ2POnDlx1lln1Vh7qlPjxo2jcePG8cgjj8Ty5csrnW59fzu/9957ceCBB0aDBg3imWeeiVmzZsXw4cNzT6Ndc801MXXq1Jg0aVI8//zzsXTp0q2qX5cN5fDBBx+MG264IX7+85/H3/72t3jkkUdijz32iIgvX2G49tsKsqYm2leVffSzzz4bb7zxRjz77LMxZcqUmDx5ct41l6FDh8bLL78cjz32WMycOTNSSjFgwIDc8eAPf/hDDB8+PM4888yYM2dOHHLIIbW+P6vqcXp9RowYEffdd1/uNV2TJ0+O/v37R+vWrfOmmzp1ahx66KGx9957r7OM+vXrR6NGjTayNbVrY3NZfi302WefjWnTpsXDDz8cl19++eYKm6qoxWIK1aRPnz5pwoQJKaUvq5AtW7ZM06dPTymldNRRR6Vhw4ZVOF9lld0hQ4ak1q1b51Uhb7vtttSsWbO86uzjjz+e6tSpkz744INqblHNGTJkSKpbt25q1KhR3ueKK65IKaXc3Wjlli1bloqKitITTzyRUkpp3LhxqXv37mn16tW5acaMGZP3hMZJJ52UDjvssLzfvfDCC1OPHj1quHUbr9AnNP7973+njh07pu9///tVWv5FF12UGjZsmCIi7b///unf//53btzW/oTGbbfdlqZPn55ee+21NG3atNS5c+d06KGH5k0TEalBgwbrrJfld4StPe2Wfmd4uerI25Z6h/imtr0y06ZNSxGROnbsmB588MH08ssvpxNPPDG1aNEiffjhhyml/G3uySefTPXr18/dKbe+JzS++OKL9MQTT6Ti4uJ00UUXbXTba0Jl+Zw5c2aKiNS8efN05513ptmzZ6fRo0en4uLi3B1a3/ve91KTJk2q9Htbw3a4KTlLacve/jbWpuSsfLu7+uqrc/OtXLkytW/fPl1zzTWbqwmbpKpPc7Zo0SL3vfxp1vJ9TUopnX766alhw4Z5dzj369cvnX766bnvnTp1Sv37989b9qBBg9IRRxyR+74p+8xClLf7X//6VyopKUlvvfVWevvtt1ODBg3Sv/71r0qf0Jg3b16KiNw5+drKn9BY8y7UZcuWpfr166epU6fmhq1YsSLtsMMO6dprr91grGufUy1btiydfvrpqW7duunVV19NKaX0+uuvp4hIzzzzTG6+Aw88MJ144okVLmf06NG5vyWGDRuWzj333PTKK69s1ic0XnzxxfXud3/yk5+kiEj//Oc/N3jc6t69ezr66KM3S9ybQ1Vys74nNCZOnJhatGiRPvvss9z422+/PW99+p//+Z/UrVu3vL+Dbr755tS4ceP0xRdfVHfTqt3a+7A//OEPqUWLFmngwIFp9uzZqaioKL399tsppS/XnXbt2qWbb7650uVVtJ9b+87uunXrpgULFuSGPfHEE6lOnTrp/fffr76G1aAHH3wwNWvWLDVo0CD16dMnjRs3Ls2dOzc3vpC/nXfccce8u+bX1Lp16/TjH/84933VqlWpY8eOW80TGimtP4fXX3996tq1a6X52RL+Fq7u9hW6jx4yZEjq1KlT3lOdxx9/fBo0aFBKKaX58+eniEjPP/98bvy///3vVFpamu6///6UUkonnnhihecYtfmERkobPk4X+oRGSinttddeacqUKWn16tVp5513To8++mi64YYb8p7QKC0tTWeffXZNNadWbSiXFT2h0bx58/Sf//wnN2zixIlbzHFuW+MJjS3cvHnz4qWXXsp1flivXr0YNGhQ3HnnnRER8b3vfS/uvffe2GuvveKiiy6KF154oaDl7rHHHlFcXJz7/vrrr8eee+6ZV53t27dvrF69OubNm1eNLap5hxxySMyZMyfvs+adMj179sz9u1GjRlFWVhaLFi2KiC/zsP/+++d1ANu7d++85b/++uvRt2/fvGF9+/aNv/3tb5nrSLcqli5dGkceeWT06NEjLr300irNe+GFF8Yrr7wSv/nNb6Ju3boxePDgDd4ptrU47bTT4tBDD43dd989TjjhhHjwwQfjqaeeitmzZ+dNd8MNN6yzXm6J/YVUl0LztjXalLavXr06IiK+//3vx7e+9a3o1atXTJo0KYqKiuKBBx5YZ/p+/frFAQccEJdcckmly7zllluicePG0aBBgzj66KPjO9/5TpX3AbWlPB+nn356DBs2LPbee++44YYbolu3brnjZEppi+zUu6YUkjPyVSVna54z1KtXL/bZZ594/fXXN2u8NeHZZ5+Nww47LNq1axdlZWUxePDg+PDDD/M6223YsGHsvPPOue+tW7eOzp075z2x0Lp169w5V7m1z7N69+6dl7PNdbxo2bJlHHnkkTFlypSYNGlSHHnkkbmnnisyZ86cqFu3bhx00EHrXe4+++yT+/cbb7wRK1euzDuPrF+/fnz1q1+t0nrSp0+faNy4cZSVlcUvf/nLmDx5cu6u2F133TX69OmTWzffeOON+N3vflfp0yYjRoyIBx54ID744IN44IEHNvhUSm0oP6cs35ev77i1re3z185NZebNmxc9e/aMBg0a5IZ99atfzZvm9ddfj969e+ctq2/fvrFs2bL4xz/+UY1R15xf/epXuXWjd+/eceCBB8ZNN90Ue++9d+y66665fhWfe+65WLRoUQwcODA3byH7ubV17Ngx2rdvn/veu3fvLerv529961uxcOHCeOyxx6Jfv34xY8aM+MpXvpJ3F/z6/naeM2dOfO1rX6vwyfwlS5bEP//5z7z1rG7dutGrV6+aa1AtWF8Ojz/++Pjss89ip512itNOOy0efvjhautLaXOp7vZVZR+92267Rd26dXPf27Ztm3fdpl69erHffvvlxrdo0SK6deuWO56W79PWtPb32lDV4/T6DB8+PCZNmhTPPfdcLFu2LAYMGLDONFvzcXFjclneMXq53r17x7Jly2LBggU1Hi9Vo6Cxhbvjjjti1apV0a5du6hXr17Uq1cvJk6cGL/4xS/io48+iiOOOCLeeeedGD16dCxcuDC+8Y1vFNTx2dqPla1vJ7el7fwaNWoUXbp0yfus2bHS2idcRUVFuYsVhVyEryhXW/rF+08++ST69+8fjRs3jocffrjKr4tq2bJldO3aNQ477LC4995749e//nW8+OKLNRRttn3lK1+J+vXrx9/+9re84W3atFlnvdyWXsu1IZXlbVtQlba3bds2IiJ69OiRG1ZSUhI77bRTvPvuuxXOc/XVV8d9990Xr7zySoXjTz755JgzZ0688cYb8dlnn8Udd9yRd5KXZRXlIyKie/fuuXx07do1lixZkslH+WtDITkj36bmbEs7j1rbO++8EwMGDIjdd989HnrooZg1a1bcfPPNERF5r6er6Pxqfedc67O+nNXk8WL48OExefLkmDJlygYvLKz9+p/KrHnOXdnF56pebLjvvvti7ty58a9//Svee++9+M53vpM3fsSIEfHQQw/F0qVLY9KkSdGpU6f4xje+UeGydt9999h1113jxBNPjO7du8fuu+9ecBzVpUuXLlFUVBR/+ctfKhz/17/+NZo1a5YrMK3vuNW1a9etoohYrqq5qUwhf7+sb5otZT9WfmPbvHnz4vPPP49f/OIX0apVq4j4cr255557IuLL103169cvl7dC93MbUp6nLSVfERENGjSIww47LH7wgx/ECy+8EEOHDs27sWV9+/FC9oNb29/NFakshx06dIh58+bFzTffHKWlpXHmmWfGgQceuMW82rVcdbavKvvojblus+Z+LMvrWlWO0+tz8sknx4svvhiXXXZZDB48uMLXhm9tx8W1VVcut6T99rZCQWMLtmrVqrjrrrvi+uuvz7ure+7cudGpU6fce0C33377GDp0aNx9990xYcKEuO222yIick9gFPLUQI8ePWLOnDl5d6A8//zzUadOnejatWsNtC6bevTosc6F+LW/9+jRY533Zr/wwgvRtWvXvDsIthRLly6Nww8/PIqLi+Oxxx7Lu3NrY5SfOKzvXaxbsz//+c+xcuXK3AUwCrMt560qbe/Vq1eUlJTk3fm3cuXKePvtt6NTp04VzvPVr341jjvuuHX6AyrXtGnT6NKlS3To0GGL24d17tw5dthhh3XuhJw/f34uH9/+9rejuLg4rr322gqX8fHHH9d0mJlSSM7IV5WcrXnOsGrVqpg1a1bsuuuumyXOmvLyyy/HqlWr4vrrr4/9998/unbtGgsXLqy25Vd03rW+nNXk8aJ///6xYsWKWLFiRfTr12+90+6xxx6xevXqeO655wpefpcuXaK4uDjvPHLlypXx8ssvR/fu3QteTocOHWLnnXeOFi1aVDh+4MCBUbdu3bjnnntiypQpMWzYsPX+oT58+PCYMWNGrT2d0aJFizjssMPilltuic8++yxv3AcffBBTp06NQYMG5dqwvuPWSSedFPPnz49HH310nd9JKcWSJUtqriE1oKq5qcyuu+4ar776at75+csvv5w3TY8ePeKFF17Iuwj4wgsvRFlZWbRr164aWlPzym9s69Sp0zoXQ0866aR47bXXYtasWfHggw/mvV99Y/dz7777bt50M2fO3OL/fu7Ro8d6n0pZU8+ePeN3v/tdhRewmzZtGq1bt8716xnx5XWJym6w2ZqsmcPS0tI4+uij48Ybb4wZM2bEzJkz47XXXouIL6/XbIlveNiU9lXXPrpHjx6xatWq+MMf/pAb9uGHH8b8+fNzx9NCru3UlqoepyvTvHnzOProo+O5556r9Bh+0kknxVNPPVXhtrdq1aqCt/esqmou586dm3c8ffHFF6Nx48Z5T9uRDVXv1ZfM+NWvfhUfffRRjBgxYp0O7L797W/HHXfcEYsWLYpevXrFbrvtFsuXL49f/epXuR14q1atorS0NJ588slo3759NGjQoNKO8E4++eS49NJLY8iQIXHZZZfFv/71rxg1alSccsop63QqlHXLly+PDz74IG9YvXr1NnjnUkTEGWecEddff32cd955cfrpp8esWbPW6ez7/PPPj3333TeuvPLKGDRoUMycOTN+9rOfxS233FKdzah2S5YsiTlz5uQNa9asWQwaNCg+/fTTuPvuu2Pp0qWxdOnSiPiyULahi5svvfRSvPTSS3HAAQdEs2bN4s0334wf/OAHsfPOO2ficc7qVFH+lixZEs8991wMGDAgWrZsGX/5y1/i/PPPj7333nud15J9/PHH66yXZWVlW2wnXIXa1LxtyWqi7U2aNIkzzjgjd1dUp06d4sc//nFERBx//PGVzvejH/0odttttwrv2tlSVJTP5s2bx4UXXhiXXnpp7LnnnrHXXnvFlClT4q9//Wuu4+EOHTrEDTfcECNHjoylS5fG4MGDo3PnzvGPf/wj7rrrrmjcuHFcf/31tdCimrexOduWbWrObr755thll12ie/fuccMNN8RHH32UyVf4VKai9m+//faxatWquOmmm+Koo46K559/Pm699dZq+83nn38+rr322jj22GNj+vTp8cADD8Tjjz8eEV++OmDq1Kmb7XhRt27d3F2MGzoH6ty5cwwZMiSGDx8eN954Y+y5557xzjvvrPMamzU1atQovve978WFF14YzZs3j44dO8a1114bn376aYwYMaLa2tG4ceMYNGhQ/M///E8sWbIkhg4dut7pTzvttDj++ONju+22q7YYqupnP/tZ9OnTJ/r16xc//OEPY8cdd4w///nPceGFF0a7du3iRz/6UUHLGThwYDz88MNx4oknxiWXXBKHHXZYbL/99vHaa6/FDTfcEKNGjYpjjz22ZhtTzaojNyeddFJ8//vfj+9+97sxduzYePfdd+O6666LiP/elXrmmWfGhAkTYtSoUTFy5MiYN29eXHrppXHeeedFnTpb/n2SO+64Y/Tp0ydGjBgRq1atyuuUeOedd96o/VyDBg1iyJAhcd1118XSpUvj7LPPjoEDB0abNm1qsinV4sMPP4zjjz8+hg8fHj179oyysrJ4+eWX49prr62ww+aKjBw5Mm666aY44YQTYty4cdG0adN48cUX46tf/Wp069YtRo0aFePHj48uXbrErrvuGjfddFN89NFHW82d0BvK4eTJk+OLL76I/fbbLxo2bBj/+7//G6WlpbmbITp37hy//e1v44QTToiSkpKCrldsTjXRvuraR++yyy5xzDHHxGmnnRY///nPo6ysLMaOHRvt2rXLrb9nn3129OnTJ3eO8Zvf/CaefPLJmkxZwQo5Tlf06rq1nxaO+LIz8FtuuaXSmxxGjx4djz/+eHzjG9+IK6+8Mg444IDc/+U111wTd9xxR+y1116b2qRaU9VznhUrVsSIESPi4osvjnfeeScuvfTSGDly5FZxnNvqbLbeOqh23/zmN9OAAQMqHDdr1qwUEenyyy9P3bt3T6Wlpal58+bpmGOOSW+++WZuuttvvz116NAh1alTJx100EEppco7fXz11VfTIYcckho0aJCaN2+eTjvttLyOHLcEQ4YMSRGxzqdbt24ppYo7OW3atGmaNGlS7vsvf/nL1KVLl1RSUpK+9rWvpTvvvDOvU/CUvuwcq0ePHql+/fqpY8eOeZ2dZVFleenUqVOFw6PAjiDL15nmzZunkpKS1Llz53TGGWekf/zjH7lptpZOwSvK0de//vV04IEHpubNm6fi4uK08847p7PPPjvXOXO5ynI8fvz4dX6ronV0S1UdedtSOyXe1Lavz4oVK9L555+fWrVqlcrKytKhhx6a/vSnP+XGV7bNffe7300RUWmn4FlWWT7LO+odP358at++fWrYsGHq3bt3+t3vfrfOMqZPn5769euX69hw1113TRdccEFauHDhOtNuDdvhpuZsS97+Ntam5Kx8u7vnnnvSfvvtl4qLi1P37t3T008/XUutqbr1tf8nP/lJatu2bSotLU39+vVLd911V9650dqd5aZUcUeba5+DdurUKV1++eVp4MCBqWHDhql169ZpwoQJufHvvvvuJu8zC2n3+jqoraxT8JRS+uyzz9K5556b2rZtm4qLi1OXLl3SnXfemVL6b6fga54/ls8zatSo1LJly1RSUpL69u2bXnrppYJi3VBHoWt64YUXUkSkww8/vMrL2dydgpd7++2309ChQ1ObNm1S/fr1U4cOHdKoUaPSv//979w0heyDvvjiizRx4sS07777poYNG6YmTZqkXr16pZ/+9Kfp008/reFW1IxCcrO+TsFTSun5559PPXv2TMXFxalXr17pnnvuSRGR/vrXv+ammTFjRtp3331TcXFxatOmTRozZkxauXLl5mjiJtvQtpzSl52cR0QaPHjwOuOqup8r38fdcsstaYcddkgNGjRIxx13XFq8eHE1tqrmfP7552ns2LHpK1/5SmratGlq2LBh6tatW7r44otz20khfzvPnTs3HX744alhw4aprKwsfe1rX0tvvPFGSimllStXppEjR6YmTZqkZs2apTFjxqTjjz8+nXDCCZurmTVqQzl8+OGH03777ZeaNGmSGjVqlPbff//01FNP5eafOXNm6tmzZyopKUlZvHRXU+0rZB9d0fZ8zjnn5K5ppZTS4sWL0ymnnJKaNm2a227nz5+fN88dd9yR2rdvn0pLS9NRRx2VrrvuulrvFLxcZcfp8mN0ZddoKjrnWtPanYKn9OX/5fjx49Mee+yRu97Xt2/fNHny5C1mH78+leWyok7BjznmmPSDH/wgtWjRIjVu3Dideuqp6fPPP9/MEVOIopQy/OI4AABgm9S5c+cYPXp0jB49urZDgW3O1KlTY9iwYbFkyZKC+4SBTbF69ero3r17DBw4MK688sraDgfYxgwdOjQ+/vjjeOSRR2o7FAqw5b5bAgAAANhkd911V+y0007Rrl27mDt3bowZMyYGDhyomEGNeeedd+I3v/lNHHTQQbF8+fL42c9+Fm+99VacdNJJtR0aABnnJWDARrvqqquicePGFX6OOOKI2g4Ptjq2OYBt1xlnnFHpMeCMM86o7fDYwn3wwQfxne98J7p37x7nnntuHH/88XHbbbfVdlhsxerUqROTJ0+OfffdN/r27RuvvfZaPPXUU7k+PwGgMl45BWy0xYsXx+LFiyscV1paGu3atdvMEcHWzTYHsO1atGhRLF26tMJxTZo0iVatWm3miAAAYPNT0AAAAAAAADLPK6cAAAAAAIDMU9AAAAAAAAAyT0EDAAAAAADIPAUNAAAAAAAg8xQ0AAAAAACAzFPQAAAAAAAAMk9BAwAAAAAAyLz/D+Zf6/uoa8htAAAAAElFTkSuQmCC", "text/plain": [ - " chr start end split\n", - "region \n", - "chr1:3093998-3096112 chr1 3093998 3096112 train\n", - "chr1:3094663-3096777 chr1 3094663 3096777 train\n", - "chr1:3111367-3113481 chr1 3111367 3113481 train\n", - "chr1:3112727-3114841 chr1 3112727 3114841 train\n", - "chr1:3118939-3121053 chr1 3118939 3121053 train\n", - "... ... ... ... ...\n", - "chrX:169878506-169880620 chrX 169878506 169880620 train\n", - "chrX:169879374-169881488 chrX 169879374 169881488 train\n", - "chrX:169924670-169926784 chrX 169924670 169926784 train\n", - "chrX:169947743-169949857 chrX 169947743 169949857 train\n", - "chrX:169950171-169952285 chrX 169950171 169952285 train\n", - "\n", - "[546993 rows x 4 columns]" + "
" ] }, - "execution_count": 5, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "# Choose the chromosomes for the validation and test sets\n", - "crested.pp.train_val_test_split(\n", - " adata, strategy=\"chr\", val_chroms=[\"chr8\", \"chr10\"], test_chroms=[\"chr9\", \"chr18\"]\n", - ")\n", - "\n", - "# Alternatively, We can split randomly on the regions\n", - "# crested.pp.train_val_test_split(\n", - "# adata, strategy=\"region\", val_size=0.1, test_size=0.1, random_state=42\n", - "# )\n", - "\n", - "print(adata.var[\"split\"].value_counts())\n", - "adata.var" + "%matplotlib inline\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "plt.figure(figsize=(20,3))\n", + "index=1998\n", + "plt.bar(adata.obs_names, np.log1p(1000*adata.X.T[index]))\n", + "print(adata.var.index[index])" ] }, { diff --git a/src/crested/tl/losses/_cosinemse_log.py b/src/crested/tl/losses/_cosinemse_log.py index c4e067d..9ea261a 100644 --- a/src/crested/tl/losses/_cosinemse_log.py +++ b/src/crested/tl/losses/_cosinemse_log.py @@ -30,6 +30,8 @@ class CosineMSELogLoss(keras.losses.Loss): Name of the loss function. reduction Type of reduction to apply to loss. + multiplier + Scalar to multiply the predicted value with. When predicting mean coverage, multiply by 1000 to get actual count. Keep to 1 when predicting insertion counts. Notes ----- @@ -50,11 +52,13 @@ def __init__( max_weight: float = 1.0, name: str | None = "CosineMSELogLoss", reduction: str = "sum_over_batch_size", + multiplier: float = 1000, ): """Initialize the loss function.""" super().__init__(name=name) self.max_weight = max_weight self.reduction = reduction + self.multiplier = multiplier def call(self, y_true, y_pred): """Compute the loss value.""" @@ -64,13 +68,13 @@ def call(self, y_true, y_pred): y_true1 = keras.utils.normalize(y_true, axis=-1) y_pred1 = keras.utils.normalize(y_pred, axis=-1) - log_y_pred_pos = keras.ops.log(1 + 1000 * keras.ops.maximum(y_pred, 0)) + log_y_pred_pos = keras.ops.log(1 + self.multiplier * keras.ops.maximum(y_pred, 0)) log_y_pred_neg = -keras.ops.log( - 1 + keras.ops.abs(1000 * keras.ops.minimum(y_pred, 0)) + 1 + keras.ops.abs(self.multiplier * keras.ops.minimum(y_pred, 0)) ) log_y_pred = log_y_pred_pos + log_y_pred_neg - log_y_true = keras.ops.log(1 + 1000 * y_true) + log_y_true = keras.ops.log(1 + self.multiplier * y_true) mse_loss = keras.ops.mean(keras.ops.square(log_y_pred - log_y_true)) weight = keras.ops.abs(mse_loss) From da4dfd93b7ca5930e927f6eeaef25c0b26f14ad1 Mon Sep 17 00:00:00 2001 From: nkempynck Date: Thu, 28 Nov 2024 15:37:40 +0100 Subject: [PATCH 2/2] notebook update from main --- docs/tutorials/model_training_and_eval.ipynb | 205 +++++++++++++++---- 1 file changed, 168 insertions(+), 37 deletions(-) diff --git a/docs/tutorials/model_training_and_eval.ipynb b/docs/tutorials/model_training_and_eval.ipynb index b5b9b98..e235e7e 100644 --- a/docs/tutorials/model_training_and_eval.ipynb +++ b/docs/tutorials/model_training_and_eval.ipynb @@ -53,10 +53,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-11-28 15:24:43.441628: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-11-28 15:24:43.480900: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2024-11-15 13:47:25.650703: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2024-11-15 13:47:25.687972: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-11-28 15:24:45.799208: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + "2024-11-15 13:47:28.235522: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", + "\u001b[32m2024-11-15 13:47:36.768\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mcrested.tl\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m25\u001b[0m - \u001b[33m\u001b[1mmodiscolite is not installed, 'crested.tl.modisco' module will not be available.\u001b[0m\n" ] } ], @@ -113,15 +114,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2024-11-28T15:25:02.622578+0100 INFO Extracting values from 19 bigWig files...\n" + "2024-11-15T13:43:03.892523+0100 INFO Extracting values from 19 bigWig files...\n" ] + }, + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 19 × 546993\n", + " obs: 'file_path'\n", + " var: 'chr', 'start', 'end'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -145,55 +158,173 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Choose the chromosomes for the validation and test sets\n", - "crested.pp.train_val_test_split(\n", - " adata, strategy=\"chr\", val_chroms=[\"chr8\", \"chr10\"], test_chroms=[\"chr9\", \"chr18\"]\n", - ")\n", - "\n", - "# Alternatively, We can split randomly on the regions\n", - "# crested.pp.train_val_test_split(\n", - "# adata, strategy=\"region\", val_size=0.1, test_size=0.1, random_state=42\n", - "# )\n", - "\n", - "print(adata.var[\"split\"].value_counts())\n", - "adata.var" - ] - }, - { - "cell_type": "code", - "execution_count": 34, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "chr1:9458485-9458985\n" + "split\n", + "train 440993\n", + "val 56064\n", + "test 49936\n", + "Name: count, dtype: int64\n" ] }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAAESCAYAAABaTzw3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3eklEQVR4nO3deZhWZfk48HtYZhhgQBZZZFURBBU1NAXMpVQQc7kswSVZNU1BcQW+aW4lapqkKaY/BfyKuOZSpt9wwUoxEwStDMqVRKNEQVJZ5Pn94TVvvDAD78AMc4DP57re6+I923s/N2ebc59znqKUUgoAAAAAAIAMq1PbAQAAAAAAAGyIggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5tXb3D+4evXqWLhwYZSVlUVRUdHm/nkAAAAAACBDUkrxySefxA477BB16lT+HMZmL2gsXLgwOnTosLl/FgAAAAAAyLAFCxZE+/btKx2/2QsaZWVlEfFlYE2aNNncPw8AAAAAAGTI0qVLo0OHDrn6QWU2e0Gj/DVTTZo0UdAAAAAAAAAiIjbYTYVOwQEAAAAAgMxT0AAAAAAAADJPQQMAAAAAAMg8BQ0AAAAAACDzqlTQWLVqVVx88cWx4447Rmlpaey0005xxRVXxOrVq2sqPgAAAAAAgKhXlYmvueaauPXWW2PKlCmx2267xcsvvxzDhg2Lpk2bxjnnnFNTMQIAAAAAANu4KhU0Zs6cGcccc0wceeSRERHRuXPnmDZtWrz88ss1EhwAAAAAAEBEFV85dcABB8TTTz8d8+fPj4iIuXPnxu9///sYMGBApfMsX748li5dmvcBAAAAAACoiio9oTFmzJhYsmRJ7LrrrlG3bt344osv4kc/+lGceOKJlc4zfvz4uPzyyzc5UICa1nns47Udwmb19tVH1nYIAAAAAFCwKj2hcd9998Xdd98d99xzT8yePTumTJkS1113XUyZMqXSecaNGxdLlizJfRYsWLDJQQMAAAAAANuWKj2hceGFF8bYsWPjhBNOiIiIPfbYI955550YP358DBkypMJ5SkpKoqSkZNMjBQAAAAAAtllVekLj008/jTp18mepW7durF69ulqDAgAAAAAAWFOVntA46qij4kc/+lF07Ngxdtttt3jllVfiJz/5SQwfPrym4gMAAAAAAKhaQeOmm26KSy65JM4888xYtGhR7LDDDnH66afHD37wg5qKDwAAAAAAoGoFjbKyspgwYUJMmDChhsIBAAAAAABYV5X60AAAAAAAAKgNChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB59Wo7AAAAAACoLZ3HPl7bIWxWb1995EbPK1dAbfOEBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZF692g4AgC1P57GP13YIm9XbVx9Z2yEAAAAAbPM8oQEAAAAAAGSeJzQAAAAAAKgV3gJBVXhCAwAAAAAAyDwFDQAAAAAAIPMUNAAAAAAAgMyrch8a7733XowZMyaeeOKJ+Oyzz6Jr165xxx13RK9evWoiPgAA2GjexwsAALD1qFJB46OPPoq+ffvGIYccEk888US0atUq3njjjdhuu+1qKDwAYFvhwjMAAACwPlUqaFxzzTXRoUOHmDRpUm5Y586dqzsmANhquEhPTbBeAQAAsC2qUh8ajz32WOyzzz5x/PHHR6tWrWLvvfeO22+/fb3zLF++PJYuXZr3AQAAAAAAqIoqFTTefPPNmDhxYuyyyy7xf//3f3HGGWfE2WefHXfddVel84wfPz6aNm2a+3To0GGTgwYAAAAAALYtVXrl1OrVq2OfffaJq666KiIi9t577/jzn/8cEydOjMGDB1c4z7hx4+K8887LfV+6dKmiBgAAAACw1fKaWKgZVSpotG3bNnr06JE3rHv37vHQQw9VOk9JSUmUlJRsXHRQCQcFAAAAAIBtS5VeOdW3b9+YN29e3rD58+dHp06dqjUoAAAAAACANVWpoHHuuefGiy++GFdddVX8/e9/j3vuuSduu+22OOuss2oqPgAAAAAAgKoVNPbdd994+OGHY9q0abH77rvHlVdeGRMmTIiTTz65puIDAAAAAACoWh8aERHf/OY345vf/GZNxAIAAAAAAFChKj2hAQAAAAAAUBsUNAAAAAAAgMxT0AAAAAAAADKvyn1oAAAAANSGzmMfr+0QNqu3rz6ytkMAgExR0ICtnBN+AAAAAGBr4JVTAAAAAABA5nlCAwAA8FQnAACQeZ7QAAAAAAAAMk9BAwAAAAAAyDwFDQAAAAAAIPMUNAAAAAAAgMxT0AAAAAAAADJPQQMAAAAAAMg8BQ0AAAAAACDzFDQAAAAAAIDMU9AAAAAAAAAyT0EDAAAAAADIvHq1HQD/1Xns47Udwmb19tVH1nYIAAAAAABsITyhAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDm1avtAAAAALYkncc+XtshbFZvX31kbYcAAAAR4QkNAAAAAABgC6CgAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZF692g4AAAAAtmWdxz5e2yFsVm9ffWRthwAAbKE8oQEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmbVJBY/z48VFUVBSjR4+upnAAAAAAAADWtdEFjT/+8Y9x2223Rc+ePaszHgAAAAAAgHVsVEFj2bJlcfLJJ8ftt98ezZo1q+6YAAAAAAAA8mxUQeOss86KI488Mg499NANTrt8+fJYunRp3gcAAAAAAKAq6lV1hnvvvTdmz54df/zjHwuafvz48XH55ZdXOTAAAAAAAIByVXpCY8GCBXHOOefE3XffHQ0aNChonnHjxsWSJUtynwULFmxUoAAAAAAAwLarSk9ozJo1KxYtWhS9evXKDfviiy/it7/9bfzsZz+L5cuXR926dfPmKSkpiZKSkuqJFgAAAAAA2CZVqaDxjW98I1577bW8YcOGDYtdd901xowZs04xAwAAAAAAoDpUqaBRVlYWu+++e96wRo0aRYsWLdYZDgAAAAAAUF2q1IcGAAAAAABAbajSExoVmTFjRjWEAQAAAAAAUDlPaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5lWpoDF+/PjYd999o6ysLFq1ahXHHntszJs3r6ZiAwAAAAAAiIgqFjSee+65OOuss+LFF1+M6dOnx6pVq+Lwww+P//znPzUVHwAAAAAAQNSrysRPPvlk3vdJkyZFq1atYtasWXHggQdWa2AAAAAAAADlqlTQWNuSJUsiIqJ58+aVTrN8+fJYvnx57vvSpUs35ScBAAAAAIBt0EZ3Cp5SivPOOy8OOOCA2H333Sudbvz48dG0adPcp0OHDhv7kwAAAAAAwDZqowsaI0eOjFdffTWmTZu23unGjRsXS5YsyX0WLFiwsT8JAAAAAABsozbqlVOjRo2Kxx57LH77299G+/bt1zttSUlJlJSUbFRwAAAAAAAAEVUsaKSUYtSoUfHwww/HjBkzYscdd6ypuAAAAAAAAHKqVNA466yz4p577olHH300ysrK4oMPPoiIiKZNm0ZpaWmNBAgAAAAAAFClPjQmTpwYS5YsiYMPPjjatm2b+9x33301FR8AAAAAAEDVXzkFAAAAAACwuVXpCQ0AAAAAAIDaoKABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggYAAAAAAJB5ChoAAAAAAEDmKWgAAAAAAACZp6ABAAAAAABknoIGAAAAAACQeQoaAAAAAABA5iloAAAAAAAAmaegAQAAAAAAZJ6CBgAAAAAAkHkKGgAAAAAAQOZtVEHjlltuiR133DEaNGgQvXr1it/97nfVHRcAAAAAAEBOlQsa9913X4wePTq+//3vxyuvvBJf+9rX4ogjjoh33323JuIDAAAAAACoekHjJz/5SYwYMSJOPfXU6N69e0yYMCE6dOgQEydOrIn4AAAAAAAAol5VJl6xYkXMmjUrxo4dmzf88MMPjxdeeKHCeZYvXx7Lly/PfV+yZElERCxdurSqsW71Vi//tLZD2Kw2ZR2Qq8LJVeHkqnByVTi5KpxcFU6uCidXhZOrwskV1c06VTi5oiZYrwonV4WTq8LJFRH/zUtKab3TFaUNTbGGhQsXRrt27eL555+PPn365IZfddVVMWXKlJg3b94681x22WVx+eWXF/oTAAAAAADANmjBggXRvn37SsdX6QmNckVFRXnfU0rrDCs3bty4OO+883LfV69eHYsXL44WLVpUOg+bz9KlS6NDhw6xYMGCaNKkSW2Hk2lyVTi5KpxcFU6uCidXhZOrwslV4eSqcHJVOLkqnFwVRp4KJ1eFk6vCyVXh5KpwclU4ucqelFJ88sknscMOO6x3uioVNFq2bBl169aNDz74IG/4okWLonXr1hXOU1JSEiUlJXnDtttuu6r8LJtBkyZNbLwFkqvCyVXh5KpwclU4uSqcXBVOrgonV4WTq8LJVeHkqjDyVDi5KpxcFU6uCidXhZOrwslVtjRt2nSD01SpU/Di4uLo1atXTJ8+PW/49OnT815BBQAAAAAAUJ2q/Mqp8847L0455ZTYZ599onfv3nHbbbfFu+++G2eccUZNxAcAAAAAAFD1gsagQYPiww8/jCuuuCLef//92H333ePXv/51dOrUqSbio4aVlJTEpZdeus5rwViXXBVOrgonV4WTq8LJVeHkqnByVTi5KpxcFU6uCidXhZGnwslV4eSqcHJVOLkqnFwVTq62XEUppVTbQQAAAAAAAKxPlfrQAAAAAAAAqA0KGgAAAAAAQOYpaAAAAAAAAJmnoAEAAAAAAGSeggZsohkzZkRRUVF8/PHHtR0KAJBBb7/9dhQVFcWcOXNqOxRq0MEHHxyjR4+u7TCgQp07d44JEybkvhcVFcUjjzxSa/FsbSZPnhzbbbdd7vtll10We+21V63FA1uzoUOHxrHHHlvty7Udb1v8/27ZFDS2Ei+88ELUrVs3+vfvX/A82+If10OHDo2ioqJ1PlXJ29aoshOCxYsXx6hRo6Jbt27RsGHD6NixY5x99tmxZMmSgpd99NFHR8eOHaNBgwbRtm3bOOWUU2LhwoW58Wuuh5dddlmF/z9rft5+++1qaHH1Wt8J1cEHH7xOG0444YS8aSpr67333lvpOrvmZ0tVHXl75JFHYvLkyRvM0YwZM2q+QVWwqW3fkMcffzz222+/KC0tjZYtW8Zxxx2XG1e+zbVq1So++eSTvPn22muvuOyyyyqMpaSkJLp27RpXXXVVfPHFF1WKp6Zt6I+amTNnxte//vVo1KhRbLfddnHwwQfHZ599ljfNs88+GwMGDIgWLVpEw4YNo0ePHnH++efHe++9t1Vuh5uasy15+9tY1bGebclq6uLBpqqOfeb6lG//Z5xxxjrjzjzzzCgqKoqhQ4dGRMQvfvGLuPLKK6vtt6uqfP9e/mnWrFkceOCB8dxzz0VExFFHHRWHHnpohfPOnDkzioqKYvbs2bnl1KtXL95777286d5///2oV69erZyTLViwIEaMGBE77LBDFBcXR6dOneKcc86JDz/8MDdNIcetlFLcdtttsd9++0Xjxo1ju+22i3322ScmTJgQn3766WZtU3UpJDdre//99+OII47YjFHWrjWP5fXr14+ddtopLrjggvjPf/5T26Fl1qJFi+L000+Pjh07RklJSbRp0yb69esXM2fOrO3QthibksO1i5BZVBPt2xr30YUq9Di9vut45efm3bt3X2fc/fffH0VFRdG5c+e84StWrIhrr7029txzz2jYsGG0bNky+vbtG5MmTYqVK1duarNqRSG5/PrXvx5PP/30Zo6M6qKgsZW48847Y9SoUfH73/8+3n333Wpd9ooVK6p1ebWtf//+8f777+d9pk2bVtthZdLChQtj4cKFcd1118Vrr70WkydPjieffDJGjBhR8DIOOeSQuP/++2PevHnx0EMPxRtvvBHf/va3K5z2ggsuyPt/ad++fVxxxRV5wzp06FBdzdtsTjvttLw2/PznP19nmkmTJq2zXh577LHx05/+NG9YRdNurQrJW0TEoEGD8qbr3bv3OvP26dNnM0e/aQpte0UeeuihOOWUU2LYsGExd+7ceP755+Okk05aZ7pPPvkkrrvuuoJjmTdvXpx99tlx8cUXFzRfVsycOTP69+8fhx9+eLz00kvxxz/+MUaOHBl16vz3FOjnP/95HHroodGmTZt46KGH4i9/+UvceuutsWTJkrj++uu3ue2wkJyV2xq3v41RlZxR/TZln1mIDh06xL333ptXoPr8889j2rRp0bFjx9yw5s2bR1lZ2Ub9RkopVq1atcmxRkQ89dRT8f7778dzzz0XTZo0iQEDBsRbb70VI0aMiGeeeSbeeeeddea58847Y6+99oqvfOUruWE77LBD3HXXXXnTTZkyJdq1a1ctcVbFm2++Gfvss0/Mnz8/pk2bFn//+9/j1ltvjaeffjp69+4dixcvzk27oePWKaecEqNHj45jjjkmnn322ZgzZ05ccskl8eijj8ZvfvObzd62TVWV3KypTZs2UVJSspmjrV3lfwe++eab8cMf/jBuueWWuOCCC2o7rMz61re+FXPnzo0pU6bE/Pnz47HHHouDDz640nWKdW3tOayJ9m1t++iqKOQ43bx58w0up1GjRrFo0aJ1Ckt33nln3nlLxJfX+/r16xdXX311fPe7340XXnghXnrppTjrrLPipptuij//+c+b1qhaUkguDzzwwGjRokUtREe1SGzxli1blsrKytJf//rXNGjQoHT55Zfnxi1evDiddNJJqWXLlqlBgwapS5cu6c4770wppRQReZ+DDjoopZTSkCFD0jHHHJOuuuqq1LZt29SpU6eUUkqvvvpqOuSQQ1KDBg1S8+bN02mnnZY++eSTzd3cTVLetspERLr99tvTsccem0pLS1OXLl3So48+mjfN448/nnbZZZfUoEGDdPDBB6dJkyaliEgfffRRbpoHH3ww9ejRIxUXF6dOnTql6667roZaVD02lJc13X///am4uDitXLlyo37r0UcfTUVFRWnFihUppZTeeuutFBHplVdeWWfaTp06pRtuuGGjfmdzWl/+DjrooHTOOeesd/6ISA8//HBBv1WVabOupvJWyLy1bVPbXpmVK1emdu3apf/3//5fpdOUb3MXXnhhaty4cfrnP/+ZG7fnnnumSy+9dL2xHHrooWn//fffqPhqyvryud9++6WLL7640nkXLFiQiouL0+jRoyscv+a+vdzWsB1uSs5S2rK3v421KTkr3+6mTZuWevfunUpKSlKPHj3Ss88+WzPB1oD1tf/6669Pu+++e2rYsGFq3759+t73vpd3jjhp0qTUtGnT9Mtf/jJ17do1lZaWpm9961tp2bJlafLkyalTp05pu+22SyNHjkyrVq3KzdepU6d0xRVXpBNPPDE1atQotW3bNt144415v13T61x5u/fYY490991354ZPnTo17bHHHumYY45JQ4YMqTCWzz//PF144YWpffv2qbi4OHXp0iW3f3722WdTRKQnn3wy9erVK9WvXz8988wz6fPPP0+jRo1K22+/fSopKUl9+/ZNL730UkGxVnRO9Y9//CNFRLr11lvTypUrU+vWrdNll12WN99//vOfVFZWlm666aa85Vx88cVpl112yZu2W7du6ZJLLkkRkd56660Cs7jp+vfvn9q3b58+/fTTvOHvv/9+atiwYTrjjDNSShs+bt13330pItIjjzyyzm+sXr06ffzxxzXTgBpUaG7WPq9eez/+/PPPpz333DOVlJSkXr16pYcffnid9WnGjBlp3333TcXFxalNmzZpzJgxG/03weZW0T7s1FNPTW3atEnt2rVLEydOzBs3a9asFBHpjTfeSCkVvp8rd+mll6Y999wz3Xrrral9+/aptLQ0ffvb367wvCKLPvrooxQRacaMGZVOU8jfzn/605/SgAEDUllZWWrcuHE64IAD0t///veU0pfnraNGjUpNmzZNzZs3TxdddFEaPHhwwX+XZl0hObz00ktThw4dUnFxcWrbtm0aNWpUSunLfdna12uypibaV+g+unx7/vGPf5zatGmTmjdvns4888zcNYaUvrwWdsopp6TtttsulZaWpv79+6f58+fnLXPSpEmpQ4cOqbS0NB177LHpuuuuq3A73lwKOU6v7/pJSv/dF40cOTKdeuqpueELFixIJSUlaezYsblrfCmldM0116Q6deqk2bNnr7OsFStWpGXLllVL2za3QnK59v9v+Xp12WWXpe233z6VlZWl7373u2n58uWbOXoK4baxrcB9990X3bp1i27dusV3vvOdmDRpUqSUIiLikksuib/85S/xxBNPxOuvvx4TJ06Mli1bRkTESy+9FBH/vYvrF7/4RW6ZTz/9dLz++usxffr0+NWvfhWffvpp9O/fP5o1axZ//OMf44EHHoinnnoqRo4cufkbXMMuv/zyGDhwYLz66qsxYMCAOPnkk3N3GCxYsCCOO+64GDBgQMyZMydOPfXUGDt2bN78s2bNioEDB8YJJ5wQr732Wlx22WVxySWXxOTJk2uhNdVvyZIl0aRJk6hXr16V5128eHFMnTo1+vTpE/Xr16+B6LJp6tSp0bJly9htt93iggsuWOc1P1RsW87bxrZ99uzZ8d5770WdOnVi7733jrZt28YRRxxR4Z01J554YnTp0iWuuOKKKsVWWlq6xTx6vGjRovjDH/4QrVq1ij59+kTr1q3joIMOit///ve5aR544IFYsWJFXHTRRRUuY8336G4LCskZ+aqSswsvvDDOP//8eOWVV6JPnz5x9NFHr/eVMFuKOnXqxI033hh/+tOfYsqUKfHMM8+ss019+umnceONN8a9994bTz75ZMyYMSOOO+64+PWvfx2//vWv43//93/jtttuiwcffDBvvh//+MfRs2fPmD17dowbNy7OPffcmD59et40m+N4MWzYsJg0aVLu+5133hnDhw9f7zyDBw+Oe++9N2688cZ4/fXX49Zbb43GjRvnTXPRRRfF+PHj4/XXX4+ePXvGRRddFA899FBMmTIlZs+eHV26dIl+/fpt9N2uDRs2jIiIlStXRr169WLw4MExefLk3N8KEf/dD5588sl58x599NHx0Ucf5dbl3//+97F48eI46qijNiqWjbV48eL4v//7vzjzzDOjtLQ0b1ybNm3i5JNPjvvuuy+vTWta87g1derU6NatWxxzzDHrTFdUVBRNmzat/gbUoE3NTblPPvkkjjrqqNhjjz1i9uzZceWVV8aYMWPypnnvvfdiwIABse+++8bcuXNj4sSJcccdd8QPf/jDam/X5lK+bpxwwgkxderUvHH33HNP9O7dO3baaaeIKGw/t7a///3vcf/998cvf/nLePLJJ2POnDlx1lln1Vh7qlPjxo2jcePG8cgjj8Ty5csrnW59fzu/9957ceCBB0aDBg3imWeeiVmzZsXw4cNzT6Ndc801MXXq1Jg0aVI8//zzsXTp0q2qX5cN5fDBBx+MG264IX7+85/H3/72t3jkkUdijz32iIgvX2G49tsKsqYm2leVffSzzz4bb7zxRjz77LMxZcqUmDx5ct41l6FDh8bLL78cjz32WMycOTNSSjFgwIDc8eAPf/hDDB8+PM4888yYM2dOHHLIIbW+P6vqcXp9RowYEffdd1/uNV2TJ0+O/v37R+vWrfOmmzp1ahx66KGx9957r7OM+vXrR6NGjTayNbVrY3NZfi302WefjWnTpsXDDz8cl19++eYKm6qoxWIK1aRPnz5pwoQJKaUvq5AtW7ZM06dPTymldNRRR6Vhw4ZVOF9lld0hQ4ak1q1b51Uhb7vtttSsWbO86uzjjz+e6tSpkz744INqblHNGTJkSKpbt25q1KhR3ueKK65IKaXc3Wjlli1bloqKitITTzyRUkpp3LhxqXv37mn16tW5acaMGZP3hMZJJ52UDjvssLzfvfDCC1OPHj1quHUbr9AnNP7973+njh07pu9///tVWv5FF12UGjZsmCIi7b///unf//53btzW/oTGbbfdlqZPn55ee+21NG3atNS5c+d06KGH5k0TEalBgwbrrJfld4StPe2Wfmd4uerI25Z6h/imtr0y06ZNSxGROnbsmB588MH08ssvpxNPPDG1aNEiffjhhyml/G3uySefTPXr18/dKbe+JzS++OKL9MQTT6Ti4uJ00UUXbXTba0Jl+Zw5c2aKiNS8efN05513ptmzZ6fRo0en4uLi3B1a3/ve91KTJk2q9Htbw3a4KTlLacve/jbWpuSsfLu7+uqrc/OtXLkytW/fPl1zzTWbqwmbpKpPc7Zo0SL3vfxp1vJ9TUopnX766alhw4Z5dzj369cvnX766bnvnTp1Sv37989b9qBBg9IRRxyR+74p+8xClLf7X//6VyopKUlvvfVWevvtt1ODBg3Sv/71r0qf0Jg3b16KiNw5+drKn9BY8y7UZcuWpfr166epU6fmhq1YsSLtsMMO6dprr91grGufUy1btiydfvrpqW7duunVV19NKaX0+uuvp4hIzzzzTG6+Aw88MJ144okVLmf06NG5vyWGDRuWzj333PTKK69s1ic0XnzxxfXud3/yk5+kiEj//Oc/N3jc6t69ezr66KM3S9ybQ1Vys74nNCZOnJhatGiRPvvss9z422+/PW99+p//+Z/UrVu3vL+Dbr755tS4ceP0xRdfVHfTqt3a+7A//OEPqUWLFmngwIFp9uzZqaioKL399tsppS/XnXbt2qWbb7650uVVtJ9b+87uunXrpgULFuSGPfHEE6lOnTrp/fffr76G1aAHH3wwNWvWLDVo0CD16dMnjRs3Ls2dOzc3vpC/nXfccce8u+bX1Lp16/TjH/84933VqlWpY8eOW80TGimtP4fXX3996tq1a6X52RL+Fq7u9hW6jx4yZEjq1KlT3lOdxx9/fBo0aFBKKaX58+eniEjPP/98bvy///3vVFpamu6///6UUkonnnhihecYtfmERkobPk4X+oRGSinttddeacqUKWn16tVp5513To8++mi64YYb8p7QKC0tTWeffXZNNadWbSiXFT2h0bx58/Sf//wnN2zixIlbzHFuW+MJjS3cvHnz4qWXXsp1flivXr0YNGhQ3HnnnRER8b3vfS/uvffe2GuvveKiiy6KF154oaDl7rHHHlFcXJz7/vrrr8eee+6ZV53t27dvrF69OubNm1eNLap5hxxySMyZMyfvs+adMj179sz9u1GjRlFWVhaLFi2KiC/zsP/+++d1ANu7d++85b/++uvRt2/fvGF9+/aNv/3tb5nrSLcqli5dGkceeWT06NEjLr300irNe+GFF8Yrr7wSv/nNb6Ju3boxePDgDd4ptrU47bTT4tBDD43dd989TjjhhHjwwQfjqaeeitmzZ+dNd8MNN6yzXm6J/YVUl0LztjXalLavXr06IiK+//3vx7e+9a3o1atXTJo0KYqKiuKBBx5YZ/p+/frFAQccEJdcckmly7zllluicePG0aBBgzj66KPjO9/5TpX3AbWlPB+nn356DBs2LPbee++44YYbolu3brnjZEppi+zUu6YUkjPyVSVna54z1KtXL/bZZ594/fXXN2u8NeHZZ5+Nww47LNq1axdlZWUxePDg+PDDD/M6223YsGHsvPPOue+tW7eOzp075z2x0Lp169w5V7m1z7N69+6dl7PNdbxo2bJlHHnkkTFlypSYNGlSHHnkkbmnnisyZ86cqFu3bhx00EHrXe4+++yT+/cbb7wRK1euzDuPrF+/fnz1q1+t0nrSp0+faNy4cZSVlcUvf/nLmDx5cu6u2F133TX69OmTWzffeOON+N3vflfp0yYjRoyIBx54ID744IN44IEHNvhUSm0oP6cs35ev77i1re3z185NZebNmxc9e/aMBg0a5IZ99atfzZvm9ddfj969e+ctq2/fvrFs2bL4xz/+UY1R15xf/epXuXWjd+/eceCBB8ZNN90Ue++9d+y66665fhWfe+65WLRoUQwcODA3byH7ubV17Ngx2rdvn/veu3fvLerv529961uxcOHCeOyxx6Jfv34xY8aM+MpXvpJ3F/z6/naeM2dOfO1rX6vwyfwlS5bEP//5z7z1rG7dutGrV6+aa1AtWF8Ojz/++Pjss89ip512itNOOy0efvjhautLaXOp7vZVZR+92267Rd26dXPf27Ztm3fdpl69erHffvvlxrdo0SK6deuWO56W79PWtPb32lDV4/T6DB8+PCZNmhTPPfdcLFu2LAYMGLDONFvzcXFjclneMXq53r17x7Jly2LBggU1Hi9Vo6Cxhbvjjjti1apV0a5du6hXr17Uq1cvJk6cGL/4xS/io48+iiOOOCLeeeedGD16dCxcuDC+8Y1vFNTx2dqPla1vJ7el7fwaNWoUXbp0yfus2bHS2idcRUVFuYsVhVyEryhXW/rF+08++ST69+8fjRs3jocffrjKr4tq2bJldO3aNQ477LC4995749e//nW8+OKLNRRttn3lK1+J+vXrx9/+9re84W3atFlnvdyWXsu1IZXlbVtQlba3bds2IiJ69OiRG1ZSUhI77bRTvPvuuxXOc/XVV8d9990Xr7zySoXjTz755JgzZ0688cYb8dlnn8Udd9yRd5KXZRXlIyKie/fuuXx07do1lixZkslH+WtDITkj36bmbEs7j1rbO++8EwMGDIjdd989HnrooZg1a1bcfPPNERF5r6er6Pxqfedc67O+nNXk8WL48OExefLkmDJlygYvLKz9+p/KrHnOXdnF56pebLjvvvti7ty58a9//Svee++9+M53vpM3fsSIEfHQQw/F0qVLY9KkSdGpU6f4xje+UeGydt9999h1113jxBNPjO7du8fuu+9ecBzVpUuXLlFUVBR/+ctfKhz/17/+NZo1a5YrMK3vuNW1a9etoohYrqq5qUwhf7+sb5otZT9WfmPbvHnz4vPPP49f/OIX0apVq4j4cr255557IuLL103169cvl7dC93MbUp6nLSVfERENGjSIww47LH7wgx/ECy+8EEOHDs27sWV9+/FC9oNb29/NFakshx06dIh58+bFzTffHKWlpXHmmWfGgQceuMW82rVcdbavKvvojblus+Z+LMvrWlWO0+tz8sknx4svvhiXXXZZDB48uMLXhm9tx8W1VVcut6T99rZCQWMLtmrVqrjrrrvi+uuvz7ure+7cudGpU6fce0C33377GDp0aNx9990xYcKEuO222yIick9gFPLUQI8ePWLOnDl5d6A8//zzUadOnejatWsNtC6bevTosc6F+LW/9+jRY533Zr/wwgvRtWvXvDsIthRLly6Nww8/PIqLi+Oxxx7Lu3NrY5SfOKzvXaxbsz//+c+xcuXK3AUwCrMt560qbe/Vq1eUlJTk3fm3cuXKePvtt6NTp04VzvPVr341jjvuuHX6AyrXtGnT6NKlS3To0GGL24d17tw5dthhh3XuhJw/f34uH9/+9rejuLg4rr322gqX8fHHH9d0mJlSSM7IV5WcrXnOsGrVqpg1a1bsuuuumyXOmvLyyy/HqlWr4vrrr4/9998/unbtGgsXLqy25Vd03rW+nNXk8aJ///6xYsWKWLFiRfTr12+90+6xxx6xevXqeO655wpefpcuXaK4uDjvPHLlypXx8ssvR/fu3QteTocOHWLnnXeOFi1aVDh+4MCBUbdu3bjnnntiypQpMWzYsPX+oT58+PCYMWNGrT2d0aJFizjssMPilltuic8++yxv3AcffBBTp06NQYMG5dqwvuPWSSedFPPnz49HH310nd9JKcWSJUtqriE1oKq5qcyuu+4ar776at75+csvv5w3TY8ePeKFF17Iuwj4wgsvRFlZWbRr164aWlPzym9s69Sp0zoXQ0866aR47bXXYtasWfHggw/mvV99Y/dz7777bt50M2fO3OL/fu7Ro8d6n0pZU8+ePeN3v/tdhRewmzZtGq1bt8716xnx5XWJym6w2ZqsmcPS0tI4+uij48Ybb4wZM2bEzJkz47XXXouIL6/XbIlveNiU9lXXPrpHjx6xatWq+MMf/pAb9uGHH8b8+fNzx9NCru3UlqoepyvTvHnzOProo+O5556r9Bh+0kknxVNPPVXhtrdq1aqCt/esqmou586dm3c8ffHFF6Nx48Z5T9uRDVXv1ZfM+NWvfhUfffRRjBgxYp0O7L797W/HHXfcEYsWLYpevXrFbrvtFsuXL49f/epXuR14q1atorS0NJ588slo3759NGjQoNKO8E4++eS49NJLY8iQIXHZZZfFv/71rxg1alSccsop63QqlHXLly+PDz74IG9YvXr1NnjnUkTEGWecEddff32cd955cfrpp8esWbPW6ez7/PPPj3333TeuvPLKGDRoUMycOTN+9rOfxS233FKdzah2S5YsiTlz5uQNa9asWQwaNCg+/fTTuPvuu2Pp0qWxdOnSiPiyULahi5svvfRSvPTSS3HAAQdEs2bN4s0334wf/OAHsfPOO2ficc7qVFH+lixZEs8991wMGDAgWrZsGX/5y1/i/PPPj7333nud15J9/PHH66yXZWVlW2wnXIXa1LxtyWqi7U2aNIkzzjgjd1dUp06d4sc//nFERBx//PGVzvejH/0odttttwrv2tlSVJTP5s2bx4UXXhiXXnpp7LnnnrHXXnvFlClT4q9//Wuu4+EOHTrEDTfcECNHjoylS5fG4MGDo3PnzvGPf/wj7rrrrmjcuHFcf/31tdCimrexOduWbWrObr755thll12ie/fuccMNN8RHH32UyVf4VKai9m+//faxatWquOmmm+Koo46K559/Pm699dZq+83nn38+rr322jj22GNj+vTp8cADD8Tjjz8eEV++OmDq1Kmb7XhRt27d3F2MGzoH6ty5cwwZMiSGDx8eN954Y+y5557xzjvvrPMamzU1atQovve978WFF14YzZs3j44dO8a1114bn376aYwYMaLa2tG4ceMYNGhQ/M///E8sWbIkhg4dut7pTzvttDj++ONju+22q7YYqupnP/tZ9OnTJ/r16xc//OEPY8cdd4w///nPceGFF0a7du3iRz/6UUHLGThwYDz88MNx4oknxiWXXBKHHXZYbL/99vHaa6/FDTfcEKNGjYpjjz22ZhtTzaojNyeddFJ8//vfj+9+97sxduzYePfdd+O6666LiP/elXrmmWfGhAkTYtSoUTFy5MiYN29eXHrppXHeeedFnTpb/n2SO+64Y/Tp0ydGjBgRq1atyuuUeOedd96o/VyDBg1iyJAhcd1118XSpUvj7LPPjoEDB0abNm1qsinV4sMPP4zjjz8+hg8fHj179oyysrJ4+eWX49prr62ww+aKjBw5Mm666aY44YQTYty4cdG0adN48cUX46tf/Wp069YtRo0aFePHj48uXbrErrvuGjfddFN89NFHW82d0BvK4eTJk+OLL76I/fbbLxo2bBj/+7//G6WlpbmbITp37hy//e1v44QTToiSkpKCrldsTjXRvuraR++yyy5xzDHHxGmnnRY///nPo6ysLMaOHRvt2rXLrb9nn3129OnTJ3eO8Zvf/CaefPLJmkxZwQo5Tlf06rq1nxaO+LIz8FtuuaXSmxxGjx4djz/+eHzjG9+IK6+8Mg444IDc/+U111wTd9xxR+y1116b2qRaU9VznhUrVsSIESPi4osvjnfeeScuvfTSGDly5FZxnNvqbLbeOqh23/zmN9OAAQMqHDdr1qwUEenyyy9P3bt3T6Wlpal58+bpmGOOSW+++WZuuttvvz116NAh1alTJx100EEppco7fXz11VfTIYcckho0aJCaN2+eTjvttLyOHLcEQ4YMSRGxzqdbt24ppYo7OW3atGmaNGlS7vsvf/nL1KVLl1RSUpK+9rWvpTvvvDOvU/CUvuwcq0ePHql+/fqpY8eOeZ2dZVFleenUqVOFw6PAjiDL15nmzZunkpKS1Llz53TGGWekf/zjH7lptpZOwSvK0de//vV04IEHpubNm6fi4uK08847p7PPPjvXOXO5ynI8fvz4dX6ronV0S1UdedtSOyXe1Lavz4oVK9L555+fWrVqlcrKytKhhx6a/vSnP+XGV7bNffe7300RUWmn4FlWWT7LO+odP358at++fWrYsGHq3bt3+t3vfrfOMqZPn5769euX69hw1113TRdccEFauHDhOtNuDdvhpuZsS97+Ntam5Kx8u7vnnnvSfvvtl4qLi1P37t3T008/XUutqbr1tf8nP/lJatu2bSotLU39+vVLd911V9650dqd5aZUcUeba5+DdurUKV1++eVp4MCBqWHDhql169ZpwoQJufHvvvvuJu8zC2n3+jqoraxT8JRS+uyzz9K5556b2rZtm4qLi1OXLl3SnXfemVL6b6fga54/ls8zatSo1LJly1RSUpL69u2bXnrppYJi3VBHoWt64YUXUkSkww8/vMrL2dydgpd7++2309ChQ1ObNm1S/fr1U4cOHdKoUaPSv//979w0heyDvvjiizRx4sS07777poYNG6YmTZqkXr16pZ/+9Kfp008/reFW1IxCcrO+TsFTSun5559PPXv2TMXFxalXr17pnnvuSRGR/vrXv+ammTFjRtp3331TcXFxatOmTRozZkxauXLl5mjiJtvQtpzSl52cR0QaPHjwOuOqup8r38fdcsstaYcddkgNGjRIxx13XFq8eHE1tqrmfP7552ns2LHpK1/5SmratGlq2LBh6tatW7r44otz20khfzvPnTs3HX744alhw4aprKwsfe1rX0tvvPFGSimllStXppEjR6YmTZqkZs2apTFjxqTjjz8+nXDCCZurmTVqQzl8+OGH03777ZeaNGmSGjVqlPbff//01FNP5eafOXNm6tmzZyopKUlZvHRXU+0rZB9d0fZ8zjnn5K5ppZTS4sWL0ymnnJKaNm2a227nz5+fN88dd9yR2rdvn0pLS9NRRx2VrrvuulrvFLxcZcfp8mN0ZddoKjrnWtPanYKn9OX/5fjx49Mee+yRu97Xt2/fNHny5C1mH78+leWyok7BjznmmPSDH/wgtWjRIjVu3Dideuqp6fPPP9/MEVOIopQy/OI4AABgm9S5c+cYPXp0jB49urZDgW3O1KlTY9iwYbFkyZKC+4SBTbF69ero3r17DBw4MK688sraDgfYxgwdOjQ+/vjjeOSRR2o7FAqw5b5bAgAAANhkd911V+y0007Rrl27mDt3bowZMyYGDhyomEGNeeedd+I3v/lNHHTQQbF8+fL42c9+Fm+99VacdNJJtR0aABnnJWDARrvqqquicePGFX6OOOKI2g4Ptjq2OYBt1xlnnFHpMeCMM86o7fDYwn3wwQfxne98J7p37x7nnntuHH/88XHbbbfVdlhsxerUqROTJ0+OfffdN/r27RuvvfZaPPXUU7k+PwGgMl45BWy0xYsXx+LFiyscV1paGu3atdvMEcHWzTYHsO1atGhRLF26tMJxTZo0iVatWm3miAAAYPNT0AAAAAAAADLPK6cAAAAAAIDMU9AAAAAAAAAyT0EDAAAAAADIPAUNAAAAAAAg8xQ0AAAAAACAzFPQAAAAAAAAMk9BAwAAAAAAyLz/D+Zf6/uoa8htAAAAAElFTkSuQmCC", + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chrstartendsplit
region
chr1:3093998-3096112chr130939983096112train
chr1:3094663-3096777chr130946633096777train
chr1:3111367-3113481chr131113673113481train
chr1:3112727-3114841chr131127273114841train
chr1:3118939-3121053chr131189393121053train
...............
chrX:169878506-169880620chrX169878506169880620train
chrX:169879374-169881488chrX169879374169881488train
chrX:169924670-169926784chrX169924670169926784train
chrX:169947743-169949857chrX169947743169949857train
chrX:169950171-169952285chrX169950171169952285train
\n", + "

546993 rows × 4 columns

\n", + "
" + ], "text/plain": [ - "
" + " chr start end split\n", + "region \n", + "chr1:3093998-3096112 chr1 3093998 3096112 train\n", + "chr1:3094663-3096777 chr1 3094663 3096777 train\n", + "chr1:3111367-3113481 chr1 3111367 3113481 train\n", + "chr1:3112727-3114841 chr1 3112727 3114841 train\n", + "chr1:3118939-3121053 chr1 3118939 3121053 train\n", + "... ... ... ... ...\n", + "chrX:169878506-169880620 chrX 169878506 169880620 train\n", + "chrX:169879374-169881488 chrX 169879374 169881488 train\n", + "chrX:169924670-169926784 chrX 169924670 169926784 train\n", + "chrX:169947743-169949857 chrX 169947743 169949857 train\n", + "chrX:169950171-169952285 chrX 169950171 169952285 train\n", + "\n", + "[546993 rows x 4 columns]" ] }, + "execution_count": 5, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "%matplotlib inline\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "plt.figure(figsize=(20,3))\n", - "index=1998\n", - "plt.bar(adata.obs_names, np.log1p(1000*adata.X.T[index]))\n", - "print(adata.var.index[index])" + "# Choose the chromosomes for the validation and test sets\n", + "crested.pp.train_val_test_split(\n", + " adata, strategy=\"chr\", val_chroms=[\"chr8\", \"chr10\"], test_chroms=[\"chr9\", \"chr18\"]\n", + ")\n", + "\n", + "# Alternatively, We can split randomly on the regions\n", + "# crested.pp.train_val_test_split(\n", + "# adata, strategy=\"region\", val_size=0.1, test_size=0.1, random_state=42\n", + "# )\n", + "\n", + "print(adata.var[\"split\"].value_counts())\n", + "adata.var" ] }, {