Merge pull request #32 from ihmeuw-msca/update-basic-example

Updated examples, fixed dataframe splitting for rates
ihmeuw-msca · Aug 23, 2023 · add32f6 · add32f6
2 parents 6ab2d90 + 508e18f
commit add32f6
Show file tree

Hide file tree

Showing 4 changed files with 123 additions and 269 deletions.
diff --git a/examples/Basic Splitting Examples.ipynb b/examples/Basic Splitting Examples.ipynb
@@ -116,128 +116,18 @@
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "array([0.47619048, 1.19047619, 0.83333333])"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "rmm.count_split_standard_errors(b,rate_pattern,populations,observed_total_SE)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "b=oddm.fit_beta(observed_total,rate_pattern,populations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.30223366091526715"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "1/oddm.H_diff_beta(b,rate_pattern,populations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "3.679899347162453"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "oddm.fit_beta(observed_total,rate_pattern,populations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.7555841522881679"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "oddm.beta_standard_error(\n",
-    "    b,\n",
-    "    rate_pattern,\n",
-    "    populations,\n",
-    "    observed_total_SE,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "'RateMultiplicativeModel' object has no attribute 'split_groups'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[1;32m/Users/owlx/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/examples/Basic Splitting Examples.ipynb Cell 15\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m estimate,SE,CI\u001b[39m=\u001b[39msplit_datapoint(\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m     observed_total,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m     populations,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m     rate_pattern,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m     observed_total_SE,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m     model\u001b[39m=\u001b[39;49mRateMultiplicativeModel()\n\u001b[1;32m      <a 
href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m )\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEstimated incidence in each group\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mprint\u001b[39m(estimate)\n",
-      "File \u001b[0;32m~/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/src/pydisagg/disaggregate.py:70\u001b[0m, in \u001b[0;36msplit_datapoint\u001b[0;34m(observed_total, bucket_populations, rate_pattern, observed_total_se, model, output_type, CI_method)\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[39m\"\"\"Disaggregate a datapoint using the model given as input.\u001b[39;00m\n\u001b[1;32m     22\u001b[0m \u001b[39mDefaults to assuming multiplicativity in the odds ratio\u001b[39;00m\n\u001b[1;32m     23\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     67\u001b[0m \u001b[39m    (point_estimate,standard_error,(CI_lower,CI_upper))\u001b[39;00m\n\u001b[1;32m     68\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m     69\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtotal\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m---> 70\u001b[0m     \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39;49msplit_groups(\n\u001b[1;32m     71\u001b[0m         bucket_populations,\n\u001b[1;32m     72\u001b[0m         observed_total,\n\u001b[1;32m     73\u001b[0m         observed_total_se,\n\u001b[1;32m     74\u001b[0m         rate_pattern,\n\u001b[1;32m     75\u001b[0m         CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m     76\u001b[0m     )\n\u001b[1;32m     77\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mrate\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[1;32m     78\u001b[0m     \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39msplit_groups_rate(\n\u001b[1;32m     79\u001b[0m         bucket_populations,\n\u001b[1;32m     80\u001b[0m         observed_total,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     83\u001b[0m         CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m     84\u001b[0m     )\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'RateMultiplicativeModel' object has no attribute 'split_groups'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Estimated incidence in each group\n",
+      "[ 5.9047619  14.76190476 10.33333333]\n",
+      "Standard Error of Above Estimates\n",
+      "[0.47619048 1.19047619 0.83333333]\n"
      ]
     }
    ],
    "source": [
-    "estimate,SE,CI=split_datapoint(\n",
+    "estimate,SE=split_datapoint(\n",
     "    observed_total,\n",
     "    populations,\n",
     "    rate_pattern,\n",
@@ -248,17 +138,12 @@
     "print(estimate)\n",
     "\n",
     "print(\"Standard Error of Above Estimates\")\n",
-    "print(SE)\n",
-    "\n",
-    "print(\"CI Lower\")\n",
-    "print(CI[0])\n",
-    "print(\"CI Upper\")\n",
-    "print(CI[1])\n"
+    "print(SE)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -268,16 +153,12 @@
       "Estimated incidence in each group\n",
       "[16.29952498  9.75395156  4.94652346]\n",
       "Standard Error of Above Estimates\n",
-      "[2.27869012 0.18133601 0.03997387]\n",
-      "CI Lower\n",
-      "[11.83337441  9.39853952  4.86817611]\n",
-      "CI Upper\n",
-      "[20.76567555 10.1093636   5.02487081]\n"
+      "[2.27869012 0.18133601 0.03997387]\n"
      ]
     }
    ],
    "source": [
-    "estimate,SE,CI=split_datapoint(\n",
+    "estimate,SE=split_datapoint(\n",
     "    observed_total,\n",
     "    populations,\n",
     "    rate_pattern,\n",
@@ -288,14 +169,7 @@
     "print(estimate)\n",
     "\n",
     "print(\"Standard Error of Above Estimates\")\n",
-    "print(SE)\n",
-    "\n",
-    "print(\"CI Lower\")\n",
-    "print(CI[0])\n",
-    "\n",
-    "print(\"CI Upper\")\n",
-    "print(CI[1])\n",
-    "\n"
+    "print(SE)"
    ]
   },
   {
@@ -308,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -354,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -364,7 +238,7 @@
       "Estimates:\n",
       "[648.14814815 855.55555556]\n",
       "SE\n",
-      "[ 92.59259259 122.22222222]\n"
+      "[1.07758621 1.42241379]\n"
      ]
     }
    ],
@@ -383,18 +257,24 @@
     "#This hack of using the ratio of incidence rates works when we assume multiplicativity in rate rather than in odds\n",
     "baseline_male_rate=1.2\n",
     "baseline_female_rate=1\n",
-    "sex_splitting_model=RateMultiplicativeModel(np.array([baseline_female_rate,baseline_male_rate]))\n",
+    "sex_splitting_model=RateMultiplicativeModel()\n",
+    "pattern = np.array([baseline_female_rate,baseline_male_rate])\n",
     "\n",
     "#Fit with study_props, the study population proportions\n",
-    "sex_splitting_model.fit_beta(\n",
-    "    bucket_populations=study_props,\n",
+    "fitted_beta = sex_splitting_model.fit_beta(\n",
     "    observed_total=observed_study_rate,\n",
-    "    observed_total_se=study_se\n",
+    "    rate_pattern=pattern,\n",
+    "    bucket_populations=study_props,\n",
+    "    #observed_total_se=study_se,\n",
     ")\n",
     "\n",
     "#Predict with population proportions\n",
-    "estimated_by_sex=sex_splitting_model.predict_count(pop_by_sex)\n",
-    "SE_by_sex=sex_splitting_model.predict_count_SE(pop_by_sex)\n",
+    "estimated_by_sex=sex_splitting_model.predict_count(fitted_beta,pattern, pop_by_sex)\n",
+    "#NOTE(review): observed_total_SE was left over from an earlier example; this cell's uncertainty is study_se.\n",
+    "#Rescale study_se (study-rate scale) to the predicted count total, assuming the model takes the SE of that total - confirm.\n",
+    "total_count_SE=study_se*np.sum(estimated_by_sex)/observed_study_rate\n",
+    "SE_by_sex=sex_splitting_model.count_split_standard_errors(\n",
+    "    fitted_beta,pattern,pop_by_sex,total_count_SE)\n",
     "print(\"Estimates:\")\n",
     "print(estimated_by_sex)\n",
     "\n",
@@ -411,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -447,33 +327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "combined_split_results=np.zeros((4,3))\n",
-    "combined_split_results[:,0]=age_results\n",
-    "\n",
-    "for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(\n",
-    "    age_results,\n",
-    "    within_age_sex_proportions,\n",
-    "    population_age_pattern,\n",
-    "    within_age_sex_rate_ratios,\n",
-    "    range(len(age_results))\n",
-    "    ):\n",
-    "    combined_split_results[age_id,1:3]=split_datapoint(\n",
-    "        age_incidence,\n",
-    "        population_at_age*sex_proportions,\n",
-    "        np.array([rate_ratio,1]),\n",
-    "        model=sex_splitting_model\n",
-    "    )\n",
-    "    \n",
-    "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -539,21 +393,32 @@
        "3           80.384758   52.343563  28.041195"
       ]
      },
-     "execution_count": 9,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "full_results"
+    "#Column 0 holds each age group's total; columns 1:3 receive that total split by sex.\n",
+    "combined_split_results=np.zeros((len(age_results),3))\n",
+    "combined_split_results[:,0]=age_results\n",
+    "#enumerate supplies the row index instead of zipping in range(len(age_results)).\n",
+    "for age_id,(age_incidence,sex_proportions,population_at_age,rate_ratio) in enumerate(zip(\n",
+    "    age_results,\n",
+    "    within_age_sex_proportions,\n",
+    "    population_age_pattern,\n",
+    "    within_age_sex_rate_ratios\n",
+    "    )):\n",
+    "    #NOTE(review): the [rate_ratio,1] pattern is assumed to be ordered (Male,Female) like the column labels - confirm.\n",
+    "    combined_split_results[age_id,1:3]=split_datapoint(\n",
+    "        age_incidence,\n",
+    "        population_at_age*sex_proportions,\n",
+    "        np.array([rate_ratio,1]),\n",
+    "        model=sex_splitting_model\n",
+    "    )\n",
+    "\n",
+    "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])\n",
+    "display(full_results)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {