From aa85560ceca9eae1c3b456d8d3af357ff674057e Mon Sep 17 00:00:00 2001
From: AHsu98 <34590951+AHsu98@users.noreply.github.com>
Date: Wed, 23 Aug 2023 12:56:14 -0700
Subject: [PATCH 1/3] Updated example, fixed some of the docstrings

---
 examples/Basic Splitting Examples.ipynb | 191 ++++--------------------
 src/pydisagg/disaggregate.py            |   3 +-
 2 files changed, 33 insertions(+), 161 deletions(-)
diff --git a/examples/Basic Splitting Examples.ipynb b/examples/Basic Splitting Examples.ipynb
index 6c05063..6d44984 100644
--- a/examples/Basic Splitting Examples.ipynb	
+++ b/examples/Basic Splitting Examples.ipynb	
@@ -110,134 +110,24 @@
     "print(np.sum(bse * rmm.count_diff_beta(b,rate_pattern,populations)))"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([0.47619048, 1.19047619, 0.83333333])"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "rmm.count_split_standard_errors(b,rate_pattern,populations,observed_total_SE)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "b=oddm.fit_beta(observed_total,rate_pattern,populations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.30223366091526715"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "1/oddm.H_diff_beta(b,rate_pattern,populations)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "3.679899347162453"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "oddm.fit_beta(observed_total,rate_pattern,populations)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.7555841522881679"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "oddm.beta_standard_error(\n",
-    "    b,\n",
-    "    rate_pattern,\n",
-    "    populations,\n",
-    "    observed_total_SE,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "'RateMultiplicativeModel' object has no attribute 'split_groups'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[1;32m/Users/owlx/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/examples/Basic Splitting Examples.ipynb Cell 15\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m estimate,SE,CI\u001b[39m=\u001b[39msplit_datapoint(\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m     observed_total,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m     populations,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m     rate_pattern,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m     observed_total_SE,\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m     model\u001b[39m=\u001b[39;49mRateMultiplicativeModel()\n\u001b[1;32m      <a 
href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m )\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEstimated incidence in each group\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m      <a href='vscode-notebook-cell:/Users/owlx/Dropbox/UW%20Math%20Work/Research%20with%20Sasha/IHME%20work/Splitting%20Problems/splitting-python-package/examples/Basic%20Splitting%20Examples.ipynb#X36sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mprint\u001b[39m(estimate)\n",
-      "File \u001b[0;32m~/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/src/pydisagg/disaggregate.py:70\u001b[0m, in \u001b[0;36msplit_datapoint\u001b[0;34m(observed_total, bucket_populations, rate_pattern, observed_total_se, model, output_type, CI_method)\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[39m\"\"\"Disaggregate a datapoint using the model given as input.\u001b[39;00m\n\u001b[1;32m     22\u001b[0m \u001b[39mDefaults to assuming multiplicativity in the odds ratio\u001b[39;00m\n\u001b[1;32m     23\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     67\u001b[0m \u001b[39m    (point_estimate,standard_error,(CI_lower,CI_upper))\u001b[39;00m\n\u001b[1;32m     68\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m     69\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtotal\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m---> 70\u001b[0m     \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39;49msplit_groups(\n\u001b[1;32m     71\u001b[0m         bucket_populations,\n\u001b[1;32m     72\u001b[0m         observed_total,\n\u001b[1;32m     73\u001b[0m         observed_total_se,\n\u001b[1;32m     74\u001b[0m         rate_pattern,\n\u001b[1;32m     75\u001b[0m         CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m     76\u001b[0m     )\n\u001b[1;32m     77\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mrate\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[1;32m     78\u001b[0m     \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39msplit_groups_rate(\n\u001b[1;32m     79\u001b[0m         bucket_populations,\n\u001b[1;32m     80\u001b[0m         observed_total,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     83\u001b[0m         CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m     84\u001b[0m     )\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'RateMultiplicativeModel' object has no attribute 'split_groups'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Estimated incidence in each group\n",
+      "[ 5.9047619  14.76190476 10.33333333]\n",
+      "Standard Error of Above Estimates\n",
+      "[0.47619048 1.19047619 0.83333333]\n"
      ]
     }
    ],
    "source": [
-    "estimate,SE,CI=split_datapoint(\n",
+    "estimate,SE=split_datapoint(\n",
     "    observed_total,\n",
     "    populations,\n",
     "    rate_pattern,\n",
@@ -248,17 +138,12 @@
     "print(estimate)\n",
     "\n",
     "print(\"Standard Error of Above Estimates\")\n",
-    "print(SE)\n",
-    "\n",
-    "print(\"CI Lower\")\n",
-    "print(CI[0])\n",
-    "print(\"CI Upper\")\n",
-    "print(CI[1])\n"
+    "print(SE)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -268,16 +153,12 @@
       "Estimated incidence in each group\n",
       "[16.29952498  9.75395156  4.94652346]\n",
       "Standard Error of Above Estimates\n",
-      "[2.27869012 0.18133601 0.03997387]\n",
-      "CI Lower\n",
-      "[11.83337441  9.39853952  4.86817611]\n",
-      "CI Upper\n",
-      "[20.76567555 10.1093636   5.02487081]\n"
+      "[2.27869012 0.18133601 0.03997387]\n"
      ]
     }
    ],
    "source": [
-    "estimate,SE,CI=split_datapoint(\n",
+    "estimate,SE=split_datapoint(\n",
     "    observed_total,\n",
     "    populations,\n",
     "    rate_pattern,\n",
@@ -288,14 +169,7 @@
     "print(estimate)\n",
     "\n",
     "print(\"Standard Error of Above Estimates\")\n",
-    "print(SE)\n",
-    "\n",
-    "print(\"CI Lower\")\n",
-    "print(CI[0])\n",
-    "\n",
-    "print(\"CI Upper\")\n",
-    "print(CI[1])\n",
-    "\n"
+    "print(SE)"
    ]
   },
   {
@@ -308,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -354,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -364,7 +238,7 @@
       "Estimates:\n",
       "[648.14814815 855.55555556]\n",
       "SE\n",
-      "[ 92.59259259 122.22222222]\n"
+      "[1.07758621 1.42241379]\n"
      ]
     }
    ],
@@ -383,18 +257,24 @@
     "#This hack of using the ratio of incidence rates works when we assume multiplicativity in rate rather than in odds\n",
     "baseline_male_rate=1.2\n",
     "baseline_female_rate=1\n",
-    "sex_splitting_model=RateMultiplicativeModel(np.array([baseline_female_rate,baseline_male_rate]))\n",
+    "sex_splitting_model=RateMultiplicativeModel()\n",
+    "pattern = np.array([baseline_female_rate,baseline_male_rate])\n",
     "\n",
     "#Fit with study_props, the study population proportions\n",
-    "sex_splitting_model.fit_beta(\n",
-    "    bucket_populations=study_props,\n",
+    "fitted_beta = sex_splitting_model.fit_beta(\n",
     "    observed_total=observed_study_rate,\n",
-    "    observed_total_se=study_se\n",
+    "    rate_pattern=pattern,\n",
+    "    bucket_populations=study_props,\n",
+    "    #observed_total_se=study_se,\n",
     ")\n",
     "\n",
     "#Predict with population proportions\n",
-    "estimated_by_sex=sex_splitting_model.predict_count(pop_by_sex)\n",
-    "SE_by_sex=sex_splitting_model.predict_count_SE(pop_by_sex)\n",
+    "estimated_by_sex=sex_splitting_model.predict_count(fitted_beta,pattern, pop_by_sex)\n",
+    "SE_by_sex=sex_splitting_model.count_split_standard_errors(\n",
+    "    fitted_beta,\n",
+    "    pattern,\n",
+    "    pop_by_sex,\n",
+    "    observed_total_SE)\n",
     "print(\"Estimates:\")\n",
     "print(estimated_by_sex)\n",
     "\n",
@@ -411,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -447,7 +327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -473,7 +353,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -539,7 +419,7 @@
        "3           80.384758   52.343563  28.041195"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -547,13 +427,6 @@
    "source": [
     "full_results"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py
index 04822c7..08f8c39 100644
--- a/src/pydisagg/disaggregate.py
+++ b/src/pydisagg/disaggregate.py
@@ -55,7 +55,6 @@ def split_datapoint(
             (
                 estimate_in_each_bucket,
                 se_of_estimate_bucket,
-                (CI_lower_in_each_bucket,CI_upper_in_each_bucket)
             )
         Otherwise, if standard errors are not available, 
         this will return a numpy array of the disaggregated estimates
@@ -64,7 +63,7 @@ def split_datapoint(
     -----
     If no observed_total_se is given, returns point estimates
     If observed_total_se is given, then returns a tuple
-        (point_estimate,standard_error,(CI_lower,CI_upper))
+        (point_estimate,standard_error)
     """
     if output_type not in ['total', 'rate']:
         raise ValueError("output_type must be one of either 'total' or 'rate'")

From 611de71093a2587ebaa8c17082cc7ec7c17a975d Mon Sep 17 00:00:00 2001
From: AHsu98 <34590951+AHsu98@users.noreply.github.com>
Date: Wed, 23 Aug 2023 13:02:00 -0700
Subject: [PATCH 2/3] Dropped CI_method input

Confidence intervals are no longer supported for now; we can figure out how to add them back later if someone explicitly asks.
---
 examples/Basic Splitting Examples.ipynb | 62 +++++++++++--------------
 src/pydisagg/disaggregate.py            |  4 --
 2 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/examples/Basic Splitting Examples.ipynb b/examples/Basic Splitting Examples.ipynb
index 6d44984..72fc5a0 100644
--- a/examples/Basic Splitting Examples.ipynb	
+++ b/examples/Basic Splitting Examples.ipynb	
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -143,7 +143,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -182,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -228,7 +228,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -291,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -327,33 +327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "combined_split_results=np.zeros((4,3))\n",
-    "combined_split_results[:,0]=age_results\n",
-    "\n",
-    "for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(\n",
-    "    age_results,\n",
-    "    within_age_sex_proportions,\n",
-    "    population_age_pattern,\n",
-    "    within_age_sex_rate_ratios,\n",
-    "    range(len(age_results))\n",
-    "    ):\n",
-    "    combined_split_results[age_id,1:3]=split_datapoint(\n",
-    "        age_incidence,\n",
-    "        population_at_age*sex_proportions,\n",
-    "        np.array([rate_ratio,1]),\n",
-    "        model=sex_splitting_model\n",
-    "    )\n",
-    "    \n",
-    "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -419,13 +393,31 @@
        "3           80.384758   52.343563  28.041195"
       ]
      },
-     "execution_count": 29,
      "metadata": {},
-     "output_type": "execute_result"
+     "output_type": "display_data"
     }
    ],
    "source": [
-    "full_results"
+    "combined_split_results=np.zeros((4,3))\n",
+    "combined_split_results[:,0]=age_results\n",
+    "\n",
+    "for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(\n",
+    "    age_results,\n",
+    "    within_age_sex_proportions,\n",
+    "    population_age_pattern,\n",
+    "    within_age_sex_rate_ratios,\n",
+    "    range(len(age_results))\n",
+    "    ):\n",
+    "    combined_split_results[age_id,1:3]=split_datapoint(\n",
+    "        age_incidence,\n",
+    "        population_at_age*sex_proportions,\n",
+    "        np.array([rate_ratio,1]),\n",
+    "        model=sex_splitting_model\n",
+    "    )\n",
+    "    \n",
+    "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])\n",
+    "\n",
+    "display(full_results)"
    ]
   }
  ],
diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py
index 08f8c39..ffd6e61 100644
--- a/src/pydisagg/disaggregate.py
+++ b/src/pydisagg/disaggregate.py
@@ -16,7 +16,6 @@ def split_datapoint(
     observed_total_se: Optional[float] = None,
     model: Optional[DisaggModel] = LogOdds_model(),
     output_type: Literal['total', 'rate'] = 'total',
-    CI_method: Optional[str] = 'delta-wald'
 ) -> Union[tuple, NDArray]:
     """Disaggregate a datapoint using the model given as input.
     Defaults to assuming multiplicativity in the odds ratio
@@ -44,9 +43,6 @@ def split_datapoint(
         in each group, or estimate the rate per population unit. 
     model : Optional[DisaggModel], optional
         DisaggModel to use, by default LMO_model(1)
-    CI_method : Optional[str], optional
-        method to use for confidence intervals,
-        see documentation for standard error methods in DisaggModel, by default 'delta-wald'
 
     Returns
     -------

From 508e18f4a243a9af6d07ebcadd13943e9fdae303 Mon Sep 17 00:00:00 2001
From: AHsu98 <34590951+AHsu98@users.noreply.github.com>
Date: Wed, 23 Aug 2023 13:19:14 -0700
Subject: [PATCH 3/3] Fixed Dataframe Rate Splitting

Added an extra input to model.split_to_rates that controls whether to zero out the predicted rates for groups with zero population. This means that we only make predictions for groups that have nonzero population in our study.
---
 examples/Dataframe Splitting Demo.ipynb | 132 +++++++++++-------------
 src/pydisagg/DisaggModel.py             |  13 ++-
 src/pydisagg/disaggregate.py            |   3 +-
 3 files changed, 71 insertions(+), 77 deletions(-)

diff --git a/examples/Dataframe Splitting Demo.ipynb b/examples/Dataframe Splitting Demo.ipynb
index 5d900c4..4e7cc94 100644
--- a/examples/Dataframe Splitting Demo.ipynb	
+++ b/examples/Dataframe Splitting Demo.ipynb	
@@ -469,7 +469,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -635,7 +635,7 @@
        "7    0.000000    1.763117    1.236883  "
       ]
      },
-     "execution_count": 10,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -648,12 +648,12 @@
     "    baseline_patterns,\n",
     "    use_se=True\n",
     ")\n",
-    "df.reset_index()"
+    "df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -672,20 +672,18 @@
        "    .dataframe thead tr th {\n",
        "        text-align: left;\n",
        "    }\n",
-       "\n",
-       "    .dataframe thead tr:last-of-type th {\n",
-       "        text-align: right;\n",
-       "    }\n",
        "</style>\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr>\n",
        "      <th></th>\n",
+       "      <th>demographic_id</th>\n",
        "      <th colspan=\"4\" halign=\"left\">estimate</th>\n",
        "      <th colspan=\"4\" halign=\"left\">se</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th></th>\n",
+       "      <th></th>\n",
        "      <th>age_group_0</th>\n",
        "      <th>age_group_1</th>\n",
        "      <th>age_group_2</th>\n",
@@ -695,34 +693,25 @@
        "      <th>age_group_2</th>\n",
        "      <th>age_group_3</th>\n",
        "    </tr>\n",
-       "    <tr>\n",
-       "      <th>demographic_id</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
+       "      <td>0</td>\n",
        "      <td>0.458789</td>\n",
        "      <td>0.241211</td>\n",
-       "      <td>0.352732</td>\n",
-       "      <td>0.792302</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.057566</td>\n",
        "      <td>0.042434</td>\n",
        "      <td>0.052932</td>\n",
        "      <td>0.038152</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.370255</td>\n",
-       "      <td>0.180650</td>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.274292</td>\n",
        "      <td>0.725708</td>\n",
        "      <td>0.058568</td>\n",
@@ -731,21 +720,23 @@
        "      <td>0.050000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
        "      <td>0.297618</td>\n",
        "      <td>0.137111</td>\n",
        "      <td>0.214081</td>\n",
-       "      <td>0.655975</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.080178</td>\n",
        "      <td>0.045378</td>\n",
        "      <td>0.064533</td>\n",
        "      <td>0.086557</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0.550000</td>\n",
-       "      <td>0.314286</td>\n",
-       "      <td>0.440000</td>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.846154</td>\n",
        "      <td>0.146250</td>\n",
        "      <td>0.127347</td>\n",
@@ -753,7 +744,8 @@
        "      <td>0.076923</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <th>4</th>\n",
+       "      <td>2</td>\n",
        "      <td>0.059787</td>\n",
        "      <td>0.091730</td>\n",
        "      <td>0.125166</td>\n",
@@ -764,31 +756,34 @@
        "      <td>0.057983</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
+       "      <th>5</th>\n",
+       "      <td>3</td>\n",
        "      <td>0.160000</td>\n",
-       "      <td>0.232258</td>\n",
-       "      <td>0.300000</td>\n",
-       "      <td>0.800000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.020000</td>\n",
        "      <td>0.026535</td>\n",
        "      <td>0.031250</td>\n",
        "      <td>0.023810</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.136000</td>\n",
+       "      <th>6</th>\n",
+       "      <td>3</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.200000</td>\n",
-       "      <td>0.261538</td>\n",
-       "      <td>0.767742</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.018360</td>\n",
        "      <td>0.025000</td>\n",
        "      <td>0.030178</td>\n",
        "      <td>0.027862</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0.121529</td>\n",
-       "      <td>0.180139</td>\n",
+       "      <th>7</th>\n",
+       "      <td>3</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.237380</td>\n",
        "      <td>0.743930</td>\n",
        "      <td>0.034659</td>\n",
@@ -801,32 +796,30 @@
        "</div>"
       ],
       "text/plain": [
-       "                  estimate                                              se  \\\n",
-       "               age_group_0 age_group_1 age_group_2 age_group_3 age_group_0   \n",
-       "demographic_id                                                               \n",
-       "0                 0.458789    0.241211    0.352732    0.792302    0.057566   \n",
-       "0                 0.370255    0.180650    0.274292    0.725708    0.058568   \n",
-       "1                 0.297618    0.137111    0.214081    0.655975    0.080178   \n",
-       "1                 0.550000    0.314286    0.440000    0.846154    0.146250   \n",
-       "2                 0.059787    0.091730    0.125166    0.571801    0.013312   \n",
-       "3                 0.160000    0.232258    0.300000    0.800000    0.020000   \n",
-       "3                 0.136000    0.200000    0.261538    0.767742    0.018360   \n",
-       "3                 0.121529    0.180139    0.237380    0.743930    0.034659   \n",
+       "  demographic_id    estimate                                              se  \\\n",
+       "                 age_group_0 age_group_1 age_group_2 age_group_3 age_group_0   \n",
+       "0              0    0.458789    0.241211    0.000000    0.000000    0.057566   \n",
+       "1              0    0.000000    0.000000    0.274292    0.725708    0.058568   \n",
+       "2              1    0.297618    0.137111    0.214081    0.000000    0.080178   \n",
+       "3              1    0.000000    0.000000    0.000000    0.846154    0.146250   \n",
+       "4              2    0.059787    0.091730    0.125166    0.571801    0.013312   \n",
+       "5              3    0.160000    0.000000    0.000000    0.000000    0.020000   \n",
+       "6              3    0.000000    0.200000    0.000000    0.000000    0.018360   \n",
+       "7              3    0.000000    0.000000    0.237380    0.743930    0.034659   \n",
        "\n",
-       "                                                    \n",
-       "               age_group_1 age_group_2 age_group_3  \n",
-       "demographic_id                                      \n",
-       "0                 0.042434    0.052932    0.038152  \n",
-       "0                 0.037179    0.050000    0.050000  \n",
-       "1                 0.045378    0.064533    0.086557  \n",
-       "1                 0.127347    0.145600    0.076923  \n",
-       "2                 0.019730    0.025931    0.057983  \n",
-       "3                 0.026535    0.031250    0.023810  \n",
-       "3                 0.025000    0.030178    0.027862  \n",
-       "3                 0.047946    0.058771    0.061844  "
+       "                                       \n",
+       "  age_group_1 age_group_2 age_group_3  \n",
+       "0    0.042434    0.052932    0.038152  \n",
+       "1    0.037179    0.050000    0.050000  \n",
+       "2    0.045378    0.064533    0.086557  \n",
+       "3    0.127347    0.145600    0.076923  \n",
+       "4    0.019730    0.025931    0.057983  \n",
+       "5    0.026535    0.031250    0.023810  \n",
+       "6    0.025000    0.030178    0.027862  \n",
+       "7    0.047946    0.058771    0.061844  "
       ]
      },
-     "execution_count": 15,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -842,13 +835,6 @@
     ")\n",
     "df_rate"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -867,7 +853,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.0"
+   "version": "3.9.12"
   },
   "orig_nbformat": 4,
   "vscode": {
diff --git a/src/pydisagg/DisaggModel.py b/src/pydisagg/DisaggModel.py
index 4a8b45a..fc00faa 100644
--- a/src/pydisagg/DisaggModel.py
+++ b/src/pydisagg/DisaggModel.py
@@ -267,6 +267,7 @@ def split_to_rates(
         observed_total_se: Optional[float] = None,
         lower_guess: float = -50,
         upper_guess: float = 50,
+        reduce_output: bool = False,
     ) -> NDArray:
         """Splits the given total to rates
 
@@ -287,7 +288,8 @@ def split_to_rates(
         verbose : Optional[int], optional
             how much to print, 1 prints the root value,
             2 prints the entire rootfinding output, by default 0
-
+        reduce_output : bool, by default False
+            boolean for whether or not to set groups with zero population to have zero rate
         Returns
         -------
         if observed_total_se is not given, we return
@@ -303,11 +305,16 @@ def split_to_rates(
             observed_total, rate_pattern, bucket_populations, lower_guess, upper_guess
         )
         rate_point_estimates = self.predict_rate(fitted_beta, rate_pattern)
+        
+        #This is some dirty type casting: if reduce_output is set, we set all groups with population 0 to 0
+        #Otherwise, we're multiplying everything by True, which gets cast to 1
+        output_multiplier = ((1-1*reduce_output)+bucket_populations)>0
+
         if observed_total_se is not None:
             standard_errors = self.rate_standard_errors(
                 fitted_beta, rate_pattern, bucket_populations, observed_total_se)
-            return rate_point_estimates, standard_errors
-        return rate_point_estimates
+            return rate_point_estimates*output_multiplier, standard_errors*output_multiplier
+        return rate_point_estimates*output_multiplier
 
     def rate_standard_errors(
         self,
diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py
index ffd6e61..25b56a8 100644
--- a/src/pydisagg/disaggregate.py
+++ b/src/pydisagg/disaggregate.py
@@ -86,7 +86,8 @@ def split_datapoint(
         point_estimates = model.split_to_rates(
             observed_total,
             rate_pattern,
-            bucket_populations
+            bucket_populations,
+            reduce_output=True
         )
         if observed_total_se is not None:
             fitted_beta = model.fit_beta(

	demographic_id	estimate	se
		age_group_0	age_group_1	age_group_2	age_group_2	age_group_3
demographic_id
0	0	0.458789	0.241211	0.352732	0.792302	0.000000	0.000000	0.057566	0.042434	0.052932	0.038152
0	0.370255	0.180650	1	0	0.000000	0.000000	0.274292	0.725708	0.058568	0.050000
1	2	1	0.297618	0.137111	0.214081	0.655975	0.000000	0.080178	0.045378	0.064533	0.086557
1	0.550000	0.314286	0.440000	3	1	0.000000	0.000000	0.000000	0.846154	0.146250	0.127347	0.076923
2	4	2	0.059787	0.091730	0.125166	0.057983
3	5	3	0.160000	0.232258	0.300000	0.800000	0.000000	0.000000	0.000000	0.020000	0.026535	0.031250	0.023810
3	0.136000	6	3	0.000000	0.200000	0.261538	0.767742	0.000000	0.000000	0.018360	0.025000	0.030178	0.027862
3	0.121529	0.180139	7	3	0.000000	0.000000	0.237380	0.743930	0.034659