From aa85560ceca9eae1c3b456d8d3af357ff674057e Mon Sep 17 00:00:00 2001 From: AHsu98 <34590951+AHsu98@users.noreply.github.com> Date: Wed, 23 Aug 2023 12:56:14 -0700 Subject: [PATCH 1/3] Updated example, fixed some of the docstrings --- examples/Basic Splitting Examples.ipynb | 191 ++++-------------------- src/pydisagg/disaggregate.py | 3 +- 2 files changed, 33 insertions(+), 161 deletions(-) diff --git a/examples/Basic Splitting Examples.ipynb b/examples/Basic Splitting Examples.ipynb index 6c05063..6d44984 100644 --- a/examples/Basic Splitting Examples.ipynb +++ b/examples/Basic Splitting Examples.ipynb @@ -110,134 +110,24 @@ "print(np.sum(bse * rmm.count_diff_beta(b,rate_pattern,populations)))" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0.47619048, 1.19047619, 0.83333333])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rmm.count_split_standard_errors(b,rate_pattern,populations,observed_total_SE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "b=oddm.fit_beta(observed_total,rate_pattern,populations)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.30223366091526715" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "1/oddm.H_diff_beta(b,rate_pattern,populations)" - ] - }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "3.679899347162453" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oddm.fit_beta(observed_total,rate_pattern,populations)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7555841522881679" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "oddm.beta_standard_error(\n", - " b,\n", - " rate_pattern,\n", - " populations,\n", - " observed_total_SE,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'RateMultiplicativeModel' object has no attribute 'split_groups'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/Users/owlx/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/examples/Basic Splitting Examples.ipynb Cell 15\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m estimate,SE,CI\u001b[39m=\u001b[39msplit_datapoint(\n\u001b[1;32m 2\u001b[0m observed_total,\n\u001b[1;32m 3\u001b[0m populations,\n\u001b[1;32m 4\u001b[0m rate_pattern,\n\u001b[1;32m 5\u001b[0m observed_total_SE,\n\u001b[1;32m 6\u001b[0m model\u001b[39m=\u001b[39;49mRateMultiplicativeModel()\n\u001b[1;32m 7\u001b[0m )\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mEstimated incidence in each group\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[39mprint\u001b[39m(estimate)\n", - "File \u001b[0;32m~/Dropbox/UW Math Work/Research with Sasha/IHME work/Splitting Problems/splitting-python-package/src/pydisagg/disaggregate.py:70\u001b[0m, in \u001b[0;36msplit_datapoint\u001b[0;34m(observed_total, bucket_populations, rate_pattern, observed_total_se, model, output_type, CI_method)\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[39m\"\"\"Disaggregate a datapoint using the model given as input.\u001b[39;00m\n\u001b[1;32m 22\u001b[0m \u001b[39mDefaults to assuming multiplicativity in the odds ratio\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[39m (point_estimate,standard_error,(CI_lower,CI_upper))\u001b[39;00m\n\u001b[1;32m 68\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mtotal\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[0;32m---> 70\u001b[0m \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39;49msplit_groups(\n\u001b[1;32m 71\u001b[0m bucket_populations,\n\u001b[1;32m 72\u001b[0m observed_total,\n\u001b[1;32m 73\u001b[0m observed_total_se,\n\u001b[1;32m 74\u001b[0m rate_pattern,\n\u001b[1;32m 75\u001b[0m CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m 76\u001b[0m )\n\u001b[1;32m 77\u001b[0m \u001b[39mif\u001b[39;00m output_type\u001b[39m==\u001b[39m\u001b[39m'\u001b[39m\u001b[39mrate\u001b[39m\u001b[39m'\u001b[39m:\n\u001b[1;32m 78\u001b[0m \u001b[39mreturn\u001b[39;00m model\u001b[39m.\u001b[39msplit_groups_rate(\n\u001b[1;32m 79\u001b[0m bucket_populations,\n\u001b[1;32m 80\u001b[0m observed_total,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 83\u001b[0m CI_method\u001b[39m=\u001b[39mCI_method\n\u001b[1;32m 84\u001b[0m )\n", - "\u001b[0;31mAttributeError\u001b[0m: 'RateMultiplicativeModel' object has no attribute 'split_groups'" + "name": "stdout", + "output_type": "stream", + "text": [ + "Estimated incidence in each group\n", + "[ 5.9047619 14.76190476 10.33333333]\n", + "Standard Error of Above Estimates\n", + "[0.47619048 1.19047619 0.83333333]\n" ] } ], "source": [ - "estimate,SE,CI=split_datapoint(\n", + "estimate,SE=split_datapoint(\n", " observed_total,\n", " populations,\n", " rate_pattern,\n", @@ -248,17 +138,12 @@ "print(estimate)\n", "\n", "print(\"Standard Error of Above Estimates\")\n", - "print(SE)\n", - "\n", - "print(\"CI Lower\")\n", - "print(CI[0])\n", - "print(\"CI Upper\")\n", - "print(CI[1])\n" + "print(SE)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -268,16 +153,12 @@ "Estimated incidence in each group\n", "[16.29952498 9.75395156 4.94652346]\n", "Standard Error of Above Estimates\n", - "[2.27869012 0.18133601 0.03997387]\n", - "CI Lower\n", - "[11.83337441 9.39853952 4.86817611]\n", - "CI Upper\n", - "[20.76567555 10.1093636 5.02487081]\n" + "[2.27869012 0.18133601 0.03997387]\n" ] } ], "source": [ - "estimate,SE,CI=split_datapoint(\n", + "estimate,SE=split_datapoint(\n", " observed_total,\n", " populations,\n", " rate_pattern,\n", @@ -288,14 +169,7 @@ "print(estimate)\n", "\n", "print(\"Standard Error of Above Estimates\")\n", - "print(SE)\n", - "\n", - "print(\"CI Lower\")\n", - "print(CI[0])\n", - "\n", - "print(\"CI Upper\")\n", - "print(CI[1])\n", - "\n" + "print(SE)" ] }, { @@ -308,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -354,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -364,7 +238,7 @@ "Estimates:\n", "[648.14814815 855.55555556]\n", "SE\n", - "[ 92.59259259 122.22222222]\n" + "[1.07758621 1.42241379]\n" ] } ], @@ -383,18 +257,24 @@ "#This hack of using the ratio of incidence rates works when we assume multiplicativity in rate rather than in odds\n", "baseline_male_rate=1.2\n", "baseline_female_rate=1\n", - "sex_splitting_model=RateMultiplicativeModel(np.array([baseline_female_rate,baseline_male_rate]))\n", + "sex_splitting_model=RateMultiplicativeModel()\n", + "pattern = np.array([baseline_female_rate,baseline_male_rate])\n", "\n", "#Fit with study_props, the study population proportions\n", - "sex_splitting_model.fit_beta(\n", - " bucket_populations=study_props,\n", + "fitted_beta = sex_splitting_model.fit_beta(\n", " observed_total=observed_study_rate,\n", - " observed_total_se=study_se\n", + " rate_pattern=pattern,\n", + " bucket_populations=study_props,\n", + " #observed_total_se=study_se,\n", ")\n", "\n", "#Predict with population proportions\n", - "estimated_by_sex=sex_splitting_model.predict_count(pop_by_sex)\n", - "SE_by_sex=sex_splitting_model.predict_count_SE(pop_by_sex)\n", + "estimated_by_sex=sex_splitting_model.predict_count(fitted_beta,pattern, pop_by_sex)\n", + "SE_by_sex=sex_splitting_model.count_split_standard_errors(\n", + " fitted_beta,\n", + " pattern,\n", + " pop_by_sex,\n", + " observed_total_SE)\n", "print(\"Estimates:\")\n", "print(estimated_by_sex)\n", "\n", @@ -411,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -473,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -539,7 +419,7 @@ "3 80.384758 52.343563 28.041195" ] }, - "execution_count": 9, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -547,13 +427,6 @@ "source": [ "full_results" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py index 04822c7..08f8c39 100644 --- a/src/pydisagg/disaggregate.py +++ b/src/pydisagg/disaggregate.py @@ -55,7 +55,6 @@ def split_datapoint( ( estimate_in_each_bucket, se_of_estimate_bucket, - (CI_lower_in_each_bucket,CI_upper_in_each_bucket) ) Otherwise, if standard errors are not available, this will return a numpy array of the disaggregated estimates @@ -64,7 +63,7 @@ def split_datapoint( ----- If no observed_total_se is given, returns point estimates If observed_total_se is given, then returns a tuple - (point_estimate,standard_error,(CI_lower,CI_upper)) + (point_estimate,standard_error) """ if output_type not in ['total', 'rate']: raise ValueError("output_type must be one of either 'total' or 'rate'") From 611de71093a2587ebaa8c17082cc7ec7c17a975d Mon Sep 17 00:00:00 2001 From: AHsu98 <34590951+AHsu98@users.noreply.github.com> Date: Wed, 23 Aug 2023 13:02:00 -0700 Subject: [PATCH 2/3] Dropped dropped CI_method input No longer supporting confidence intervals for now, we can figure out how to add them back later if someone explicitly asks --- examples/Basic Splitting Examples.ipynb | 62 +++++++++++-------------- src/pydisagg/disaggregate.py | 4 -- 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/examples/Basic Splitting Examples.ipynb b/examples/Basic Splitting Examples.ipynb index 6d44984..72fc5a0 100644 --- a/examples/Basic Splitting Examples.ipynb +++ b/examples/Basic Splitting Examples.ipynb @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -143,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -182,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -228,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -291,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -327,33 +327,7 @@ }, { "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "combined_split_results=np.zeros((4,3))\n", - "combined_split_results[:,0]=age_results\n", - "\n", - "for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(\n", - " age_results,\n", - " within_age_sex_proportions,\n", - " population_age_pattern,\n", - " within_age_sex_rate_ratios,\n", - " range(len(age_results))\n", - " ):\n", - " combined_split_results[age_id,1:3]=split_datapoint(\n", - " age_incidence,\n", - " population_at_age*sex_proportions,\n", - " np.array([rate_ratio,1]),\n", - " model=sex_splitting_model\n", - " )\n", - " \n", - "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])" - ] - }, - { - "cell_type": "code", - "execution_count": 29, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -419,13 +393,31 @@ "3 80.384758 52.343563 28.041195" ] }, - "execution_count": 29, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "full_results" + "combined_split_results=np.zeros((4,3))\n", + "combined_split_results[:,0]=age_results\n", + "\n", + "for age_incidence,sex_proportions,population_at_age,rate_ratio,age_id in zip(\n", + " age_results,\n", + " within_age_sex_proportions,\n", + " population_age_pattern,\n", + " within_age_sex_rate_ratios,\n", + " range(len(age_results))\n", + " ):\n", + " combined_split_results[age_id,1:3]=split_datapoint(\n", + " age_incidence,\n", + " population_at_age*sex_proportions,\n", + " np.array([rate_ratio,1]),\n", + " model=sex_splitting_model\n", + " )\n", + " \n", + "full_results=pd.DataFrame(combined_split_results,columns=['Total in Age Group','Male','Female'])\n", + "\n", + "display(full_results)" ] } ], diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py index 08f8c39..ffd6e61 100644 --- a/src/pydisagg/disaggregate.py +++ b/src/pydisagg/disaggregate.py @@ -16,7 +16,6 @@ def split_datapoint( observed_total_se: Optional[float] = None, model: Optional[DisaggModel] = LogOdds_model(), output_type: Literal['total', 'rate'] = 'total', - CI_method: Optional[str] = 'delta-wald' ) -> Union[tuple, NDArray]: """Disaggregate a datapoint using the model given as input. Defaults to assuming multiplicativity in the odds ratio @@ -44,9 +43,6 @@ def split_datapoint( in each group, or estimate the rate per population unit. model : Optional[DisaggModel], optional DisaggModel to use, by default LMO_model(1) - CI_method : Optional[str], optional - method to use for confidence intervals, - see documentation for standard error methods in DisaggModel, by default 'delta-wald' Returns ------- From 508e18f4a243a9af6d07ebcadd13943e9fdae303 Mon Sep 17 00:00:00 2001 From: AHsu98 <34590951+AHsu98@users.noreply.github.com> Date: Wed, 23 Aug 2023 13:19:14 -0700 Subject: [PATCH 3/3] Fixed Dataframe Rate Splitting Adding extra input on whether to clear predicted rates in model.split_to_rates for groups for population zero. This means that we only make predictions for groups that have nonzero population in our study. --- examples/Dataframe Splitting Demo.ipynb | 132 +++++++++++------------- src/pydisagg/DisaggModel.py | 13 ++- src/pydisagg/disaggregate.py | 3 +- 3 files changed, 71 insertions(+), 77 deletions(-) diff --git a/examples/Dataframe Splitting Demo.ipynb b/examples/Dataframe Splitting Demo.ipynb index 5d900c4..4e7cc94 100644 --- a/examples/Dataframe Splitting Demo.ipynb +++ b/examples/Dataframe Splitting Demo.ipynb @@ -469,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -635,7 +635,7 @@ "7 0.000000 1.763117 1.236883 " ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -648,12 +648,12 @@ " baseline_patterns,\n", " use_se=True\n", ")\n", - "df.reset_index()" + "df" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -672,20 +672,18 @@ " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", - "\n", - " .dataframe thead tr:last-of-type th {\n", - " text-align: right;\n", - " }\n", "\n", "\n", " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -695,34 +693,25 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -731,21 +720,23 @@ " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -753,7 +744,8 @@ " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -764,31 +756,34 @@ " \n", " \n", " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -801,32 +796,30 @@ "" ], "text/plain": [ - " estimate se \\\n", - " age_group_0 age_group_1 age_group_2 age_group_3 age_group_0 \n", - "demographic_id \n", - "0 0.458789 0.241211 0.352732 0.792302 0.057566 \n", - "0 0.370255 0.180650 0.274292 0.725708 0.058568 \n", - "1 0.297618 0.137111 0.214081 0.655975 0.080178 \n", - "1 0.550000 0.314286 0.440000 0.846154 0.146250 \n", - "2 0.059787 0.091730 0.125166 0.571801 0.013312 \n", - "3 0.160000 0.232258 0.300000 0.800000 0.020000 \n", - "3 0.136000 0.200000 0.261538 0.767742 0.018360 \n", - "3 0.121529 0.180139 0.237380 0.743930 0.034659 \n", + " demographic_id estimate se \\\n", + " age_group_0 age_group_1 age_group_2 age_group_3 age_group_0 \n", + "0 0 0.458789 0.241211 0.000000 0.000000 0.057566 \n", + "1 0 0.000000 0.000000 0.274292 0.725708 0.058568 \n", + "2 1 0.297618 0.137111 0.214081 0.000000 0.080178 \n", + "3 1 0.000000 0.000000 0.000000 0.846154 0.146250 \n", + "4 2 0.059787 0.091730 0.125166 0.571801 0.013312 \n", + "5 3 0.160000 0.000000 0.000000 0.000000 0.020000 \n", + "6 3 0.000000 0.200000 0.000000 0.000000 0.018360 \n", + "7 3 0.000000 0.000000 0.237380 0.743930 0.034659 \n", "\n", - " \n", - " age_group_1 age_group_2 age_group_3 \n", - "demographic_id \n", - "0 0.042434 0.052932 0.038152 \n", - "0 0.037179 0.050000 0.050000 \n", - "1 0.045378 0.064533 0.086557 \n", - "1 0.127347 0.145600 0.076923 \n", - "2 0.019730 0.025931 0.057983 \n", - "3 0.026535 0.031250 0.023810 \n", - "3 0.025000 0.030178 0.027862 \n", - "3 0.047946 0.058771 0.061844 " + " \n", + " age_group_1 age_group_2 age_group_3 \n", + "0 0.042434 0.052932 0.038152 \n", + "1 0.037179 0.050000 0.050000 \n", + "2 0.045378 0.064533 0.086557 \n", + "3 0.127347 0.145600 0.076923 \n", + "4 0.019730 0.025931 0.057983 \n", + "5 0.026535 0.031250 0.023810 \n", + "6 0.025000 0.030178 0.027862 \n", + "7 0.047946 0.058771 0.061844 " ] }, - "execution_count": 15, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -842,13 +835,6 @@ ")\n", "df_rate" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -867,7 +853,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.9.12" }, "orig_nbformat": 4, "vscode": { diff --git a/src/pydisagg/DisaggModel.py b/src/pydisagg/DisaggModel.py index 4a8b45a..fc00faa 100644 --- a/src/pydisagg/DisaggModel.py +++ b/src/pydisagg/DisaggModel.py @@ -267,6 +267,7 @@ def split_to_rates( observed_total_se: Optional[float] = None, lower_guess: float = -50, upper_guess: float = 50, + reduce_output: bool = False, ) -> NDArray: """Splits the given total to rates @@ -287,7 +288,8 @@ def split_to_rates( verbose : Optional[int], optional how much to print, 1 prints the root value, 2 prints the entire rootfinding output, by default 0 - + reduce_output : bool, by default False + boolean for whether or not to set groups with zero population to have zero rate Returns ------- if observed_total_se is not given, we return @@ -303,11 +305,16 @@ def split_to_rates( observed_total, rate_pattern, bucket_populations, lower_guess, upper_guess ) rate_point_estimates = self.predict_rate(fitted_beta, rate_pattern) + + #This is some dirty type casting, if reduce output, we set all groups with population 0 to 0 + #Otherwise, we're multiplying everything True, which gets casted to 1 + output_multiplier = ((1-1*reduce_output)+bucket_populations)>0 + if observed_total_se is not None: standard_errors = self.rate_standard_errors( fitted_beta, rate_pattern, bucket_populations, observed_total_se) - return rate_point_estimates, standard_errors - return rate_point_estimates + return rate_point_estimates*output_multiplier, standard_errors*output_multiplier + return rate_point_estimates*output_multiplier def rate_standard_errors( self, diff --git a/src/pydisagg/disaggregate.py b/src/pydisagg/disaggregate.py index ffd6e61..25b56a8 100644 --- a/src/pydisagg/disaggregate.py +++ b/src/pydisagg/disaggregate.py @@ -86,7 +86,8 @@ def split_datapoint( point_estimates = model.split_to_rates( observed_total, rate_pattern, - bucket_populations + bucket_populations, + reduce_output=True ) if observed_total_se is not None: fitted_beta = model.fit_beta(
demographic_idestimatese
age_group_0age_group_1age_group_2age_group_2age_group_3
demographic_id
000.4587890.2412110.3527320.7923020.0000000.0000000.0575660.0424340.0529320.038152
00.3702550.180650100.0000000.0000000.2742920.7257080.0585680.050000
1210.2976180.1371110.2140810.6559750.0000000.0801780.0453780.0645330.086557
10.5500000.3142860.440000310.0000000.0000000.0000000.8461540.1462500.1273470.076923
2420.0597870.0917300.1251660.057983
3530.1600000.2322580.3000000.8000000.0000000.0000000.0000000.0200000.0265350.0312500.023810
30.136000630.0000000.2000000.2615380.7677420.0000000.0000000.0183600.0250000.0301780.027862
30.1215290.180139730.0000000.0000000.2373800.7439300.034659