Skip to content

Commit

Permalink
suppress errors in core_stats
Browse files Browse the repository at this point in the history
Differential Revision: D54271155

fbshipit-source-id: 1a7055bb875f240a10c87256ef4ccfabb4736217
  • Loading branch information
generatedunixname89002005307016 authored and facebook-github-bot committed Feb 27, 2024
1 parent 0bfc653 commit 72e54bc
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 35 deletions.
4 changes: 4 additions & 0 deletions balance/adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def apply_transformations(

# additions is new columns to add to data. i.e.: column names that appear in transformations
# but are not present in all_data.
# pyre-fixme[16]: Optional type has no attribute `columns`.
additions = {k: v for k, v in transformations.items() if k not in all_data.columns}
transformations = {
k: v for k, v in transformations.items() if k in all_data.columns
Expand All @@ -268,6 +269,7 @@ def apply_transformations(
) > 0, "No transformations or additions passed"

if len(additions) > 0:
# pyre-fixme[16]: Optional type has no attribute `assign`.
added = all_data.assign(**additions).loc[:, list(additions.keys())]
else:
added = None
Expand All @@ -280,6 +282,7 @@ def apply_transformations(
# Adding .copy(deep=False) solves this.
# See: https://stackoverflow.com/a/54914752
transformed = pd.DataFrame(
# pyre-fixme[16]: Optional type has no attribute `copy`.
{k: v(all_data.copy(deep=False)[k]) for k, v in transformations.items()}
)
else:
Expand All @@ -293,6 +296,7 @@ def apply_transformations(
if drop:
logger.warning(f"Dropping the variables: {dropped_columns}")
else:
# pyre-fixme[16]: Optional type has no attribute `loc`.
out = pd.concat((out, all_data.loc[:, dropped_columns]), axis=1)
logger.info(f"Final variables in output: {list(out.columns)}")

Expand Down
1 change: 1 addition & 0 deletions balance/balancedf_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ def from_frame(
Returns:
BalanceCovarsDF: Object.
"""
# pyre-fixme[9]: df has type `DataFrame`; used as `Optional[DataFrame]`.
df = df.reset_index()
df = pd.concat(
(df, pd.Series(np.arange(0, df.shape[0]), name="id"), weights), axis=1
Expand Down
4 changes: 1 addition & 3 deletions balance/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,7 @@ def adapt_output(self, output_df: pd.DataFrame) -> pd.DataFrame:
output_df = output_df[keep_rows]

if self.has_keep_columns():
output_df = output_df[ # pyre-ignore[9]: this uses the DataFrame also.
self.keep_columns()
]
output_df = output_df[self.keep_columns()]

return output_df

Expand Down
1 change: 1 addition & 0 deletions balance/sample_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ def diagnostics(self: "Sample") -> pd.DataFrame:
diagnostics = diagnostics.reset_index(drop=True)

logger.info("Done computing diagnostics")
# pyre-fixme[7]: Expected `DataFrame` but got `Optional[DataFrame]`.
return diagnostics

############################################
Expand Down
3 changes: 2 additions & 1 deletion balance/stats_and_plots/weighted_comparisons_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,6 @@ def plot_qq_categorical(

if plot_data.shape[0] < label_threshold:
for r in plot_data.itertuples():
# pyre-fixme[16]: `tuple` has no attribute `prop_sample`.
axis.text(x=r.prop_sample, y=r.prop_target, s=r[1])

axis.set_ylim(-0.1, 1.1)
Expand Down Expand Up @@ -1430,6 +1429,8 @@ def plot_dist(
logger.warning("plotly plots ignore dist_type. Consider library='seaborn'")

return plotly_plot_dist(
# pyre-fixme[6]: For 1st argument expected `Dict[str, DataFrame]` but
# got `Dict[str, Union[DataFrame, Series]]`.
dict_of_dfs,
variables,
numeric_n_values_threshold,
Expand Down
7 changes: 1 addition & 6 deletions balance/stats_and_plots/weighted_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ def _prepare_weighted_stat_args(
v = v.replace([np.inf, -np.inf], np.nan)
w = w.replace([np.inf, -np.inf], np.nan)
v = v.reset_index(drop=True)
w = w.reset_index( # pyre-ignore[16]: w is a pd.Series which has a reset_index method.
drop=True
)
w = w.reset_index(drop=True)

_check_weights_are_valid(w)

Expand Down Expand Up @@ -310,7 +308,6 @@ def ci_of_weighted_mean(
# Apply a lambda function to round a pd.Series of tuples to x decimal places
ci = ci.apply(lambda t: tuple(round(x, round_ndigits) for x in t))

# pyre-ignore[7]: pyre thinks this function could return a DataFrame because of ci = ci.apply(round_tuple). It's wrong.
return ci


Expand Down Expand Up @@ -661,7 +658,6 @@ def relative_frequency_table(
if w is None:
w = pd.Series(np.ones(df.shape[0]))

# pyre-ignore[6]: this is a pyre bug. str inherits from hashable, and .rename works fine.
w = w.rename("Freq")

if column is None:
Expand All @@ -674,7 +670,6 @@ def relative_frequency_table(
else:
raise TypeError("argument `df` must be a pandas DataFrame or Series")

# pyre-ignore[6]: this is a bug. pd.concat can deal with a DataFrame and a Series.
relative_frequency_table_data = pd.concat((df, w), axis=1)

relative_frequency_table_data = relative_frequency_table_data.groupby(
Expand Down
4 changes: 1 addition & 3 deletions balance/stats_and_plots/weights_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,7 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
n = len(w) # n users
w = w / w.sum() # normalize to 1
# get a cumsum of sorted weights to find the median:
w_freq_cumsum = w.sort_values( # pyre-ignore[16]: it does have a cumsum method.
ascending=False
).cumsum()
w_freq_cumsum = w.sort_values(ascending=False).cumsum()
numerator = (w_freq_cumsum <= 0.5).sum()
if numerator == 0:
numerator = (
Expand Down
15 changes: 3 additions & 12 deletions balance/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,6 @@ def rm_mutual_nas(*args) -> List:
missing_mask = reduce(
lambda x, y: x | y,
[
# pyre-ignore[16]: pd.Series has isna.
pd.Series(x).replace([np.inf, -np.inf], np.nan).isna()
for x in args
if x is not None
Expand Down Expand Up @@ -1433,9 +1432,7 @@ def fct_lump(s: pd.Series, prop: float = 0.05) -> pd.Series:
remainder_category_name = remainder_category_name * 2

if s.dtype.name == "category":
s = s.astype( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
"object"
)
s = s.astype("object")
s.loc[s.apply(lambda x: x in small_categories)] = remainder_category_name
return s

Expand Down Expand Up @@ -1472,12 +1469,8 @@ def fct_lump_by(s: pd.Series, by: pd.Series, prop: float = 0.05) -> pd.Series:
# https://github.com/pandas-dev/pandas/issues/16646
# we keep the index of s as the index of the result
s_index = s.index
s = s.reset_index( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
drop=True
)
by = by.reset_index( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
drop=True
)
s = s.reset_index(drop=True)
by = by.reset_index(drop=True)
res = s.groupby(by).apply(lambda x: fct_lump(x, prop=prop))
res.index = s_index
return res
Expand Down Expand Up @@ -1714,11 +1707,9 @@ def _astype_in_df_from_dtypes(
# {'id': dtype('int64'), 'a': dtype('int64'), 'weight': dtype('float64')}
"""
dict_of_target_dtypes = _dict_intersect(
# pyre-ignore[6]: using to_dict on pd.Series will work fine:
target_dtypes.to_dict(),
df.dtypes.to_dict(),
)
# pyre-ignore[7]: we expect the input and output to be df (and not pd.Series)
return df.astype(dict_of_target_dtypes)


Expand Down
14 changes: 4 additions & 10 deletions balance/weighting_methods/ipw.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,9 @@ def choose_regularization(
)
all_perf = pd.DataFrame(all_perf)
best = (
# pyre-fixme[16]: `Optional` has no attribute `tail`.
all_perf[all_perf.design_effect < max_de]
# pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
# typing_extensions.Literal['index']]` but got
# `typing_extensions.Literal['design_effect']`.
.sort_values("design_effect").tail(n_asmd_candidates)
.sort_values("design_effect")
.tail(n_asmd_candidates)
)
logger.debug(f"Regularisation with design effect below {max_de}: \n {best}")

Expand All @@ -299,7 +296,6 @@ def choose_regularization(
adjusted_df = sample_df[sample_df.index.isin(weights.index)]

asmd_after = asmd(
# pyre-fixme[6]: For 1st param expected `DataFrame` but got `Series`.
sample_df=adjusted_df,
target_df=target_df,
sample_weights=weights,
Expand All @@ -325,10 +321,8 @@ def choose_regularization(
all_perf = pd.DataFrame(all_perf)
best = (
all_perf[all_perf.design_effect < max_de]
# pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
# typing_extensions.Literal['index']]` but got
# `typing_extensions.Literal['asmd_improvement']`.
.sort_values("asmd_improvement").tail(1)
.sort_values("asmd_improvement")
.tail(1)
)
logger.info(f"Best regularisation: \n {best}")
solution = {
Expand Down
3 changes: 3 additions & 0 deletions balance/weighting_methods/rake.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ def rake(
# series of type Object, which won't work for the ipfn script
levels_dict = {}
for variable in variables:
# pyre-fixme[16]: Optional type has no attribute `__setitem__`.
# pyre-fixme[16]: Optional type has no attribute `__getitem__`.
target_df[variable] = target_df[variable].astype(str)
sample_df[variable] = sample_df[variable].astype(str)

Expand All @@ -157,6 +159,7 @@ def rake(
f"Final covariates and levels that will be used in raking: {levels_dict}."
)

# pyre-fixme[16]: Optional type has no attribute `assign`.
target_df = target_df.assign(weight=target_weights)
sample_df = sample_df.assign(weight=sample_weights)

Expand Down

0 comments on commit 72e54bc

Please sign in to comment.