From 72e54bc03e5ea58e9e6a5dd5cf65009797707ba0 Mon Sep 17 00:00:00 2001
From: generatedunixname89002005307016
Date: Tue, 27 Feb 2024 12:16:14 -0800
Subject: [PATCH] suppress errors in `core_stats`

Differential Revision: D54271155

fbshipit-source-id: 1a7055bb875f240a10c87256ef4ccfabb4736217
---
 balance/adjustment.py                              |  4 ++++
 balance/balancedf_class.py                         |  1 +
 balance/cli.py                                     |  4 +---
 balance/sample_class.py                            |  1 +
 .../stats_and_plots/weighted_comparisons_plots.py  |  3 ++-
 balance/stats_and_plots/weighted_stats.py          |  7 +------
 balance/stats_and_plots/weights_stats.py           |  4 +---
 balance/util.py                                    | 15 +++------------
 balance/weighting_methods/ipw.py                   | 14 ++++----------
 balance/weighting_methods/rake.py                  |  3 +++
 10 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/balance/adjustment.py b/balance/adjustment.py
index 00019c1..baaac17 100644
--- a/balance/adjustment.py
+++ b/balance/adjustment.py
@@ -252,6 +252,7 @@ def apply_transformations(
     # additions is new columns to add to data. i.e.: column names that appear in transformations
     # but are not present in all_data.
+    # pyre-fixme[16]: Optional type has no attribute `columns`.
     additions = {k: v for k, v in transformations.items() if k not in all_data.columns}
     transformations = {
         k: v for k, v in transformations.items() if k in all_data.columns
     }
@@ -268,6 +269,7 @@ def apply_transformations(
     ) > 0, "No transformations or additions passed"
 
     if len(additions) > 0:
+        # pyre-fixme[16]: Optional type has no attribute `assign`.
         added = all_data.assign(**additions).loc[:, list(additions.keys())]
     else:
         added = None
@@ -280,6 +282,7 @@ def apply_transformations(
         # Adding .copy(deep=False) solves this.
         # See: https://stackoverflow.com/a/54914752
         transformed = pd.DataFrame(
+            # pyre-fixme[16]: Optional type has no attribute `copy`.
             {k: v(all_data.copy(deep=False)[k]) for k, v in transformations.items()}
         )
     else:
@@ -293,6 +296,7 @@ def apply_transformations(
         if drop:
             logger.warning(f"Dropping the variables: {dropped_columns}")
         else:
+            # pyre-fixme[16]: Optional type has no attribute `loc`.
             out = pd.concat((out, all_data.loc[:, dropped_columns]), axis=1)
 
     logger.info(f"Final variables in output: {list(out.columns)}")

diff --git a/balance/balancedf_class.py b/balance/balancedf_class.py
index 1594a65..0984402 100644
--- a/balance/balancedf_class.py
+++ b/balance/balancedf_class.py
@@ -1690,6 +1690,7 @@ def from_frame(
         Returns:
             BalanceCovarsDF: Object.
         """
+        # pyre-fixme[9]: df has type `DataFrame`; used as `Optional[DataFrame]`.
         df = df.reset_index()
         df = pd.concat(
             (df, pd.Series(np.arange(0, df.shape[0]), name="id"), weights), axis=1

diff --git a/balance/cli.py b/balance/cli.py
index d1ded94..aa8430c 100644
--- a/balance/cli.py
+++ b/balance/cli.py
@@ -283,9 +283,7 @@ def adapt_output(self, output_df: pd.DataFrame) -> pd.DataFrame:
             output_df = output_df[keep_rows]
 
         if self.has_keep_columns():
-            output_df = output_df[  # pyre-ignore[9]: this uses the DataFrame also.
-                self.keep_columns()
-            ]
+            output_df = output_df[self.keep_columns()]
 
         return output_df

diff --git a/balance/sample_class.py b/balance/sample_class.py
index 61c8fb6..bb0cb42 100644
--- a/balance/sample_class.py
+++ b/balance/sample_class.py
@@ -1022,6 +1022,7 @@ def diagnostics(self: "Sample") -> pd.DataFrame:
         diagnostics = diagnostics.reset_index(drop=True)
 
         logger.info("Done computing diagnostics")
+        # pyre-fixme[7]: Expected `DataFrame` but got `Optional[DataFrame]`.
         return diagnostics
 
    ############################################

diff --git a/balance/stats_and_plots/weighted_comparisons_plots.py b/balance/stats_and_plots/weighted_comparisons_plots.py
index 212c67e..adb3991 100644
--- a/balance/stats_and_plots/weighted_comparisons_plots.py
+++ b/balance/stats_and_plots/weighted_comparisons_plots.py
@@ -544,7 +544,6 @@ def plot_qq_categorical(
 
     if plot_data.shape[0] < label_threshold:
         for r in plot_data.itertuples():
-            # pyre-fixme[16]: `tuple` has no attribute `prop_sample`.
             axis.text(x=r.prop_sample, y=r.prop_target, s=r[1])
 
     axis.set_ylim(-0.1, 1.1)
@@ -1430,6 +1429,8 @@ def plot_dist(
             logger.warning("plotly plots ignore dist_type. Consider library='seaborn'")
 
         return plotly_plot_dist(
+            # pyre-fixme[6]: For 1st argument expected `Dict[str, DataFrame]` but
+            #  got `Dict[str, Union[DataFrame, Series]]`.
             dict_of_dfs,
             variables,
             numeric_n_values_threshold,

diff --git a/balance/stats_and_plots/weighted_stats.py b/balance/stats_and_plots/weighted_stats.py
index 02b0326..8d10af2 100644
--- a/balance/stats_and_plots/weighted_stats.py
+++ b/balance/stats_and_plots/weighted_stats.py
@@ -99,9 +99,7 @@ def _prepare_weighted_stat_args(
     v = v.replace([np.inf, -np.inf], np.nan)
     w = w.replace([np.inf, -np.inf], np.nan)
     v = v.reset_index(drop=True)
-    w = w.reset_index(  # pyre-ignore[16]: w is a pd.Series which has a reset_index method.
-        drop=True
-    )
+    w = w.reset_index(drop=True)
 
     _check_weights_are_valid(w)
 
@@ -310,7 +308,6 @@ def ci_of_weighted_mean(
 
         # Apply a lambda function to round a pd.Series of tuples to x decimal places
         ci = ci.apply(lambda t: tuple(round(x, round_ndigits) for x in t))
 
-    # pyre-ignore[7]: pyre thinks this function could return a DataFrame because of ci = ci.apply(round_tuple). It's wrong.
     return ci
 
@@ -661,7 +658,6 @@ def relative_frequency_table(
 
     if w is None:
         w = pd.Series(np.ones(df.shape[0]))
-    # pyre-ignore[6]: this is a pyre bug. str inherits from hashable, and .rename works fine.
     w = w.rename("Freq")
 
     if column is None:
@@ -674,7 +670,6 @@ def relative_frequency_table(
     else:
         raise TypeError("argument `df` must be a pandas DataFrame or Series")
 
-    # pyre-ignore[6]: this is a bug. pd.concat can deal with a DataFrame and a Series.
     relative_frequency_table_data = pd.concat((df, w), axis=1)
 
     relative_frequency_table_data = relative_frequency_table_data.groupby(

diff --git a/balance/stats_and_plots/weights_stats.py b/balance/stats_and_plots/weights_stats.py
index ed5a974..55720bd 100644
--- a/balance/stats_and_plots/weights_stats.py
+++ b/balance/stats_and_plots/weights_stats.py
@@ -306,9 +306,7 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
     n = len(w)  # n users
     w = w / w.sum()  # normalize to 1
     # get a cumsum of sorted weights to find the median:
-    w_freq_cumsum = w.sort_values(  # pyre-ignore[16]: it does have a cumsum method.
-        ascending=False
-    ).cumsum()
+    w_freq_cumsum = w.sort_values(ascending=False).cumsum()
     numerator = (w_freq_cumsum <= 0.5).sum()
     if numerator == 0:
         numerator = (

diff --git a/balance/util.py b/balance/util.py
index 7069e6f..816c0d2 100644
--- a/balance/util.py
+++ b/balance/util.py
@@ -1157,7 +1157,6 @@ def rm_mutual_nas(*args) -> List:
     missing_mask = reduce(
         lambda x, y: x | y,
         [
-            # pyre-ignore[16]: pd.Series has isna.
             pd.Series(x).replace([np.inf, -np.inf], np.nan).isna()
             for x in args
             if x is not None
@@ -1433,9 +1432,7 @@ def fct_lump(s: pd.Series, prop: float = 0.05) -> pd.Series:
         remainder_category_name = remainder_category_name * 2
 
     if s.dtype.name == "category":
-        s = s.astype(  # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
-            "object"
-        )
+        s = s.astype("object")
     s.loc[s.apply(lambda x: x in small_categories)] = remainder_category_name
     return s
 
@@ -1472,12 +1469,8 @@ def fct_lump_by(s: pd.Series, by: pd.Series, prop: float = 0.05) -> pd.Series:
     # https://github.com/pandas-dev/pandas/issues/16646
     # we keep the index of s as the index of the result
     s_index = s.index
-    s = s.reset_index(  # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
-        drop=True
-    )
-    by = by.reset_index(  # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
-        drop=True
-    )
+    s = s.reset_index(drop=True)
+    by = by.reset_index(drop=True)
     res = s.groupby(by).apply(lambda x: fct_lump(x, prop=prop))
     res.index = s_index
     return res
@@ -1714,11 +1707,9 @@ def _astype_in_df_from_dtypes(
         # {'id': dtype('int64'), 'a': dtype('int64'), 'weight': dtype('float64')}
     """
     dict_of_target_dtypes = _dict_intersect(
-        # pyre-ignore[6]: using to_dict on pd.Series will work fine:
         target_dtypes.to_dict(),
         df.dtypes.to_dict(),
     )
-    # pyre-ignore[7]: we expect the input and output to be df (and not pd.Series)
     return df.astype(dict_of_target_dtypes)
 
 

diff --git a/balance/weighting_methods/ipw.py b/balance/weighting_methods/ipw.py
index 7ff649c..f24d9ba 100644
--- a/balance/weighting_methods/ipw.py
+++ b/balance/weighting_methods/ipw.py
@@ -271,12 +271,9 @@ def choose_regularization(
         )
     all_perf = pd.DataFrame(all_perf)
     best = (
-        # pyre-fixme[16]: `Optional` has no attribute `tail`.
         all_perf[all_perf.design_effect < max_de]
-        # pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
-        #  typing_extensions.Literal['index']]` but got
-        #  `typing_extensions.Literal['design_effect']`.
-        .sort_values("design_effect").tail(n_asmd_candidates)
+        .sort_values("design_effect")
+        .tail(n_asmd_candidates)
     )
 
     logger.debug(f"Regularisation with design effect below {max_de}: \n {best}")
@@ -299,7 +296,6 @@ def choose_regularization(
         adjusted_df = sample_df[sample_df.index.isin(weights.index)]
 
         asmd_after = asmd(
-            # pyre-fixme[6]: For 1st param expected `DataFrame` but got `Series`.
             sample_df=adjusted_df,
             target_df=target_df,
             sample_weights=weights,
@@ -325,10 +321,8 @@ def choose_regularization(
     all_perf = pd.DataFrame(all_perf)
     best = (
         all_perf[all_perf.design_effect < max_de]
-        # pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
-        #  typing_extensions.Literal['index']]` but got
-        #  `typing_extensions.Literal['asmd_improvement']`.
-        .sort_values("asmd_improvement").tail(1)
+        .sort_values("asmd_improvement")
+        .tail(1)
     )
     logger.info(f"Best regularisation: \n {best}")
     solution = {

diff --git a/balance/weighting_methods/rake.py b/balance/weighting_methods/rake.py
index 68acdd2..1fb512b 100644
--- a/balance/weighting_methods/rake.py
+++ b/balance/weighting_methods/rake.py
@@ -133,6 +133,8 @@ def rake(
     # series of type Object, which won't work for the ipfn script
     levels_dict = {}
     for variable in variables:
+        # pyre-fixme[16]: Optional type has no attribute `__setitem__`.
+        # pyre-fixme[16]: Optional type has no attribute `__getitem__`.
         target_df[variable] = target_df[variable].astype(str)
         sample_df[variable] = sample_df[variable].astype(str)
 
@@ -157,6 +159,7 @@ def rake(
         f"Final covariates and levels that will be used in raking: {levels_dict}."
     )
 
+    # pyre-fixme[16]: Optional type has no attribute `assign`.
    target_df = target_df.assign(weight=target_weights)
    sample_df = sample_df.assign(weight=sample_weights)
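
For readers of this diff: a `# pyre-fixme[16]: ...` (or `# pyre-ignore[...]`) comment placed directly above an expression tells Pyre to suppress that one error code on the following line, rather than fixing the underlying `Optional` typing. The short Python sketch below illustrates the trade-off between suppressing and narrowing; `maybe_frame` and its contents are hypothetical and are not taken from the balance codebase.

    from typing import Optional

    import pandas as pd


    def maybe_frame(flag: bool) -> Optional[pd.DataFrame]:
        # Hypothetical helper: may return None, so its result is typed Optional.
        return pd.DataFrame({"a": [1, 2, 3]}) if flag else None


    df = maybe_frame(True)

    # Option 1 (the pattern this patch applies): suppress the Pyre error on the next line.
    # pyre-fixme[16]: Optional type has no attribute `columns`.
    cols = df.columns

    # Option 2: narrow the Optional explicitly so no suppression is needed.
    if df is not None:
        cols = df.columns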