Skip to content

Commit

Permalink
suppress errors in core_stats
Browse files Browse the repository at this point in the history
Differential Revision: D54271155

fbshipit-source-id: 1a7055bb875f240a10c87256ef4ccfabb4736217
  • Loading branch information
generatedunixname89002005307016 authored and facebook-github-bot committed Feb 27, 2024
1 parent 0bfc653 commit 72e54bc
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 35 deletions.
4 changes: 4 additions & 0 deletions balance/adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def apply_transformations(

# additions is new columns to add to data. i.e.: column names that appear in transformations
# but are not present in all_data.
# pyre-fixme[16]: Optional type has no attribute `columns`.
additions = {k: v for k, v in transformations.items() if k not in all_data.columns}
transformations = {
k: v for k, v in transformations.items() if k in all_data.columns
Expand All @@ -268,6 +269,7 @@ def apply_transformations(
) > 0, "No transformations or additions passed"

if len(additions) > 0:
# pyre-fixme[16]: Optional type has no attribute `assign`.
added = all_data.assign(**additions).loc[:, list(additions.keys())]
else:
added = None
Expand All @@ -280,6 +282,7 @@ def apply_transformations(
# Adding .copy(deep=False) solves this.
# See: https://stackoverflow.com/a/54914752
transformed = pd.DataFrame(
# pyre-fixme[16]: Optional type has no attribute `copy`.
{k: v(all_data.copy(deep=False)[k]) for k, v in transformations.items()}
)
else:
Expand All @@ -293,6 +296,7 @@ def apply_transformations(
if drop:
logger.warning(f"Dropping the variables: {dropped_columns}")
else:
# pyre-fixme[16]: Optional type has no attribute `loc`.
out = pd.concat((out, all_data.loc[:, dropped_columns]), axis=1)
logger.info(f"Final variables in output: {list(out.columns)}")

Expand Down
1 change: 1 addition & 0 deletions balance/balancedf_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ def from_frame(
Returns:
BalanceCovarsDF: Object.
"""
# pyre-fixme[9]: df has type `DataFrame`; used as `Optional[DataFrame]`.
df = df.reset_index()
df = pd.concat(
(df, pd.Series(np.arange(0, df.shape[0]), name="id"), weights), axis=1
Expand Down
4 changes: 1 addition & 3 deletions balance/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,7 @@ def adapt_output(self, output_df: pd.DataFrame) -> pd.DataFrame:
output_df = output_df[keep_rows]

if self.has_keep_columns():
output_df = output_df[ # pyre-ignore[9]: this uses the DataFrame also.
self.keep_columns()
]
output_df = output_df[self.keep_columns()]

return output_df

Expand Down
1 change: 1 addition & 0 deletions balance/sample_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ def diagnostics(self: "Sample") -> pd.DataFrame:
diagnostics = diagnostics.reset_index(drop=True)

logger.info("Done computing diagnostics")
# pyre-fixme[7]: Expected `DataFrame` but got `Optional[DataFrame]`.
return diagnostics

############################################
Expand Down
3 changes: 2 additions & 1 deletion balance/stats_and_plots/weighted_comparisons_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,6 @@ def plot_qq_categorical(

if plot_data.shape[0] < label_threshold:
for r in plot_data.itertuples():
# pyre-fixme[16]: `tuple` has no attribute `prop_sample`.
axis.text(x=r.prop_sample, y=r.prop_target, s=r[1])

axis.set_ylim(-0.1, 1.1)
Expand Down Expand Up @@ -1430,6 +1429,8 @@ def plot_dist(
logger.warning("plotly plots ignore dist_type. Consider library='seaborn'")

return plotly_plot_dist(
# pyre-fixme[6]: For 1st argument expected `Dict[str, DataFrame]` but
# got `Dict[str, Union[DataFrame, Series]]`.
dict_of_dfs,
variables,
numeric_n_values_threshold,
Expand Down
7 changes: 1 addition & 6 deletions balance/stats_and_plots/weighted_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,7 @@ def _prepare_weighted_stat_args(
v = v.replace([np.inf, -np.inf], np.nan)
w = w.replace([np.inf, -np.inf], np.nan)
v = v.reset_index(drop=True)
w = w.reset_index( # pyre-ignore[16]: w is a pd.Series which has a reset_index method.
drop=True
)
w = w.reset_index(drop=True)

_check_weights_are_valid(w)

Expand Down Expand Up @@ -310,7 +308,6 @@ def ci_of_weighted_mean(
# Apply a lambda function to round a pd.Series of tuples to x decimal places
ci = ci.apply(lambda t: tuple(round(x, round_ndigits) for x in t))

# pyre-ignore[7]: pyre thinks this function could return a DataFrame because of ci = ci.apply(round_tuple). It's wrong.
return ci


Expand Down Expand Up @@ -661,7 +658,6 @@ def relative_frequency_table(
if w is None:
w = pd.Series(np.ones(df.shape[0]))

# pyre-ignore[6]: this is a pyre bug. str inherits from hashable, and .rename works fine.
w = w.rename("Freq")

if column is None:
Expand All @@ -674,7 +670,6 @@ def relative_frequency_table(
else:
raise TypeError("argument `df` must be a pandas DataFrame or Series")

# pyre-ignore[6]: this is a bug. pd.concat can deal with a DataFrame and a Series.
relative_frequency_table_data = pd.concat((df, w), axis=1)

relative_frequency_table_data = relative_frequency_table_data.groupby(
Expand Down
4 changes: 1 addition & 3 deletions balance/stats_and_plots/weights_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,7 @@ def weighted_median_breakdown_point(w: pd.Series) -> np.float64:
n = len(w) # n users
w = w / w.sum() # normalize to 1
# get a cumsum of sorted weights to find the median:
w_freq_cumsum = w.sort_values( # pyre-ignore[16]: it does have a cumsum method.
ascending=False
).cumsum()
w_freq_cumsum = w.sort_values(ascending=False).cumsum()
numerator = (w_freq_cumsum <= 0.5).sum()
if numerator == 0:
numerator = (
Expand Down
15 changes: 3 additions & 12 deletions balance/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,6 @@ def rm_mutual_nas(*args) -> List:
missing_mask = reduce(
lambda x, y: x | y,
[
# pyre-ignore[16]: pd.Series has isna.
pd.Series(x).replace([np.inf, -np.inf], np.nan).isna()
for x in args
if x is not None
Expand Down Expand Up @@ -1433,9 +1432,7 @@ def fct_lump(s: pd.Series, prop: float = 0.05) -> pd.Series:
remainder_category_name = remainder_category_name * 2

if s.dtype.name == "category":
s = s.astype( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
"object"
)
s = s.astype("object")
s.loc[s.apply(lambda x: x in small_categories)] = remainder_category_name
return s

Expand Down Expand Up @@ -1472,12 +1469,8 @@ def fct_lump_by(s: pd.Series, by: pd.Series, prop: float = 0.05) -> pd.Series:
# https://github.com/pandas-dev/pandas/issues/16646
# we keep the index of s as the index of the result
s_index = s.index
s = s.reset_index( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
drop=True
)
by = by.reset_index( # pyre-ignore[9]: this use is for pd.Series (not defined currently for pd.DataFrame)
drop=True
)
s = s.reset_index(drop=True)
by = by.reset_index(drop=True)
res = s.groupby(by).apply(lambda x: fct_lump(x, prop=prop))
res.index = s_index
return res
Expand Down Expand Up @@ -1714,11 +1707,9 @@ def _astype_in_df_from_dtypes(
# {'id': dtype('int64'), 'a': dtype('int64'), 'weight': dtype('float64')}
"""
dict_of_target_dtypes = _dict_intersect(
# pyre-ignore[6]: using to_dict on pd.Series will work fine:
target_dtypes.to_dict(),
df.dtypes.to_dict(),
)
# pyre-ignore[7]: we expect the input and output to be df (and not pd.Series)
return df.astype(dict_of_target_dtypes)


Expand Down
14 changes: 4 additions & 10 deletions balance/weighting_methods/ipw.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,9 @@ def choose_regularization(
)
all_perf = pd.DataFrame(all_perf)
best = (
# pyre-fixme[16]: `Optional` has no attribute `tail`.
all_perf[all_perf.design_effect < max_de]
# pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
# typing_extensions.Literal['index']]` but got
# `typing_extensions.Literal['design_effect']`.
.sort_values("design_effect").tail(n_asmd_candidates)
.sort_values("design_effect")
.tail(n_asmd_candidates)
)
logger.debug(f"Regularisation with design effect below {max_de}: \n {best}")

Expand All @@ -299,7 +296,6 @@ def choose_regularization(
adjusted_df = sample_df[sample_df.index.isin(weights.index)]

asmd_after = asmd(
# pyre-fixme[6]: For 1st param expected `DataFrame` but got `Series`.
sample_df=adjusted_df,
target_df=target_df,
sample_weights=weights,
Expand All @@ -325,10 +321,8 @@ def choose_regularization(
all_perf = pd.DataFrame(all_perf)
best = (
all_perf[all_perf.design_effect < max_de]
# pyre-fixme[6]: For 1st param expected `Union[typing_extensions.Literal[0],
# typing_extensions.Literal['index']]` but got
# `typing_extensions.Literal['asmd_improvement']`.
.sort_values("asmd_improvement").tail(1)
.sort_values("asmd_improvement")
.tail(1)
)
logger.info(f"Best regularisation: \n {best}")
solution = {
Expand Down
3 changes: 3 additions & 0 deletions balance/weighting_methods/rake.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ def rake(
# series of type Object, which won't work for the ipfn script
levels_dict = {}
for variable in variables:
# pyre-fixme[16]: Optional type has no attribute `__setitem__`.
# pyre-fixme[16]: Optional type has no attribute `__getitem__`.
target_df[variable] = target_df[variable].astype(str)
sample_df[variable] = sample_df[variable].astype(str)

Expand All @@ -157,6 +159,7 @@ def rake(
f"Final covariates and levels that will be used in raking: {levels_dict}."
)

# pyre-fixme[16]: Optional type has no attribute `assign`.
target_df = target_df.assign(weight=target_weights)
sample_df = sample_df.assign(weight=sample_weights)

Expand Down

0 comments on commit 72e54bc

Please sign in to comment.