Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
chenyangkang committed Nov 20, 2024
1 parent 6d50366 commit 48d62c8
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 35 deletions.
25 changes: 13 additions & 12 deletions stemflow/model/AdaSTEM.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ def __init__(
completely_random_rotation: bool = False,
lazy_loading: bool = False,
lazy_loading_dir: Union[str, None] = None,
min_class_sample: int = 1,
logit_agg: bool = False
min_class_sample: int = 1
):
"""Make an AdaSTEM object
Expand Down Expand Up @@ -187,8 +186,6 @@ def __init__(
If lazy_loading, the directory in which the model is temporarily saved. Defaults to None, in which case a random number is generated and used as the folder name.
min_class_sample:
Minimum number of samples needed to train the classifier in each stixel. If this requirement is not met, a dummy classifier is fitted instead. This parameter does not influence regression tasks.
logit_agg:
Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
Raises:
AttributeError: Base model do not have method 'fit' or 'predict'
AttributeError: task not in one of ['regression', 'classification', 'hurdle']
Expand Down Expand Up @@ -272,7 +269,6 @@ def __init__(
# X. miscellaneous
self.lazy_loading = lazy_loading
self.lazy_loading_dir = lazy_loading_dir
self.logit_agg=logit_agg

if not verbosity == 0:
self.verbosity = 1
Expand Down Expand Up @@ -805,6 +801,7 @@ def predict_proba(
n_jobs: Union[None, int] = None,
aggregation: str = "mean",
return_by_separate_ensembles: bool = False,
logit_agg: bool = False,
**base_model_prediction_param
) -> Union[np.ndarray, Tuple[np.ndarray]]:
"""Predict probability
Expand All @@ -826,7 +823,8 @@ def predict_proba(
'mean' or 'median' for aggregation method across ensembles.
return_by_separate_ensembles (bool, optional):
Experimental function. return not by aggregation, but by separate ensembles.
logit_agg:
Whether to use logit aggregation for the classification task. If True, the model averages the probability predictions estimated by all ensembles in logit scale and then back-transforms the result to probability scale. It is recommended to use this together with sklearn's CalibratedClassifierCV as a wrapper of the classifier, so that the estimated probabilities are calibrated. If False, the output is essentially the proportion of "1"s across the related ensembles; e.g., if 100 stixels cover this spatiotemporal point and 90% of them predict "1", then the output probability is 0.9. It is therefore a probability estimated by the spatiotemporal neighborhood.
Raises:
TypeError:
X_test is not of type pd.core.frame.DataFrame.
Expand Down Expand Up @@ -857,7 +855,7 @@ def predict_proba(
return new_res.values

# Transform to logit space if classification:
if self.task=='classification' and self.logit_agg:
if self.task=='classification' and logit_agg:
for col_index in range(res.shape[1]):
prob = np.clip(res.iloc[:,col_index], 1e-6, 1 - 1e-6)
res.iloc[:,col_index] = np.log(prob / (1-prob)) # logit space
Expand Down Expand Up @@ -924,6 +922,7 @@ def predict(
n_jobs: Union[None, int] = 1,
aggregation: str = "mean",
return_by_separate_ensembles: bool = False,
logit_agg: bool = False,
**base_model_prediction_param
) -> Union[np.ndarray, Tuple[np.ndarray]]:
pass
Expand Down Expand Up @@ -1343,8 +1342,7 @@ def __init__(
completely_random_rotation=False,
lazy_loading = False,
lazy_loading_dir = None,
min_class_sample = 1,
logit_agg=False
min_class_sample = 1
):
super().__init__(
base_model=base_model,
Expand Down Expand Up @@ -1377,8 +1375,7 @@ def __init__(
completely_random_rotation=completely_random_rotation,
lazy_loading=lazy_loading,
lazy_loading_dir=lazy_loading_dir,
min_class_sample=min_class_sample,
logit_agg=logit_agg
min_class_sample=min_class_sample
)

self._estimator_type = 'classifier'
Expand All @@ -1392,6 +1389,7 @@ def predict(
n_jobs: Union[int, None] = 1,
aggregation: str = "mean",
return_by_separate_ensembles: bool = False,
logit_agg: bool = False,
**base_model_prediction_param
) -> Union[np.ndarray, Tuple[np.ndarray]]:
"""A rewrite of predict_proba adapted for Classifier
Expand Down Expand Up @@ -1419,7 +1417,8 @@ def predict(
Experimental function. return not by aggregation, but by separate ensembles.
base_model_prediction_param:
Additional parameter passed to base_model.predict_proba or base_model.predict
logit_agg:
Whether to use logit aggregation for the classification task. If True, the model averages the probability predictions estimated by all ensembles in logit scale and then back-transforms the result to probability scale. It is recommended to use this together with sklearn's CalibratedClassifierCV as a wrapper of the classifier, so that the estimated probabilities are calibrated. If False, the output is essentially the proportion of "1"s across the related ensembles; e.g., if 100 stixels cover this spatiotemporal point and 90% of them predict "1", then the output probability is 0.9. It is therefore a probability estimated by the spatiotemporal neighborhood.
Raises:
TypeError:
X_test is not of type pd.core.frame.DataFrame.
Expand All @@ -1441,6 +1440,7 @@ def predict(
n_jobs=n_jobs,
aggregation=aggregation,
return_by_separate_ensembles=return_by_separate_ensembles,
logit_agg=logit_agg,
**base_model_prediction_param
)
mean = mean[:,1].flatten()
Expand All @@ -1456,6 +1456,7 @@ def predict(
n_jobs=n_jobs,
aggregation=aggregation,
return_by_separate_ensembles=return_by_separate_ensembles,
logit_agg=logit_agg,
**base_model_prediction_param
)
mean = mean[:,1].flatten()
Expand Down
16 changes: 5 additions & 11 deletions stemflow/model/STEM.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ def __init__(
completely_random_rotation: bool = False,
lazy_loading: bool = False,
lazy_loading_dir: Union[str, None] = None,
min_class_sample: int = 1,
logit_agg: bool = False
min_class_sample: int = 1
):
"""Make a STEM object
Expand Down Expand Up @@ -122,9 +121,7 @@ def __init__(
If lazy_loading, the directory in which the model is temporarily saved. Defaults to None, in which case a random number is generated and used as the folder name.
min_class_sample:
Minimum number of samples needed to train the classifier in each stixel. If this requirement is not met, a dummy classifier is fitted instead. This parameter does not influence regression tasks.
logit_agg:
Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
Raises:
AttributeError: Base model do not have method 'fit' or 'predict'
AttributeError: task not in one of ['regression', 'classification', 'hurdle']
Expand Down Expand Up @@ -182,8 +179,7 @@ def __init__(
completely_random_rotation=completely_random_rotation,
lazy_loading=lazy_loading,
lazy_loading_dir=lazy_loading_dir,
min_class_sample=min_class_sample,
logit_agg=logit_agg
min_class_sample=min_class_sample
)

self.grid_len = grid_len
Expand Down Expand Up @@ -249,8 +245,7 @@ def __init__(
completely_random_rotation: bool = False,
lazy_loading: bool = False,
lazy_loading_dir: Union[str, None] = None,
min_class_sample: int = 1,
logit_agg: bool = False
min_class_sample: int = 1
):
super().__init__(
base_model=base_model,
Expand Down Expand Up @@ -283,8 +278,7 @@ def __init__(
completely_random_rotation=completely_random_rotation,
lazy_loading=lazy_loading,
lazy_loading_dir=lazy_loading_dir,
min_class_sample=min_class_sample,
logit_agg=logit_agg
min_class_sample=min_class_sample
)

self.grid_len = grid_len
Expand Down
16 changes: 5 additions & 11 deletions stemflow/model/SphereAdaSTEM.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ def __init__(
radius: float = 6371.0,
lazy_loading: bool = False,
lazy_loading_dir: Union[str, None] = None,
min_class_sample: int = 1,
logit_agg: bool = False
min_class_sample: int = 1
):
"""Make a Spherical AdaSTEM object
Expand Down Expand Up @@ -161,9 +160,7 @@ def __init__(
If lazy_loading, the directory in which the model is temporarily saved. Defaults to None, in which case a random number is generated and used as the folder name.
min_class_sample:
Minimum number of samples needed to train the classifier in each stixel. If this requirement is not met, a dummy classifier is fitted instead. This parameter does not influence regression tasks.
logit_agg:
Whether to use logit aggregation for the classification task. If True, the model is averaging the probability prediction estimated by all ensembles in logit scale, and then back-tranform it to probability scale. It's recommened to be combinedly used with the CalibratedClassifierCV class in sklearn as a wrapper of the classifier to estimate the calibrated probability. If False, the output is the essentially the proportion of "1s" acorss the related ensembles; e.g., if 100 stixels covers this spatiotemporal points, and 90% of them predict that it is a "1", then the ouput probability is 0.9; Therefore it would be a probability estimated by the spatiotemporal neiborhood.
Raises:
AttributeError: Base model do not have method 'fit' or 'predict'
AttributeError: task not in one of ['regression', 'classification', 'hurdle']
Expand Down Expand Up @@ -220,8 +217,7 @@ def __init__(
plot_empty=plot_empty,
lazy_loading=lazy_loading,
lazy_loading_dir=lazy_loading_dir,
min_class_sample=min_class_sample,
logit_agg=logit_agg
min_class_sample=min_class_sample
)

if not self.Spatio1 == "longitude":
Expand Down Expand Up @@ -558,8 +554,7 @@ def __init__(
plot_empty=False,
lazy_loading=False,
lazy_loading_dir=None,
min_class_sample: int = 1,
logit_agg: bool = False
min_class_sample: int = 1
):
super().__init__(
base_model=base_model,
Expand Down Expand Up @@ -591,8 +586,7 @@ def __init__(
plot_empty=plot_empty,
lazy_loading=lazy_loading,
lazy_loading_dir=lazy_loading_dir,
min_class_sample=min_class_sample,
logit_agg=logit_agg
min_class_sample=min_class_sample
)

self.predict = MethodType(AdaSTEMClassifier.predict, self)
Expand Down
78 changes: 77 additions & 1 deletion tests/make_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from stemflow.model.SphereAdaSTEM import SphereAdaSTEM, SphereAdaSTEMClassifier, SphereAdaSTEMRegressor
from stemflow.model.STEM import STEM, STEMClassifier, STEMRegressor
from stemflow.model_selection import ST_train_test_split
# from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import CalibratedClassifierCV
# CalibratedClassifierCV(XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1), cv=3, n_jobs=1, ensemble=True)

fold_ = 2
Expand Down Expand Up @@ -290,3 +290,79 @@ def make_AdaSTEMRegressor_Hurdle_for_AdaSTEM(fold_=2, min_req=1, **kwargs):
)

return model


def make_STEMClassifier_caliP(fold_=2, min_req=1, **kwargs):
    """Build an unfitted ``STEMClassifier`` wrapping a calibrated XGBoost classifier.

    Args:
        fold_: Forwarded to the model as ``ensemble_fold``.
        min_req: Forwarded to the model as ``min_ensemble_required``.
        **kwargs: Additional keyword arguments forwarded to ``STEMClassifier``.
            A key that repeats one of the fixed settings below raises
            ``TypeError`` (duplicate keyword argument).

    Returns:
        The configured ``STEMClassifier`` instance.
    """
    # Base learner: XGBoost wrapped in CalibratedClassifierCV (3-fold, ensemble mode).
    calibrated_xgb = CalibratedClassifierCV(
        XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1),
        cv=3,
        n_jobs=1,
        ensemble=True,
    )

    settings = {
        "base_model": calibrated_xgb,
        "save_gridding_plot": True,
        "ensemble_fold": fold_,
        "min_ensemble_required": min_req,
        "grid_len": 30,
        "temporal_start": 1,
        "temporal_end": 366,
        "temporal_step": 40,
        "temporal_bin_interval": 80,
        "points_lower_threshold": 30,
        "Spatio1": "longitude",
        "Spatio2": "latitude",
        "Temporal1": "DOY",
        "temporal_bin_start_jitter": "adaptive",
        "spatio_bin_jitter_magnitude": "adaptive",
        "use_temporal_to_train": True,
        "n_jobs": 1,
        "min_class_sample": 3,
    }
    return STEMClassifier(**settings, **kwargs)

def make_SphereAdaClassifier_caliP(fold_=2, min_req=1, **kwargs):
    """Build an unfitted ``SphereAdaSTEMClassifier`` wrapping a calibrated XGBoost classifier.

    Args:
        fold_: Forwarded to the model as ``ensemble_fold``.
        min_req: Forwarded to the model as ``min_ensemble_required``.
        **kwargs: Additional keyword arguments forwarded to
            ``SphereAdaSTEMClassifier``. A key that repeats one of the fixed
            settings below raises ``TypeError`` (duplicate keyword argument).

    Returns:
        The configured ``SphereAdaSTEMClassifier`` instance.
    """
    # Base learner: XGBoost wrapped in CalibratedClassifierCV (3-fold, ensemble mode).
    calibrated_xgb = CalibratedClassifierCV(
        XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1),
        cv=3,
        n_jobs=1,
        ensemble=True,
    )

    settings = {
        "base_model": calibrated_xgb,
        "save_gridding_plot": True,
        "ensemble_fold": fold_,
        "min_ensemble_required": min_req,
        "grid_len_upper_threshold": 8000,
        "grid_len_lower_threshold": 500,
        "temporal_start": 1,
        "temporal_end": 366,
        "temporal_step": 40,
        "temporal_bin_interval": 80,
        "points_lower_threshold": 30,
        "Spatio1": "longitude",
        "Spatio2": "latitude",
        "Temporal1": "DOY",
        "temporal_bin_start_jitter": "adaptive",
        "spatio_bin_jitter_magnitude": "adaptive",
        "use_temporal_to_train": True,
        "n_jobs": 1,
        "min_class_sample": 3,
    }
    return SphereAdaSTEMClassifier(**settings, **kwargs)

def make_AdaSTEMClassifier_caliP(fold_=2, min_req=1, **kwargs):
    """Build an unfitted ``AdaSTEMClassifier`` wrapping a calibrated XGBoost classifier.

    Args:
        fold_: Forwarded to the model as ``ensemble_fold``.
        min_req: Forwarded to the model as ``min_ensemble_required``.
        **kwargs: Additional keyword arguments forwarded to
            ``AdaSTEMClassifier``. A key that repeats one of the fixed
            settings below raises ``TypeError`` (duplicate keyword argument).

    Returns:
        The configured ``AdaSTEMClassifier`` instance.
    """
    # Base learner: XGBoost wrapped in CalibratedClassifierCV (3-fold, ensemble mode).
    calibrated_xgb = CalibratedClassifierCV(
        XGBClassifier(tree_method="hist", random_state=42, verbosity=0, n_jobs=1),
        cv=3,
        n_jobs=1,
        ensemble=True,
    )

    settings = {
        "base_model": calibrated_xgb,
        "save_gridding_plot": True,
        "ensemble_fold": fold_,
        "min_ensemble_required": min_req,
        "grid_len_upper_threshold": 50,
        "grid_len_lower_threshold": 20,
        "temporal_start": 1,
        "temporal_end": 366,
        "temporal_step": 40,
        "temporal_bin_interval": 80,
        "points_lower_threshold": 30,
        "Spatio1": "longitude",
        "Spatio2": "latitude",
        "Temporal1": "DOY",
        "temporal_bin_start_jitter": "adaptive",
        "spatio_bin_jitter_magnitude": "adaptive",
        "use_temporal_to_train": True,
        "n_jobs": 1,
        "min_class_sample": 3,
    }
    return AdaSTEMClassifier(**settings, **kwargs)
75 changes: 75 additions & 0 deletions tests/test_model_prediction_logit_aggregation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import numpy as np
import pandas as pd

from stemflow.model.AdaSTEM import AdaSTEM
from stemflow.model_selection import ST_train_test_split

from .make_models import (
make_AdaSTEMClassifier_caliP,
make_SphereAdaClassifier_caliP,
make_STEMClassifier_caliP,
)
from .set_up_data import set_up_data

# Load the shared test data once at import time; every test in this module
# reuses the same objects.
x_names, (X, y) = set_up_data()
# Split into train/test sets with stemflow's spatio-temporal splitter.
# random_state is pinned, presumably so the split is reproducible across
# runs -- TODO confirm against ST_train_test_split's documentation.
X_train, X_test, y_train, y_test = ST_train_test_split(
X, y, Spatio_blocks_count=100, Temporal_blocks_count=100, random_state=42, test_size=0.3
)


def _check_logit_agg_classifier(model):
    """Fit ``model`` and run the shared logit-aggregation prediction checks.

    The three classifier tests below differ only in the model factory they
    call, so the common fit/predict/assert sequence lives here.

    Args:
        model: An unfitted classifier built by one of the ``make_*_caliP``
            factories.

    Raises:
        AssertionError: If any of the prediction sanity checks fails.
    """
    # The classifiers are trained on a binarized target (positive vs. not).
    model = model.fit(X_train, np.where(y_train > 0, 1, 0))

    # Prediction with logit aggregation enabled must yield at least some
    # non-NaN means and standard deviations.
    pred_mean, pred_std = model.predict(
        X_test.reset_index(drop=True),
        return_std=True,
        verbosity=1,
        n_jobs=1,
        logit_agg=True,
    )
    assert np.sum(~np.isnan(pred_mean)) > 0
    assert np.sum(~np.isnan(pred_std)) > 0

    # Default prediction path (logit_agg left at its default).
    pred = model.predict(X_test)
    assert len(pred) == len(X_test)
    assert np.sum(np.isnan(pred)) / len(pred) <= 0.5

    flat_pred = pred.flatten()
    pred_df = pd.DataFrame(
        {"y_true": y_test.flatten(), "y_pred": np.where(flat_pred < 0, 0, flat_pred)}
    ).dropna()
    assert len(pred_df) > 0

    # Smoke check only: the evaluation must run without raising. The result
    # is intentionally not bound (the original bound it to a local named
    # ``eval``, shadowing the builtin, and never used it).
    AdaSTEM.eval_STEM_res("classification", pred_df.y_true, pred_df.y_pred)


def test_STEMClassifier_caliP():
    """STEMClassifier with a calibrated base model supports logit aggregation."""
    _check_logit_agg_classifier(make_STEMClassifier_caliP())


def test_AdaSTEMClassifier_caliP():
    """AdaSTEMClassifier with a calibrated base model supports logit aggregation."""
    _check_logit_agg_classifier(make_AdaSTEMClassifier_caliP())


def test_SphereAdaClassifier_caliP():
    """SphereAdaSTEMClassifier with a calibrated base model supports logit aggregation."""
    _check_logit_agg_classifier(make_SphereAdaClassifier_caliP())

0 comments on commit 48d62c8

Please sign in to comment.