diff --git a/CHANGELOG.md b/CHANGELOG.md index c44d87a9..73c2c12f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added +- Added a `device` argument for GBDT training ([#396](https://github.com/pyg-team/pytorch-frame/pull/396)) - Added light-weight MLP ([#372](https://github.com/pyg-team/pytorch-frame/pull/372)) ### Changed diff --git a/test/gbdt/test_gbdt.py b/test/gbdt/test_gbdt.py index 42787976..010d5839 100644 --- a/test/gbdt/test_gbdt.py +++ b/test/gbdt/test_gbdt.py @@ -30,7 +30,11 @@ (TaskType.BINARY_CLASSIFICATION, Metric.ROCAUC), (TaskType.MULTICLASS_CLASSIFICATION, Metric.ACCURACY), ]) -def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric): +@pytest.mark.parametrize('device', [ + 'cpu', + 'gpu', +]) +def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric, device): task_type, metric = task_type_and_metric dataset: Dataset = FakeDataset( num_rows=30, @@ -47,6 +51,7 @@ def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric): num_classes=dataset.num_classes if task_type == TaskType.MULTICLASS_CLASSIFICATION else None, metric=metric, + device=device, ) with tempfile.TemporaryDirectory() as temp_dir: diff --git a/torch_frame/gbdt/gbdt.py b/torch_frame/gbdt/gbdt.py index b2aafc5c..7424b4d7 100644 --- a/torch_frame/gbdt/gbdt.py +++ b/torch_frame/gbdt/gbdt.py @@ -29,16 +29,21 @@ class GBDT: :obj:`Metric.RMSE` for regression, :obj:`Metric.ROCAUC` for binary classification, and :obj:`Metric.ACCURACY` for multi- class classification. (default: :obj:`None`). + device (str): The device for GBDTs to train on. (default: :obj:`cpu`). 
""" def __init__( self, task_type: TaskType, num_classes: int | None = None, metric: Metric | None = None, + device: str = 'cpu', ): self.task_type = task_type self._is_fitted: bool = False self._num_classes = num_classes + self._device: str = device + + assert self._device in ['cpu', 'gpu'] # Set up metric self.metric = DEFAULT_METRIC[task_type] diff --git a/torch_frame/gbdt/tuned_catboost.py b/torch_frame/gbdt/tuned_catboost.py index cc2659f3..8a9f1953 100644 --- a/torch_frame/gbdt/tuned_catboost.py +++ b/torch_frame/gbdt/tuned_catboost.py @@ -142,12 +142,8 @@ def objective( num_boost_round, "depth": trial.suggest_int("depth", 3, 11), - "boosting_type": - trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]), "bagging_temperature": trial.suggest_float("bagging_temperature", 0, 1), - "colsample_bylevel": - trial.suggest_float("colsample_bylevel", 0.01, 0.1), "leaf_estimation_iterations": trial.suggest_int("leaf_estimation_iterations", 1, 11), "l2_leaf_reg": @@ -176,6 +172,21 @@ def objective( else: raise ValueError(f"{self.__class__.__name__} is not supported for " f"{self.task_type}.") + + if self._device == 'gpu': + self.params['task_type'] = 'GPU' + if self.task_type == TaskType.MULTICLASS_CLASSIFICATION: + self.params['boosting_type'] = 'Plain' + else: + self.params['boosting_type'] = trial.suggest_categorical( + "boosting_type", ["Ordered", "Plain"]) + elif self._device == 'cpu': + self.params['task_type'] = 'CPU' + self.params['colsample_bylevel'] = trial.suggest_float( + "colsample_bylevel", 0.01, 0.1) + self.params['boosting_type'] = trial.suggest_categorical( + "boosting_type", ["Ordered", "Plain"]) + boost = catboost.CatBoost(self.params) boost = boost.fit(train_x, train_y, cat_features=cat_features, eval_set=[(val_x, val_y)], early_stopping_rounds=50, diff --git a/torch_frame/gbdt/tuned_lightgbm.py b/torch_frame/gbdt/tuned_lightgbm.py index 732ad741..a2598dd5 100644 --- a/torch_frame/gbdt/tuned_lightgbm.py +++ 
b/torch_frame/gbdt/tuned_lightgbm.py @@ -127,6 +127,8 @@ def objective( -1, "bagging_freq": 1, + "device": + self._device, "max_depth": trial.suggest_int("max_depth", 3, 11), "learning_rate": diff --git a/torch_frame/gbdt/tuned_xgboost.py b/torch_frame/gbdt/tuned_xgboost.py index 9b939d32..400250dc 100644 --- a/torch_frame/gbdt/tuned_xgboost.py +++ b/torch_frame/gbdt/tuned_xgboost.py @@ -118,8 +118,13 @@ def objective( else trial.suggest_float('lambda', 1e-8, 1e2, log=True)), "alpha": (0.0 if not trial.suggest_categorical('use_alpha', [True, False]) - else trial.suggest_float('alpha', 1e-8, 1e2, log=True)) + else trial.suggest_float('alpha', 1e-8, 1e2, log=True)), + "device": + self._device } + if self._device in ['gpu', 'cuda']: + self.params['tree_method'] = 'hist' + if self.params["booster"] == "gbtree" or self.params[ "booster"] == "dart": self.params["max_depth"] = trial.suggest_int("max_depth", 3, 11)