diff --git a/CHANGELOG.md b/CHANGELOG.md
index c44d87a9..73c2c12f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added a `device` argument for GBDT training ([#396](https://github.com/pyg-team/pytorch-frame/pull/396))
 - Added light-weight MLP ([#372](https://github.com/pyg-team/pytorch-frame/pull/372))
 
 ### Changed
diff --git a/test/gbdt/test_gbdt.py b/test/gbdt/test_gbdt.py
index 42787976..010d5839 100644
--- a/test/gbdt/test_gbdt.py
+++ b/test/gbdt/test_gbdt.py
@@ -30,7 +30,11 @@
     (TaskType.BINARY_CLASSIFICATION, Metric.ROCAUC),
     (TaskType.MULTICLASS_CLASSIFICATION, Metric.ACCURACY),
 ])
-def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric):
+@pytest.mark.parametrize('device', [
+    'cpu',
+    'gpu',
+])
+def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric, device):
     task_type, metric = task_type_and_metric
     dataset: Dataset = FakeDataset(
         num_rows=30,
@@ -47,6 +51,7 @@ def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric):
         num_classes=dataset.num_classes
         if task_type == TaskType.MULTICLASS_CLASSIFICATION else None,
         metric=metric,
+        device=device,
     )
 
     with tempfile.TemporaryDirectory() as temp_dir:
diff --git a/torch_frame/gbdt/gbdt.py b/torch_frame/gbdt/gbdt.py
index b2aafc5c..7424b4d7 100644
--- a/torch_frame/gbdt/gbdt.py
+++ b/torch_frame/gbdt/gbdt.py
@@ -29,16 +29,21 @@ class GBDT:
             :obj:`Metric.RMSE` for regression, :obj:`Metric.ROCAUC` for binary
             classification, and :obj:`Metric.ACCURACY` for multi-
             class classification. (default: :obj:`None`).
+        device (str): Training device, 'cpu' or 'gpu'. (default: :obj:`cpu`).
     """
     def __init__(
         self,
         task_type: TaskType,
         num_classes: int | None = None,
         metric: Metric | None = None,
+        device: str = 'cpu',
     ):
         self.task_type = task_type
         self._is_fitted: bool = False
         self._num_classes = num_classes
+        self._device: str = device
+
+        assert self._device in ['cpu', 'gpu']
 
         # Set up metric
         self.metric = DEFAULT_METRIC[task_type]
diff --git a/torch_frame/gbdt/tuned_catboost.py b/torch_frame/gbdt/tuned_catboost.py
index cc2659f3..8a9f1953 100644
--- a/torch_frame/gbdt/tuned_catboost.py
+++ b/torch_frame/gbdt/tuned_catboost.py
@@ -142,12 +142,8 @@ def objective(
             num_boost_round,
             "depth":
             trial.suggest_int("depth", 3, 11),
-            "boosting_type":
-            trial.suggest_categorical("boosting_type", ["Ordered", "Plain"]),
             "bagging_temperature":
             trial.suggest_float("bagging_temperature", 0, 1),
-            "colsample_bylevel":
-            trial.suggest_float("colsample_bylevel", 0.01, 0.1),
             "leaf_estimation_iterations":
             trial.suggest_int("leaf_estimation_iterations", 1, 11),
             "l2_leaf_reg":
@@ -176,6 +172,21 @@ def objective(
         else:
             raise ValueError(f"{self.__class__.__name__} is not supported for "
                              f"{self.task_type}.")
+
+        if self._device == 'gpu':
+            self.params['task_type'] = 'GPU'
+            if self.task_type == TaskType.MULTICLASS_CLASSIFICATION:
+                self.params['boosting_type'] = 'Plain'
+            else:
+                self.params['boosting_type'] = trial.suggest_categorical(
+                    "boosting_type", ["Ordered", "Plain"])
+        elif self._device == 'cpu':
+            self.params['task_type'] = 'CPU'
+            self.params['colsample_bylevel'] = trial.suggest_float(
+                "colsample_bylevel", 0.01, 0.1)
+            self.params['boosting_type'] = trial.suggest_categorical(
+                "boosting_type", ["Ordered", "Plain"])
+
         boost = catboost.CatBoost(self.params)
         boost = boost.fit(train_x, train_y, cat_features=cat_features,
                           eval_set=[(val_x, val_y)], early_stopping_rounds=50,
diff --git a/torch_frame/gbdt/tuned_lightgbm.py b/torch_frame/gbdt/tuned_lightgbm.py
index 732ad741..a2598dd5 100644
--- a/torch_frame/gbdt/tuned_lightgbm.py
+++ b/torch_frame/gbdt/tuned_lightgbm.py
@@ -127,6 +127,8 @@ def objective(
             -1,
             "bagging_freq":
             1,
+            "device":
+            self._device,
             "max_depth":
             trial.suggest_int("max_depth", 3, 11),
             "learning_rate":
diff --git a/torch_frame/gbdt/tuned_xgboost.py b/torch_frame/gbdt/tuned_xgboost.py
index 9b939d32..400250dc 100644
--- a/torch_frame/gbdt/tuned_xgboost.py
+++ b/torch_frame/gbdt/tuned_xgboost.py
@@ -118,8 +118,13 @@ def objective(
              else trial.suggest_float('lambda', 1e-8, 1e2, log=True)),
             "alpha":
             (0.0 if not trial.suggest_categorical('use_alpha', [True, False])
-             else trial.suggest_float('alpha', 1e-8, 1e2, log=True))
+             else trial.suggest_float('alpha', 1e-8, 1e2, log=True)),
+            "device":
+            self._device
         }
+        if self._device in ['gpu', 'cuda']:
+            self.params['tree_method'] = 'hist'
+
         if self.params["booster"] == "gbtree" or self.params[
                 "booster"] == "dart":
             self.params["max_depth"] = trial.suggest_int("max_depth", 3, 11)