diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 0000000000..94543c1a11
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,39 @@
+# To create a release, create a tag and push it to GitHub:
+#git tag -a "v0.0.1-beta" -m "beta version testing"
+#git push --tags
+# https://dev.to/iamtekson/publish-package-to-pypi-and-release-new-version-using-github-actions-108k
+name: Publish LitGPT to PyPI
+
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  build-n-publish:
+    name: Build and publish to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/litgpt
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+
+      - name: Build source and wheel distributions
+        run: |
+          python -m pip install --upgrade build twine
+          python -m build
+          twine check --strict dist/*
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index a2e84c57ad..d16dd90cdd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,7 @@ checkpoints
 out
 wandb
 events.out.tfevents*
+
+# test artifacts from tests/test_readme.py
+tests/custom_finetuning_dataset.json
+tests/custom_texts
\ No newline at end of file
diff --git a/litgpt/chat/base.py b/litgpt/chat/base.py
index 9fa27d2ba3..6fd614a1fa 100644
--- a/litgpt/chat/base.py
+++ b/litgpt/chat/base.py
@@ -219,9 +219,3 @@ def main(
         file=sys.stderr,
     )
     fabric.print()
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/deploy/serve.py b/litgpt/deploy/serve.py
index 863bdedb8b..ff89b0d60b 100644
--- a/litgpt/deploy/serve.py
+++ b/litgpt/deploy/serve.py
@@ -162,7 +162,3 @@ def run_server(
         devices=devices)
 
     server.run(port=port)
-
-
-if __name__ == "__main__":
-    CLI(run_server)
diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
index 29791630dd..bc6e01619f 100644
--- a/litgpt/eval/evaluate.py
+++ b/litgpt/eval/evaluate.py
@@ -112,7 +112,3 @@ def convert_and_evaluate(
         torch_random_seed=seed,
     )
     prepare_results(results, save_filepath)
-
-
-if __name__ == "__main__":
-    CLI(convert_and_evaluate)
diff --git a/litgpt/finetune/adapter.py b/litgpt/finetune/adapter.py
index 0e59011d9d..bde742f3db 100644
--- a/litgpt/finetune/adapter.py
+++ b/litgpt/finetune/adapter.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -438,9 +437,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/adapter_v2.py b/litgpt/finetune/adapter_v2.py
index b967c691be..13320d9cfe 100644
--- a/litgpt/finetune/adapter_v2.py
+++ b/litgpt/finetune/adapter_v2.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -439,9 +438,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/full.py b/litgpt/finetune/full.py
index bec98bbf7b..0e7da1a3d8 100644
--- a/litgpt/finetune/full.py
+++ b/litgpt/finetune/full.py
@@ -20,7 +20,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -421,8 +420,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
     if issues:
         raise ValueError("\n".join(issues))
 
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/lora.py b/litgpt/finetune/lora.py
index 32295f2527..f7c459ecde 100644
--- a/litgpt/finetune/lora.py
+++ b/litgpt/finetune/lora.py
@@ -23,7 +23,6 @@
 from litgpt.scripts.merge_lora import merge_lora
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -476,9 +475,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/generate/adapter.py b/litgpt/generate/adapter.py
index 10e882dbbf..9ac601dbf6 100644
--- a/litgpt/generate/adapter.py
+++ b/litgpt/generate/adapter.py
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/adapter_v2.py b/litgpt/generate/adapter_v2.py
index 4f6406080c..d1c343fb94 100644
--- a/litgpt/generate/adapter_v2.py
+++ b/litgpt/generate/adapter_v2.py
@@ -13,7 +13,7 @@
 from litgpt.adapter_v2 import GPT, Config
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
 
 
 def main(
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/base.py b/litgpt/generate/base.py
index 2b0f1b06de..50d6397de6 100644
--- a/litgpt/generate/base.py
+++ b/litgpt/generate/base.py
@@ -242,9 +242,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/full.py b/litgpt/generate/full.py
index 56a4e7975d..3ac060a3b4 100644
--- a/litgpt/generate/full.py
+++ b/litgpt/generate/full.py
@@ -12,7 +12,7 @@
 from litgpt import GPT, Config, PromptStyle, Tokenizer
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
 
 
 def main(
@@ -119,9 +119,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/sequentially.py b/litgpt/generate/sequentially.py
index 1d1908d088..d3d5250c30 100644
--- a/litgpt/generate/sequentially.py
+++ b/litgpt/generate/sequentially.py
@@ -20,7 +20,7 @@
 import litgpt.generate.base as generate_base
 from litgpt import GPT, Config, Tokenizer
 from litgpt.model import Block, build_mask_cache
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision
 
 
 @torch.inference_mode()
@@ -232,9 +232,3 @@ def main(
         f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr
     )
     print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/tp.py b/litgpt/generate/tp.py
index 39d6ac1065..d8439a220e 100644
--- a/litgpt/generate/tp.py
+++ b/litgpt/generate/tp.py
@@ -226,9 +226,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/pretrain.py b/litgpt/pretrain.py
index 78892206f6..13b5e4898f 100644
--- a/litgpt/pretrain.py
+++ b/litgpt/pretrain.py
@@ -488,8 +488,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_dir, resu
     if issues:
         raise ValueError("\n".join(issues))
 
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py
index 9fdb337ee4..653d55ade5 100644
--- a/litgpt/scripts/convert_hf_checkpoint.py
+++ b/litgpt/scripts/convert_hf_checkpoint.py
@@ -349,9 +349,3 @@ def convert_hf_checkpoint(
         gc.collect()
         print(f"Saving converted checkpoint to {checkpoint_dir}")
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    from jsonargparse import CLI
-
-    CLI(convert_hf_checkpoint)
diff --git a/litgpt/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py
index d18100249d..e8cc6931bf 100644
--- a/litgpt/scripts/convert_lit_checkpoint.py
+++ b/litgpt/scripts/convert_lit_checkpoint.py
@@ -10,7 +10,7 @@
 
 from litgpt import Config
 from litgpt.scripts.convert_hf_checkpoint import layer_template, load_param
-from litgpt.utils import CLI, incremental_save, lazy_load
+from litgpt.utils import incremental_save, lazy_load
 
 
 def copy_weights_falcon(
@@ -265,7 +265,3 @@ def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None:
         copy_fn(sd, lit_weights, saver=saver)
         gc.collect()
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    CLI(convert_lit_checkpoint)
diff --git a/litgpt/scripts/convert_pretrained_checkpoint.py b/litgpt/scripts/convert_pretrained_checkpoint.py
index 3bbb4f5291..7e7541ef99 100644
--- a/litgpt/scripts/convert_pretrained_checkpoint.py
+++ b/litgpt/scripts/convert_pretrained_checkpoint.py
@@ -46,7 +46,3 @@ def convert_pretrained_checkpoint(checkpoint_dir: Path, output_dir: Path) -> Non
         saver.save(converted_state_dict)
 
     copy_config_files(checkpoint_dir, output_dir)
-
-
-if __name__ == "__main__":
-    CLI(convert_pretrained_checkpoint)
diff --git a/litgpt/scripts/download.py b/litgpt/scripts/download.py
index f360c0b3b0..72cd0f7845 100644
--- a/litgpt/scripts/download.py
+++ b/litgpt/scripts/download.py
@@ -144,7 +144,3 @@ def gated_repo_catcher(repo_id: str, access_token: Optional[str]):
                     f" visit https://huggingface.co/{repo_id} for more information."
                 ) from None
         raise e from None
-
-
-if __name__ == "__main__":
-    CLI(download_from_hub)
diff --git a/litgpt/scripts/merge_lora.py b/litgpt/scripts/merge_lora.py
index aff59daef4..7491fb8b7a 100644
--- a/litgpt/scripts/merge_lora.py
+++ b/litgpt/scripts/merge_lora.py
@@ -43,16 +43,23 @@ def merge_lora(
 
     fabric = L.Fabric(devices=1, precision=precision, accelerator="cpu")
     config = Config.from_file(checkpoint_dir / "model_config.yaml", **lora_params)
-    with fabric.init_module():
+    with fabric.init_module(), torch.device("meta"):
         model = GPT(config)
+        # we don't care about these to perform merging
+        model.cos = None
+        model.sin = None
 
     lora_path = checkpoint_dir / "lit_model.pth.lora"
     pretrained_checkpoint = torch.load(str(pretrained_checkpoint_dir / "lit_model.pth"), mmap=True)
     lora_checkpoint = torch.load(str(lora_path), mmap=True)
+    lora_checkpoint = lora_checkpoint.get("model", lora_checkpoint)
 
     # Merge LoRA weights into the base model
-    pretrained_checkpoint.update(lora_checkpoint.get("model", lora_checkpoint))
-    model.load_state_dict(pretrained_checkpoint)
+    pretrained_checkpoint.update(lora_checkpoint)
+    model.load_state_dict(pretrained_checkpoint, assign=True)
+    # since LoRA finetuning only saves the LoRA weights, we treat the lora weights dtype as the expected dtype
+    lora_dtype = next(iter(lora_checkpoint.values())).dtype
+    model.to(dtype=lora_dtype, device="cpu")
     merge_lora_weights(model)
 
     # Remove LoRA parameters and the LoRA linear substring
@@ -79,7 +86,3 @@ def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Path, Opti
     pretrained_checkpoint_dir = Path(hparams["checkpoint_dir"])
     precision = hparams.get("precision")
     return lora_params, pretrained_checkpoint_dir, precision
-
-
-if __name__ == "__main__":
-    CLI(merge_lora)
diff --git a/litgpt/utils.py b/litgpt/utils.py
index b36eaf7b4b..3c57db5fa9 100644
--- a/litgpt/utils.py
+++ b/litgpt/utils.py
@@ -444,6 +444,7 @@ def save_hyperparameters(function: callable, checkpoint_dir: Path) -> None:
         ("finetune", "lora"),
         ("finetune", "adapter"),
         ("finetune", "adapter_v2"),
+        ("finetune",),
         ("pretrain",),
     ]
     for known_command in known_commands:
diff --git a/pyproject.toml b/pyproject.toml
index 40029ae13b..63cc9d6d86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ all = [
    "sentencepiece>=0.2.0",      # llama-based models
    "tokenizers>=0.15.2",        # pythia, falcon, redpajama
    "requests>=2.31.0",          # litgpt.data
-    "litdata>=0.2.2",            # litgpt.data
+    "litdata>=0.2.2,<0.2.6",     # litgpt.data
    "litserve>=0.1.0",           # litgpt.deploy
    "zstandard>=0.22.0",         # litgpt.data.prepare_slimpajama.py
    "pandas>=1.9.0",             # litgpt.data.prepare_starcoder.py
diff --git a/tests/test_chat.py b/tests/test_chat.py
index 13b432897d..ac6ef5760f 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -122,13 +122,8 @@ def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeypatch, te
     assert re.match("Now chatting with Llama 3.*>> .*Reply: foo bar baz", out.getvalue(), re.DOTALL)
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/chat/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "chat", "-h"]
+def test_cli():
+    args = ["litgpt", "chat", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Starts a conversation" in output
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 12f8a68f9c..3b6c56ae1b 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -1,14 +1,11 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 
 import subprocess
-import sys
 from contextlib import redirect_stdout
 from dataclasses import asdict
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
-import pytest
 import torch
 import yaml
 
@@ -43,13 +40,8 @@ def test_evaluate_script(tmp_path):
     assert "Loading checkpoint shards" not in stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/eval/evaluate.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "evaluate", "-h"]
+def test_cli():
+    args = ["litgpt", "evaluate", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "run the LM Evaluation Harness" in output
diff --git a/tests/test_generate.py b/tests/test_generate.py
index 7cd0dca9db..430cd6ada0 100644
--- a/tests/test_generate.py
+++ b/tests/test_generate.py
@@ -83,13 +83,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
     assert "'padded_vocab_size': 512, 'n_layer': 2, 'n_head': 4" in err.getvalue()
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "base", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "base", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_adapter.py b/tests/test_generate_adapter.py
index e977b0a93b..0df5fb67d2 100644
--- a/tests/test_generate_adapter.py
+++ b/tests/test_generate_adapter.py
@@ -48,13 +48,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like):
 
 
 @pytest.mark.parametrize("version", ("", "_v2"))
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(version, mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / f"litgpt/generate/adapter{version}.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", f"adapter{version}", "-h"]
+def test_cli(version):
+    args = ["litgpt", "generate", f"adapter{version}", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates a response" in output
diff --git a/tests/test_generate_sequentially.py b/tests/test_generate_sequentially.py
index b0bed4797e..98b209f325 100644
--- a/tests/test_generate_sequentially.py
+++ b/tests/test_generate_sequentially.py
@@ -285,22 +285,17 @@ def test_base_with_sequentially(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    base_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/base.py", *args], env=env).decode()
+    base_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "base", *args], env=env, cwd=root).decode()
     sequential_stdout = subprocess.check_output(
-        [sys.executable, root / "litgpt/generate/sequentially.py", *args], env=env
+        [sys.executable, "-m", "litgpt", "generate", "sequentially", *args], env=env, cwd=root,
     ).decode()
 
     assert base_stdout.startswith("What food do llamas eat?")
     assert base_stdout == sequential_stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/sequentially.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "sequentially", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "sequentially", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_tp.py b/tests/test_generate_tp.py
index 039dd0ea4b..817f7a3a71 100644
--- a/tests/test_generate_tp.py
+++ b/tests/test_generate_tp.py
@@ -124,19 +124,14 @@ def test_tp(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    tp_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/tp.py", *args], env=env).decode()
+    tp_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "tp", *args], env=env, cwd=root).decode()
 
     # there is some unaccounted randomness so cannot compare the output with that of `generate/base.py`
     assert tp_stdout.startswith("What food do llamas eat?")
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/tp.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "tp", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "tp", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_lora.py b/tests/test_lora.py
index d283f1cf44..8f4edba90a 100644
--- a/tests/test_lora.py
+++ b/tests/test_lora.py
@@ -758,3 +758,13 @@ def test_lora_model_fsdp_init():
     model = fabric.setup(model)
     y = model(x)
     assert y.shape == torch.Size([2, 8, 512])
+
+    # verify that all the parameters, buffers and other attributes aren't on `meta` device
+    for m in model.modules():
+        for p_name, parameter in m.named_parameters():
+            assert not parameter.is_meta, f"Parameter `{p_name}` isn't materialized."
+        for b_name, buffer in m._buffers.items():
+            assert not buffer.is_meta, f"Buffer `{b_name}` isn't materialized."
+        for attr_name, attr_value in m.__dict__.items():
+            if isinstance(attr_value, torch.Tensor):
+                assert not attr_value.is_meta, f"Attribute `{attr_name}` isn't materialized."
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9770bf98e7..554929c77f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -267,6 +267,7 @@ def _test_function2(out_dir: Path, foo: bool = False, bar: int = 1):
     "command",
     [
         "any.py",
+        "litgpt finetune",
         "litgpt finetune full",
         "litgpt finetune lora",
         "litgpt finetune adapter",