diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 0000000000..94543c1a11
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,39 @@
+# To create a release, create a tag and push it to GitHub:
+#git tag -a "v0.0.1-beta" -m "beta version testing"
+#git push --tags
+# https://dev.to/iamtekson/publish-package-to-pypi-and-release-new-version-using-github-actions-108k
+name: Publish LitGPT to PyPI
+
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  build-n-publish:
+    name: Build and publish to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/litgpt
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+
+      - name: Build source and wheel distributions
+        run: |
+          python -m pip install --upgrade build twine
+          python -m build
+          twine check --strict dist/*
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
index a2e84c57ad..d16dd90cdd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,7 @@ checkpoints
 out
 wandb
 events.out.tfevents*
+
+# test artifacts from tests/test_readme.py
+tests/custom_finetuning_dataset.json
+tests/custom_texts
\ No newline at end of file
diff --git a/litgpt/chat/base.py b/litgpt/chat/base.py
index 9fa27d2ba3..6fd614a1fa 100644
--- a/litgpt/chat/base.py
+++ b/litgpt/chat/base.py
@@ -219,9 +219,3 @@ def main(
         file=sys.stderr,
     )
     fabric.print()
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/deploy/serve.py b/litgpt/deploy/serve.py
index 863bdedb8b..ff89b0d60b 100644
--- a/litgpt/deploy/serve.py
+++ b/litgpt/deploy/serve.py
@@ -162,7 +162,3 @@ def run_server(
         devices=devices)
 
     server.run(port=port)
-
-
-if __name__ == "__main__":
-    CLI(run_server)
diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
index 29791630dd..bc6e01619f 100644
--- a/litgpt/eval/evaluate.py
+++ b/litgpt/eval/evaluate.py
@@ -112,7 +112,3 @@ def convert_and_evaluate(
         torch_random_seed=seed,
     )
     prepare_results(results, save_filepath)
-
-
-if __name__ == "__main__":
-    CLI(convert_and_evaluate)
diff --git a/litgpt/finetune/adapter.py b/litgpt/finetune/adapter.py
index 0e59011d9d..bde742f3db 100644
--- a/litgpt/finetune/adapter.py
+++ b/litgpt/finetune/adapter.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -438,9 +437,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/adapter_v2.py b/litgpt/finetune/adapter_v2.py
index b967c691be..13320d9cfe 100644
--- a/litgpt/finetune/adapter_v2.py
+++ b/litgpt/finetune/adapter_v2.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -439,9 +438,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/full.py b/litgpt/finetune/full.py
index bec98bbf7b..0e7da1a3d8 100644
--- a/litgpt/finetune/full.py
+++ b/litgpt/finetune/full.py
@@ -20,7 +20,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -421,8 +420,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
     if issues:
         raise ValueError("\n".join(issues))
 
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/lora.py b/litgpt/finetune/lora.py
index 32295f2527..f7c459ecde 100644
--- a/litgpt/finetune/lora.py
+++ b/litgpt/finetune/lora.py
@@ -23,7 +23,6 @@
 from litgpt.scripts.merge_lora import merge_lora
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -476,9 +475,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/generate/adapter.py b/litgpt/generate/adapter.py
index 10e882dbbf..9ac601dbf6 100644
--- a/litgpt/generate/adapter.py
+++ b/litgpt/generate/adapter.py
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/adapter_v2.py b/litgpt/generate/adapter_v2.py
index 4f6406080c..d1c343fb94 100644
--- a/litgpt/generate/adapter_v2.py
+++ b/litgpt/generate/adapter_v2.py
@@ -13,7 +13,7 @@
 from litgpt.adapter_v2 import GPT, Config
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
 
 
 def main(
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/base.py b/litgpt/generate/base.py
index 2b0f1b06de..50d6397de6 100644
--- a/litgpt/generate/base.py
+++ b/litgpt/generate/base.py
@@ -242,9 +242,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/full.py b/litgpt/generate/full.py
index 56a4e7975d..3ac060a3b4 100644
--- a/litgpt/generate/full.py
+++ b/litgpt/generate/full.py
@@ -12,7 +12,7 @@
 from litgpt import GPT, Config, PromptStyle, Tokenizer
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
 
 
 def main(
@@ -119,9 +119,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/sequentially.py b/litgpt/generate/sequentially.py
index 1d1908d088..d3d5250c30 100644
--- a/litgpt/generate/sequentially.py
+++ b/litgpt/generate/sequentially.py
@@ -20,7 +20,7 @@
 import litgpt.generate.base as generate_base
 from litgpt import GPT, Config, Tokenizer
 from litgpt.model import Block, build_mask_cache
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision
 
 
 @torch.inference_mode()
@@ -232,9 +232,3 @@ def main(
         f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr
     )
     print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/tp.py b/litgpt/generate/tp.py
index 39d6ac1065..d8439a220e 100644
--- a/litgpt/generate/tp.py
+++ b/litgpt/generate/tp.py
@@ -226,9 +226,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/pretrain.py b/litgpt/pretrain.py
index 78892206f6..13b5e4898f 100644
--- a/litgpt/pretrain.py
+++ b/litgpt/pretrain.py
@@ -488,8 +488,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_dir, resu
     if issues:
         raise ValueError("\n".join(issues))
 
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py
index 9fdb337ee4..653d55ade5 100644
--- a/litgpt/scripts/convert_hf_checkpoint.py
+++ b/litgpt/scripts/convert_hf_checkpoint.py
@@ -349,9 +349,3 @@ def convert_hf_checkpoint(
         gc.collect()
         print(f"Saving converted checkpoint to {checkpoint_dir}")
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    from jsonargparse import CLI
-
-    CLI(convert_hf_checkpoint)
diff --git a/litgpt/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py
index d18100249d..e8cc6931bf 100644
--- a/litgpt/scripts/convert_lit_checkpoint.py
+++ b/litgpt/scripts/convert_lit_checkpoint.py
@@ -10,7 +10,7 @@
 
 from litgpt import Config
 from litgpt.scripts.convert_hf_checkpoint import layer_template, load_param
-from litgpt.utils import CLI, incremental_save, lazy_load
+from litgpt.utils import incremental_save, lazy_load
 
 
 def copy_weights_falcon(
@@ -265,7 +265,3 @@ def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None:
         copy_fn(sd, lit_weights, saver=saver)
         gc.collect()
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    CLI(convert_lit_checkpoint)
diff --git a/litgpt/scripts/convert_pretrained_checkpoint.py b/litgpt/scripts/convert_pretrained_checkpoint.py
index 3bbb4f5291..7e7541ef99 100644
--- a/litgpt/scripts/convert_pretrained_checkpoint.py
+++ b/litgpt/scripts/convert_pretrained_checkpoint.py
@@ -46,7 +46,3 @@ def convert_pretrained_checkpoint(checkpoint_dir: Path, output_dir: Path) -> Non
         saver.save(converted_state_dict)
 
     copy_config_files(checkpoint_dir, output_dir)
-
-
-if __name__ == "__main__":
-    CLI(convert_pretrained_checkpoint)
diff --git a/litgpt/scripts/download.py b/litgpt/scripts/download.py
index f360c0b3b0..72cd0f7845 100644
--- a/litgpt/scripts/download.py
+++ b/litgpt/scripts/download.py
@@ -144,7 +144,3 @@ def gated_repo_catcher(repo_id: str, access_token: Optional[str]):
                     f" visit https://huggingface.co/{repo_id} for more information."
                 ) from None
         raise e from None
-
-
-if __name__ == "__main__":
-    CLI(download_from_hub)
diff --git a/litgpt/scripts/merge_lora.py b/litgpt/scripts/merge_lora.py
index aff59daef4..7491fb8b7a 100644
--- a/litgpt/scripts/merge_lora.py
+++ b/litgpt/scripts/merge_lora.py
@@ -43,16 +43,23 @@ def merge_lora(
 
     fabric = L.Fabric(devices=1, precision=precision, accelerator="cpu")
     config = Config.from_file(checkpoint_dir / "model_config.yaml", **lora_params)
-    with fabric.init_module():
+    with fabric.init_module(), torch.device("meta"):
         model = GPT(config)
+        # we don't care about these to perform merging
+        model.cos = None
+        model.sin = None
 
     lora_path = checkpoint_dir / "lit_model.pth.lora"
     pretrained_checkpoint = torch.load(str(pretrained_checkpoint_dir / "lit_model.pth"), mmap=True)
     lora_checkpoint = torch.load(str(lora_path), mmap=True)
+    lora_checkpoint = lora_checkpoint.get("model", lora_checkpoint)
 
     # Merge LoRA weights into the base model
-    pretrained_checkpoint.update(lora_checkpoint.get("model", lora_checkpoint))
-    model.load_state_dict(pretrained_checkpoint)
+    pretrained_checkpoint.update(lora_checkpoint)
+    model.load_state_dict(pretrained_checkpoint, assign=True)
+    # since LoRA finetuning only saves the LoRA weights, we treat the lora weights dtype as the expected dtype
+    lora_dtype = next(iter(lora_checkpoint.values())).dtype
+    model.to(dtype=lora_dtype, device="cpu")
     merge_lora_weights(model)
 
     # Remove LoRA parameters and the LoRA linear substring
@@ -79,7 +86,3 @@ def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Path, Opti
     pretrained_checkpoint_dir = Path(hparams["checkpoint_dir"])
     precision = hparams.get("precision")
     return lora_params, pretrained_checkpoint_dir, precision
-
-
-if __name__ == "__main__":
-    CLI(merge_lora)
diff --git a/litgpt/utils.py b/litgpt/utils.py
index b36eaf7b4b..3c57db5fa9 100644
--- a/litgpt/utils.py
+++ b/litgpt/utils.py
@@ -444,6 +444,7 @@ def save_hyperparameters(function: callable, checkpoint_dir: Path) -> None:
         ("finetune", "lora"),
         ("finetune", "adapter"),
         ("finetune", "adapter_v2"),
+        ("finetune",),
         ("pretrain",),
     ]
     for known_command in known_commands:
diff --git a/pyproject.toml b/pyproject.toml
index 40029ae13b..63cc9d6d86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ all = [
    "sentencepiece>=0.2.0",      # llama-based models
    "tokenizers>=0.15.2",        # pythia, falcon, redpajama
    "requests>=2.31.0",          # litgpt.data
-    "litdata>=0.2.2",            # litgpt.data
+    "litdata>=0.2.2,<0.2.6",     # litgpt.data
    "litserve>=0.1.0",           # litgpt.deploy
    "zstandard>=0.22.0",         # litgpt.data.prepare_slimpajama.py
    "pandas>=1.9.0",             # litgpt.data.prepare_starcoder.py
diff --git a/tests/test_chat.py b/tests/test_chat.py
index 13b432897d..ac6ef5760f 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -122,13 +122,8 @@ def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeypatch, te
     assert re.match("Now chatting with Llama 3.*>> .*Reply: foo bar baz", out.getvalue(), re.DOTALL)
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/chat/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "chat", "-h"]
+def test_cli():
+    args = ["litgpt", "chat", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Starts a conversation" in output
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 12f8a68f9c..3b6c56ae1b 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -1,14 +1,11 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 
 import subprocess
-import sys
 from contextlib import redirect_stdout
 from dataclasses import asdict
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
-import pytest
 import torch
 import yaml
 
@@ -43,13 +40,8 @@ def test_evaluate_script(tmp_path):
     assert "Loading checkpoint shards" not in stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/eval/evaluate.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "evaluate", "-h"]
+def test_cli():
+    args = ["litgpt", "evaluate", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "run the LM Evaluation Harness" in output
diff --git a/tests/test_generate.py b/tests/test_generate.py
index 7cd0dca9db..430cd6ada0 100644
--- a/tests/test_generate.py
+++ b/tests/test_generate.py
@@ -83,13 +83,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
     assert "'padded_vocab_size': 512, 'n_layer': 2, 'n_head': 4" in err.getvalue()
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "base", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "base", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_adapter.py b/tests/test_generate_adapter.py
index e977b0a93b..0df5fb67d2 100644
--- a/tests/test_generate_adapter.py
+++ b/tests/test_generate_adapter.py
@@ -48,13 +48,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like):
 
 
 @pytest.mark.parametrize("version", ("", "_v2"))
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(version, mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / f"litgpt/generate/adapter{version}.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", f"adapter{version}", "-h"]
+def test_cli(version):
+    args = ["litgpt", "generate", f"adapter{version}", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates a response" in output
diff --git a/tests/test_generate_sequentially.py b/tests/test_generate_sequentially.py
index b0bed4797e..98b209f325 100644
--- a/tests/test_generate_sequentially.py
+++ b/tests/test_generate_sequentially.py
@@ -285,22 +285,17 @@ def test_base_with_sequentially(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    base_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/base.py", *args], env=env).decode()
+    base_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "base", *args], env=env, cwd=root).decode()
     sequential_stdout = subprocess.check_output(
-        [sys.executable, root / "litgpt/generate/sequentially.py", *args], env=env
+        [sys.executable, "-m", "litgpt", "generate", "sequentially", *args], env=env, cwd=root,
     ).decode()
 
     assert base_stdout.startswith("What food do llamas eat?")
     assert base_stdout == sequential_stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/sequentially.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "sequentially", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "sequentially", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_tp.py b/tests/test_generate_tp.py
index 039dd0ea4b..817f7a3a71 100644
--- a/tests/test_generate_tp.py
+++ b/tests/test_generate_tp.py
@@ -124,19 +124,14 @@ def test_tp(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    tp_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/tp.py", *args], env=env).decode()
+    tp_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "tp", *args], env=env, cwd=root).decode()
 
     # there is some unaccounted randomness so cannot compare the output with that of `generate/base.py`
     assert tp_stdout.startswith("What food do llamas eat?")
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/tp.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "tp", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "tp", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_lora.py b/tests/test_lora.py
index d283f1cf44..8f4edba90a 100644
--- a/tests/test_lora.py
+++ b/tests/test_lora.py
@@ -758,3 +758,13 @@ def test_lora_model_fsdp_init():
     model = fabric.setup(model)
     y = model(x)
     assert y.shape == torch.Size([2, 8, 512])
+
+    # verify that all the parameters, buffers and other attributes aren't on `meta` device
+    for m in model.modules():
+        for p_name, parameter in m.named_parameters():
+            assert not parameter.is_meta, f"Parameter `{p_name}` isn't materialized."
+        for b_name, buffer in m._buffers.items():
+            assert not buffer.is_meta, f"Buffer `{b_name}` isn't materialized."
+        for attr_name, attr_value in m.__dict__.items():
+            if isinstance(attr_value, torch.Tensor):
+                assert not attr_value.is_meta, f"Attribute `{attr_name}` isn't materialized."
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9770bf98e7..554929c77f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -267,6 +267,7 @@ def _test_function2(out_dir: Path, foo: bool = False, bar: int = 1):
     "command",
     [
         "any.py",
+        "litgpt finetune",
         "litgpt finetune full",
         "litgpt finetune lora",
         "litgpt finetune adapter",