diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
new file mode 100644
index 0000000000..94543c1a11
--- /dev/null
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,39 @@
+# To create a release, create a tag and push it to GitHub:
+#git tag -a "v0.0.1-beta" -m "beta version testing"
+#git push --tags
+# https://dev.to/iamtekson/publish-package-to-pypi-and-release-new-version-using-github-actions-108k
+name: Publish LitGPT to PyPI
+
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  build-n-publish:
+    name: Build and publish to PyPI
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/litgpt
+    permissions:
+      id-token: write
+
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+
+      - name: Build source and wheel distributions
+        run: |
+          python -m pip install --upgrade build twine
+          python -m build
+          twine check --strict dist/*
+      - name: Publish distribution to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/litgpt/chat/base.py b/litgpt/chat/base.py
index 9fa27d2ba3..6fd614a1fa 100644
--- a/litgpt/chat/base.py
+++ b/litgpt/chat/base.py
@@ -219,9 +219,3 @@ def main(
         file=sys.stderr,
     )
     fabric.print()
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/deploy/serve.py b/litgpt/deploy/serve.py
index 42ff3bdd50..beda182f28 100644
--- a/litgpt/deploy/serve.py
+++ b/litgpt/deploy/serve.py
@@ -162,7 +162,3 @@ def run_server(
         devices=devices)
 
     server.run(port=port)
-
-
-if __name__ == "__main__":
-    CLI(run_server)
diff --git a/litgpt/eval/evaluate.py b/litgpt/eval/evaluate.py
index 29791630dd..bc6e01619f 100644
--- a/litgpt/eval/evaluate.py
+++ b/litgpt/eval/evaluate.py
@@ -112,7 +112,3 @@ def convert_and_evaluate(
         torch_random_seed=seed,
     )
     prepare_results(results, save_filepath)
-
-
-if __name__ == "__main__":
-    CLI(convert_and_evaluate)
diff --git a/litgpt/finetune/adapter.py b/litgpt/finetune/adapter.py
index 313d0ea8e7..aa02977fb9 100644
--- a/litgpt/finetune/adapter.py
+++ b/litgpt/finetune/adapter.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -396,9 +395,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/adapter_v2.py b/litgpt/finetune/adapter_v2.py
index 39b2a2d0e2..197bcd0bba 100644
--- a/litgpt/finetune/adapter_v2.py
+++ b/litgpt/finetune/adapter_v2.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -396,9 +395,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs) -> None:
         issues.append(f"{__file__} requires either epochs or max_steps to be set. This is set in {train}")
     if issues:
         raise ValueError("\n".join(issues))
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/full.py b/litgpt/finetune/full.py
index 967b42bec8..49890f4292 100644
--- a/litgpt/finetune/full.py
+++ b/litgpt/finetune/full.py
@@ -22,7 +22,6 @@
 from litgpt.prompts import save_prompt_style
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -420,9 +419,3 @@ def validate_longlora_args(config: Config, longlora: LongLoraArgs):
             f"LongLora context length ({longlora.context_length}) must be a multiple of the number of groups "
             f"({longlora.n_groups})."
         )
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/finetune/lora.py b/litgpt/finetune/lora.py
index 463683803f..69b0a6935f 100644
--- a/litgpt/finetune/lora.py
+++ b/litgpt/finetune/lora.py
@@ -32,7 +32,6 @@
 from litgpt.scripts.merge_lora import merge_lora
 from litgpt.tokenizer import Tokenizer
 from litgpt.utils import (
-    CLI,
     CycleIterator,
     check_valid_checkpoint_dir,
     choose_logger,
@@ -502,10 +501,4 @@ def validate_longlora_args(config: Config, longlora: LongLoraArgs):
         raise ValueError(
             f"LongLora context length ({longlora.context_length}) must be a multiple of the number of groups "
             f"({longlora.n_groups})."
-        )
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
+        )
\ No newline at end of file
diff --git a/litgpt/generate/adapter.py b/litgpt/generate/adapter.py
index 10e882dbbf..9ac601dbf6 100644
--- a/litgpt/generate/adapter.py
+++ b/litgpt/generate/adapter.py
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/adapter_v2.py b/litgpt/generate/adapter_v2.py
index 4f6406080c..d1c343fb94 100644
--- a/litgpt/generate/adapter_v2.py
+++ b/litgpt/generate/adapter_v2.py
@@ -13,7 +13,7 @@
 from litgpt.adapter_v2 import GPT, Config
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, lazy_load
 
 
 def main(
@@ -123,9 +123,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/base.py b/litgpt/generate/base.py
index a0bf559a59..7415ba4ab0 100644
--- a/litgpt/generate/base.py
+++ b/litgpt/generate/base.py
@@ -264,9 +264,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/full.py b/litgpt/generate/full.py
index 694f7e86e0..2f95e04f7d 100644
--- a/litgpt/generate/full.py
+++ b/litgpt/generate/full.py
@@ -13,7 +13,7 @@
 from litgpt import GPT, Config, PromptStyle, Tokenizer
 from litgpt.generate.base import generate
 from litgpt.prompts import has_prompt_style, load_prompt_style
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision, load_checkpoint
 
 
 def main(
@@ -141,9 +141,3 @@ def main(
     fabric.print(f"\n\nTime for inference: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr)
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/sequentially.py b/litgpt/generate/sequentially.py
index f63b320fe2..ec65395336 100644
--- a/litgpt/generate/sequentially.py
+++ b/litgpt/generate/sequentially.py
@@ -21,7 +21,7 @@
 import litgpt.generate.base as generate_base
 from litgpt import GPT, Config, Tokenizer
 from litgpt.model import Block, build_mask_cache
-from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision
+from litgpt.utils import check_valid_checkpoint_dir, get_default_supported_precision
 
 
 @torch.inference_mode()
@@ -254,9 +254,3 @@ def main(
         f"Time for inference {i + 1}: {t:.02f} sec total, {tokens_generated / t:.02f} tokens/sec", file=sys.stderr
     )
     print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/generate/tp.py b/litgpt/generate/tp.py
index 3d0432510c..0d6d166f48 100644
--- a/litgpt/generate/tp.py
+++ b/litgpt/generate/tp.py
@@ -248,9 +248,3 @@ def main(
     )
     if fabric.device.type == "cuda":
         fabric.print(f"Memory used: {torch.cuda.max_memory_allocated() / 1e9:.02f} GB", file=sys.stderr)
-
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(main)
diff --git a/litgpt/pretrain.py b/litgpt/pretrain.py
index d5014dc022..56f4ac9b3c 100644
--- a/litgpt/pretrain.py
+++ b/litgpt/pretrain.py
@@ -443,8 +443,3 @@ def validate_args(train: TrainArgs, eval: EvalArgs, initial_checkpoint_dir, resu
     if issues:
         raise ValueError("\n".join(issues))
 
-
-if __name__ == "__main__":
-    torch.set_float32_matmul_precision("high")
-
-    CLI(setup)
diff --git a/litgpt/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py
index 9fdb337ee4..653d55ade5 100644
--- a/litgpt/scripts/convert_hf_checkpoint.py
+++ b/litgpt/scripts/convert_hf_checkpoint.py
@@ -349,9 +349,3 @@ def convert_hf_checkpoint(
         gc.collect()
         print(f"Saving converted checkpoint to {checkpoint_dir}")
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    from jsonargparse import CLI
-
-    CLI(convert_hf_checkpoint)
diff --git a/litgpt/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py
index d18100249d..e8cc6931bf 100644
--- a/litgpt/scripts/convert_lit_checkpoint.py
+++ b/litgpt/scripts/convert_lit_checkpoint.py
@@ -10,7 +10,7 @@
 
 from litgpt import Config
 from litgpt.scripts.convert_hf_checkpoint import layer_template, load_param
-from litgpt.utils import CLI, incremental_save, lazy_load
+from litgpt.utils import incremental_save, lazy_load
 
 
 def copy_weights_falcon(
@@ -265,7 +265,3 @@ def convert_lit_checkpoint(checkpoint_dir: Path, output_dir: Path) -> None:
         copy_fn(sd, lit_weights, saver=saver)
         gc.collect()
         saver.save(sd)
-
-
-if __name__ == "__main__":
-    CLI(convert_lit_checkpoint)
diff --git a/litgpt/scripts/convert_pretrained_checkpoint.py b/litgpt/scripts/convert_pretrained_checkpoint.py
index 3bbb4f5291..7e7541ef99 100644
--- a/litgpt/scripts/convert_pretrained_checkpoint.py
+++ b/litgpt/scripts/convert_pretrained_checkpoint.py
@@ -46,7 +46,3 @@ def convert_pretrained_checkpoint(checkpoint_dir: Path, output_dir: Path) -> Non
         saver.save(converted_state_dict)
 
     copy_config_files(checkpoint_dir, output_dir)
-
-
-if __name__ == "__main__":
-    CLI(convert_pretrained_checkpoint)
diff --git a/litgpt/scripts/download.py b/litgpt/scripts/download.py
index f360c0b3b0..72cd0f7845 100644
--- a/litgpt/scripts/download.py
+++ b/litgpt/scripts/download.py
@@ -144,7 +144,3 @@ def gated_repo_catcher(repo_id: str, access_token: Optional[str]):
             f" visit https://huggingface.co/{repo_id} for more information."
         ) from None
     raise e from None
-
-
-if __name__ == "__main__":
-    CLI(download_from_hub)
diff --git a/litgpt/scripts/merge_lora.py b/litgpt/scripts/merge_lora.py
index 845acbc405..7491fb8b7a 100644
--- a/litgpt/scripts/merge_lora.py
+++ b/litgpt/scripts/merge_lora.py
@@ -86,7 +86,3 @@ def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Path, Opti
     pretrained_checkpoint_dir = Path(hparams["checkpoint_dir"])
     precision = hparams.get("precision")
     return lora_params, pretrained_checkpoint_dir, precision
-
-
-if __name__ == "__main__":
-    CLI(merge_lora)
diff --git a/litgpt/utils.py b/litgpt/utils.py
index 6eb7efbff4..18aea56d64 100644
--- a/litgpt/utils.py
+++ b/litgpt/utils.py
@@ -444,6 +444,7 @@ def save_hyperparameters(function: callable, checkpoint_dir: Path) -> None:
         ("finetune", "lora"),
         ("finetune", "adapter"),
         ("finetune", "adapter_v2"),
+        ("finetune",),
         ("pretrain",),
     ]
     for known_command in known_commands:
diff --git a/pyproject.toml b/pyproject.toml
index 40029ae13b..63cc9d6d86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ all = [
     "sentencepiece>=0.2.0",      # llama-based models
     "tokenizers>=0.15.2",        # pythia, falcon, redpajama
     "requests>=2.31.0",          # litgpt.data
-    "litdata>=0.2.2",            # litgpt.data
+    "litdata>=0.2.2,<0.2.6",     # litgpt.data
     "litserve>=0.1.0",           # litgpt.deploy
     "zstandard>=0.22.0",         # litgpt.data.prepare_slimpajama.py
     "pandas>=1.9.0",             # litgpt.data.prepare_starcoder.py
diff --git a/tests/test_chat.py b/tests/test_chat.py
index 13b432897d..ac6ef5760f 100644
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -122,13 +122,8 @@ def test_main(mocked_input, stop_iteration, fake_checkpoint_dir, monkeypatch, te
     assert re.match("Now chatting with Llama 3.*>> .*Reply: foo bar baz", out.getvalue(), re.DOTALL)
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/chat/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "chat", "-h"]
+def test_cli():
+    args = ["litgpt", "chat", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Starts a conversation" in output
diff --git a/tests/test_evaluate.py b/tests/test_evaluate.py
index 12f8a68f9c..3b6c56ae1b 100644
--- a/tests/test_evaluate.py
+++ b/tests/test_evaluate.py
@@ -1,14 +1,11 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 
 import subprocess
-import sys
 from contextlib import redirect_stdout
 from dataclasses import asdict
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
-import pytest
 import torch
 import yaml
 
@@ -43,13 +40,8 @@ def test_evaluate_script(tmp_path):
     assert "Loading checkpoint shards" not in stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/eval/evaluate.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "evaluate", "-h"]
+def test_cli():
+    args = ["litgpt", "evaluate", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "run the LM Evaluation Harness" in output
diff --git a/tests/test_generate.py b/tests/test_generate.py
index 7cd0dca9db..430cd6ada0 100644
--- a/tests/test_generate.py
+++ b/tests/test_generate.py
@@ -83,13 +83,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, tensor_like):
     assert "'padded_vocab_size': 512, 'n_layer': 2, 'n_head': 4" in err.getvalue()
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/base.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "base", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "base", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_adapter.py b/tests/test_generate_adapter.py
index e977b0a93b..0df5fb67d2 100644
--- a/tests/test_generate_adapter.py
+++ b/tests/test_generate_adapter.py
@@ -48,13 +48,8 @@ def test_main(fake_checkpoint_dir, monkeypatch, version, tensor_like):
 
 
 @pytest.mark.parametrize("version", ("", "_v2"))
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(version, mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / f"litgpt/generate/adapter{version}.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", f"adapter{version}", "-h"]
+def test_cli(version):
+    args = ["litgpt", "generate", f"adapter{version}", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates a response" in output
diff --git a/tests/test_generate_sequentially.py b/tests/test_generate_sequentially.py
index b0bed4797e..98b209f325 100644
--- a/tests/test_generate_sequentially.py
+++ b/tests/test_generate_sequentially.py
@@ -285,22 +285,17 @@ def test_base_with_sequentially(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    base_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/base.py", *args], env=env).decode()
+    base_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "base", *args], env=env, cwd=root).decode()
     sequential_stdout = subprocess.check_output(
-        [sys.executable, root / "litgpt/generate/sequentially.py", *args], env=env
+        [sys.executable, "-m", "litgpt", "generate", "sequentially", *args], env=env, cwd=root,
     ).decode()
 
     assert base_stdout.startswith("What food do llamas eat?")
     assert base_stdout == sequential_stdout
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/sequentially.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "sequentially", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "sequentially", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_generate_tp.py b/tests/test_generate_tp.py
index 039dd0ea4b..817f7a3a71 100644
--- a/tests/test_generate_tp.py
+++ b/tests/test_generate_tp.py
@@ -124,19 +124,14 @@ def test_tp(tmp_path):
         f"--checkpoint_dir={str(checkpoint_dir)}",
     ]
     env = {"CUDA_VISIBLE_DEVICES": "0,1"}
-    tp_stdout = subprocess.check_output([sys.executable, root / "litgpt/generate/tp.py", *args], env=env).decode()
+    tp_stdout = subprocess.check_output([sys.executable, "-m", "litgpt", "generate", "tp", *args], env=env, cwd=root).decode()
 
     # there is some unaccounted randomness so cannot compare the output with that of `generate/base.py`
     assert tp_stdout.startswith("What food do llamas eat?")
 
 
-@pytest.mark.parametrize("mode", ["file", "entrypoint"])
-def test_cli(mode):
-    if mode == "file":
-        cli_path = Path(__file__).parent.parent / "litgpt/generate/tp.py"
-        args = [sys.executable, cli_path, "-h"]
-    else:
-        args = ["litgpt", "generate", "tp", "-h"]
+def test_cli():
+    args = ["litgpt", "generate", "tp", "-h"]
     output = subprocess.check_output(args)
     output = str(output.decode())
     assert "Generates text samples" in output
diff --git a/tests/test_lora.py b/tests/test_lora.py
index 2966bdb5bc..3ef29e181d 100644
--- a/tests/test_lora.py
+++ b/tests/test_lora.py
@@ -822,3 +822,13 @@ def test_lora_model_fsdp_init():
     model = fabric.setup(model)
     y = model(x)
     assert y.shape == torch.Size([2, 8, 512])
+
+    # verify that all the parameters, buffers and other attributes aren't on `meta` device
+    for m in model.modules():
+        for p_name, parameter in m.named_parameters():
+            assert not parameter.is_meta, f"Parameter `{p_name}` isn't materialized."
+        for b_name, buffer in m._buffers.items():
+            assert not buffer.is_meta, f"Buffer `{b_name}` isn't materialized."
+        for attr_name, attr_value in m.__dict__.items():
+            if isinstance(attr_value, torch.Tensor):
+                assert not attr_value.is_meta, f"Attribute `{attr_name}` isn't materialized."
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 9770bf98e7..554929c77f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -267,6 +267,7 @@ def _test_function2(out_dir: Path, foo: bool = False, bar: int = 1):
     "command",
     [
         "any.py",
+        "litgpt finetune",
         "litgpt finetune full",
         "litgpt finetune lora",
         "litgpt finetune adapter",