From 412e0fe6e523ace4cfc681ac3d12ab19914c4d9c Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Tue, 16 Apr 2024 06:13:13 -0400 Subject: [PATCH 01/11] Typo fix in convert_lit_checkpoint.py (#1298) --- litgpt/scripts/convert_lit_checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litgpt/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py index 76d5741913..d18100249d 100644 --- a/litgpt/scripts/convert_lit_checkpoint.py +++ b/litgpt/scripts/convert_lit_checkpoint.py @@ -235,7 +235,7 @@ def check_conversion_supported(lit_weights: Dict[str, torch.Tensor]) -> None: if any("lora" in wn for wn in lit_weights): raise ValueError("Checkpoints with LoRA weights cannot be converted. Call `scripts/merge_lora.py` first.") if any("adapter" in wn or "gating_factor" in wn for wn in lit_weights): - raise NotImplementedError("Converting adapter models is supported.") + raise NotImplementedError("Converting adapter models is not supported.") @torch.inference_mode() From 03cb16672753385ed2e57441eb2bd59689383eb4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 06:36:42 -0400 Subject: [PATCH 02/11] Update README.md --- README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index eaa5fbf7f5..e52213db80 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ # ⚡ LitGPT -**Pretrain, finetune, deploy 20+ LLMs on your own data** +**Pretrain, finetune, evaluate, and deploy 20+ LLMs on your own data** Uses the latest state-of-the-art techniques: -✅ fp4/8/16/32     ✅ LoRA, QLoRA, Adapter (v1, v2)     ✅ flash attention     ✅ FSDP     ✅ 1-1000+ GPUs/TPUs +✅ flash attention     ✅ fp4/8/16/32     ✅ LoRA, QLoRA, Adapter (v1, v2)     ✅ FSDP     ✅ 1-1000+ GPUs/TPUs --- @@ -32,14 +32,15 @@ Uses the latest state-of-the-art techniques:   -## Finetune, pretrain and deploy AI models Lightning fast ⚡⚡ -LitGPT is a command-line tool to use, pretrain, finetune and deploy 
LLMs. It is based on configs with highly-optimized recipes for training the world's largest, most powerful open-source LLMs. +## Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡ +LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm), and deploy [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large-language-models (LLMs). -We've reimplemented all the model architectures and training recipes for 3 reasons: +We reimplemented all model architectures and training recipes from scratch for 4 reasons: -1. Remove all abstraction layers and have single file implementations. -2. Guarantee Apache 2.0 compliance to enable enterprise use without limits. -3. Optimized every detail of every model to get the fastest performance possible to lower cost and training speeds. +1. Remove all abstraction layers and have single file implementations. +2. Guarantee Apache 2.0 compliance to enable enterprise use without limits. +3. Optimized every detail of every model to get the fastest performance possible to lower cost and training speeds. +4. Config based for highly-optimized recipes.   From 1f6d74090e89a0e23886d6177681d5fdb9c49322 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 06:41:35 -0400 Subject: [PATCH 03/11] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e52213db80..01134ad458 100644 --- a/README.md +++ b/README.md @@ -39,8 +39,8 @@ We reimplemented all model architectures and training recipes from scratch for 4 1. Remove all abstraction layers and have single file implementations. 2. Guarantee Apache 2.0 compliance to enable enterprise use without limits. -3. Optimized every detail of every model to get the fastest performance possible to lower cost and training speeds. -4. 
Config based for highly-optimized recipes. +3. Optimized each model architectural detail to maximize performance, reduce costs, and speed up training. +4. Highly-optimized recipe configs we have tested at enterprise scale.   From 8685b55ac62e08874f919034c0c8ea2bb0817c4c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 06:42:23 -0400 Subject: [PATCH 04/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 01134ad458..5d8f49101e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ We reimplemented all model architectures and training recipes from scratch for 4 1. Remove all abstraction layers and have single file implementations. 2. Guarantee Apache 2.0 compliance to enable enterprise use without limits. 3. Optimized each model architectural detail to maximize performance, reduce costs, and speed up training. -4. Highly-optimized recipe configs we have tested at enterprise scale. +4. Highly-optimized [recipe configs](https://github.com/Lightning-AI/litgpt/tree/main/config_hub) we have tested at enterprise scale.   From e1f554079f32ac7c27a8f91d3335436e0fd37bf9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 06:43:37 -0400 Subject: [PATCH 05/11] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5d8f49101e..384a181c38 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ We reimplemented all model architectures and training recipes from scratch for 4 1. Remove all abstraction layers and have single file implementations. 2. Guarantee Apache 2.0 compliance to enable enterprise use without limits. 3. Optimized each model architectural detail to maximize performance, reduce costs, and speed up training. -4. Highly-optimized [recipe configs](https://github.com/Lightning-AI/litgpt/tree/main/config_hub) we have tested at enterprise scale. +4. 
Highly-optimized [recipe configs](#training-recipes) we have tested at enterprise scale.   @@ -231,9 +231,9 @@ Use, Finetune, pretrain, deploy over 20+ LLMs ([full list](tutorials/download_mo # Training recipes -LitGPT comes with validated recipes (YAML configs) to train models under different conditions. +LitGPT comes with validated recipes (YAML configs) to train models under different conditions. We've generated these recipes based on the parameters we found to perform the best for different training conditions. -We've generated these recipes based on the parameters we found to perform the best for different training conditions. +Browse all training recipes [here](config_hub). ### Example @@ -242,8 +242,6 @@ litgpt finetune lora \ --config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml ``` -Browse all training recipes [here](config_hub). - ### What is a config Configs let you customize training for all granular parameters like: From b22a1f49d912a1f1cf0bb76615fbe89065573a55 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 06:55:26 -0400 Subject: [PATCH 06/11] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 384a181c38..d39364576d 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ Uses the latest state-of-the-art techniques:   -## Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡ +# Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡ LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm), and deploy [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large-language-models (LLMs). 
We reimplemented all model architectures and training recipes from scratch for 4 reasons: @@ -400,6 +400,8 @@ litgpt finetune lora \   +# Community + ## Get involved! We appreciate your feedback and contributions. If you have feature requests, questions, or want to contribute code or config files, please don't hesitate to use the [GitHub Issue](https://github.com/Lightning-AI/litgpt/issues) tracker. From 68d95a8bc032dda7d0911f6c1e5f1b862661fcc6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 16 Apr 2024 07:12:59 -0400 Subject: [PATCH 07/11] Update README.md --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d39364576d..1b7f307e44 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,16 @@ pip install -e '.[all]' --- # Get started -LitGPT is CLI and config-based. Select the model and the action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...): +After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...): + +```bash +# litgpt [action] [model] +litgpt download mistralai/Mistral-7B-Instruct-v0.2 +litgpt chat mistralai/Mistral-7B-Instruct-v0.2 +litgpt finetune mistralai/Mistral-7B-Instruct-v0.2 +litgpt pretrain mistralai/Mistral-7B-Instruct-v0.2 +litgpt serve mistralai/Mistral-7B-Instruct-v0.2 +```   From 2bda868b94da93ef8c88d82247252d7e5e37b194 Mon Sep 17 00:00:00 2001 From: Ikko Eltociear Ashimine Date: Tue, 16 Apr 2024 22:19:39 +0900 Subject: [PATCH 08/11] Update download_model_weights.md (#1300) --- tutorials/download_model_weights.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/download_model_weights.md b/tutorials/download_model_weights.md index b91afa5929..d1c320ac33 100644 --- a/tutorials/download_model_weights.md +++ b/tutorials/download_model_weights.md @@ -243,7 +243,7 @@ litgpt download \   ## Finetunes and other model variants -Sometimes you want to 
download the weights of a finetune of one of the models listed above. To do this, you need to manually specifiy the `model_name` associated to the config to use. For example: +Sometimes you want to download the weights of a finetune of one of the models listed above. To do this, you need to manually specify the `model_name` associated to the config to use. For example: ```bash litgpt download \ From 55556380e4084d9ac3dc94b4753392c3af9ed1fc Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Tue, 16 Apr 2024 09:29:42 -0400 Subject: [PATCH 09/11] Improved link checker (#1285) --- .github/workflows/check-links.yml | 37 +++++++++++++------------------ 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml index 0edf0589cc..3a4e1322ad 100644 --- a/.github/workflows/check-links.yml +++ b/.github/workflows/check-links.yml @@ -1,4 +1,4 @@ -name: Check Markdown Links +name: Check hyperlinks on: push: @@ -9,30 +9,23 @@ on: - main jobs: - check-links: + test: runs-on: ubuntu-latest steps: - - name: Checkout Repository - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - name: Install Markdown Link Checker - run: npm install -g markdown-link-check + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' - - name: Create config for markdown link checker - run: | - echo '{ - "projectBaseUrl":"${{ github.workspace }}", - "ignorePatterns": [ - { - "pattern": "^#" - }, - { - "pattern": "^https://falconllm.tii.ae" - } - ] - }' > $GITHUB_WORKSPACE/md_checker_config.json + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-check-links - - name: Find Markdown Files and Check Links - run: | - find . 
-name \*.md -print0 | xargs -0 -n1 markdown-link-check -c $GITHUB_WORKSPACE/md_checker_config.json + - name: Check links + run: | + pytest --check-links README.md --check-links-ignore "https://stability.ai/blog/stablecode-llm-generative-ai-coding" + pytest --check-links tutorials --check-links-ignore "https://stability.ai/blog/stablecode-llm-generative-ai-coding" \ No newline at end of file From 3f2f70a0aecbd31da16c868958aa5eadb9b7e136 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Tue, 16 Apr 2024 10:07:26 -0400 Subject: [PATCH 10/11] Lower-bound all dependencies and package versions (#1305) --- .github/azure-gpu-test.yml | 1 + .github/workflows/cpu-tests.yml | 1 + pyproject.toml | 36 ++++++++++++++++----------------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/.github/azure-gpu-test.yml b/.github/azure-gpu-test.yml index 8a9706717a..b9b5b32c35 100644 --- a/.github/azure-gpu-test.yml +++ b/.github/azure-gpu-test.yml @@ -41,6 +41,7 @@ jobs: displayName: "Image info & NVIDIA" - script: | + pip install --upgrade pip pip install '.[all,test]' displayName: 'Install dependencies' diff --git a/.github/workflows/cpu-tests.yml b/.github/workflows/cpu-tests.yml index cbb8d2805c..707e045099 100644 --- a/.github/workflows/cpu-tests.yml +++ b/.github/workflows/cpu-tests.yml @@ -45,6 +45,7 @@ jobs: - name: Install minimal dependencies run: | + pip install --upgrade pip pip install . 
pip list # make sure all modules are still importable with only the minimal dependencies available diff --git a/pyproject.toml b/pyproject.toml index 44f649baa6..1d3c89cfd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,36 +23,36 @@ litgpt = "litgpt.__main__:main" [project.optional-dependencies] test = [ - "pytest", - "pytest-rerunfailures", - "pytest-timeout", + "pytest>=8.1.1", + "pytest-rerunfailures>=14.0", + "pytest-timeout>=2.3.1", "transformers>=4.38.0", # numerical comparisons - "einops", - "protobuf", + "einops>=0.7.0", + "protobuf>=4.23.4", "lightning-thunder==0.2.0.dev20240404; python_version >= '3.10'", ] all = [ "bitsandbytes==0.42.0", # quantization - "sentencepiece", # llama-based models - "tokenizers", # pythia, falcon, redpajama - "requests", # litgpt.data - "litdata", # litgpt.data - "zstandard", # litgpt.data.prepare_slimpajama.py - "pandas", # litgpt.data.prepare_starcoder.py - "pyarrow", # litgpt.data.prepare_starcoder.py - "tensorboard", # litgpt.pretrain - "torchmetrics", # litgpt.pretrain - "datasets", # litgpt.evaluate + "sentencepiece>=0.2.0", # llama-based models + "tokenizers>=0.15.2", # pythia, falcon, redpajama + "requests>=2.31.0", # litgpt.data + "litdata>=0.2.2", # litgpt.data + "zstandard>=0.22.0", # litgpt.data.prepare_slimpajama.py + "pandas>=1.9.0", # litgpt.data.prepare_starcoder.py + "pyarrow>=15.0.2", # litgpt.data.prepare_starcoder.py + "tensorboard>=2.14.0", # litgpt.pretrain + "torchmetrics>=1.3.1", # litgpt.pretrain + "datasets>=2.18.0", # litgpt.evaluate "transformers>=4.38.0", # litgpt.evaluate "lm-eval>=0.4.2", # litgpt.evaluate - "safetensors", # download + "safetensors>=0.4.3", # download "huggingface_hub[hf_transfer]>=0.21.0" # download ] [build-system] requires = [ - "setuptools", - "wheel", + "setuptools>=68.2.2", + "wheel>=0.41.2", ] build-backend = "setuptools.build_meta" From 02b04bac10a3c03117c483ab7eaa0aedfe36fcbd Mon Sep 17 00:00:00 2001 From: Luca Antiga Date: Tue, 16 Apr 2024 10:11:32 -0400 
Subject: [PATCH 11/11] Temporarily skip thunder test due to timeout (#1304) --- tests/test_thunder_fsdp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_thunder_fsdp.py b/tests/test_thunder_fsdp.py index 8b9c0f4340..321cdac7a6 100644 --- a/tests/test_thunder_fsdp.py +++ b/tests/test_thunder_fsdp.py @@ -263,6 +263,8 @@ def set_up_planner(self, state_dict, metadata, is_coordinator): @RunIf(min_cuda_gpus=2, thunder=True, standalone=True) def test_save_load_sharded_checkpoint(tmp_path): + pytest.skip("Temporarily disabled, often exceeds 5 min timeout") + strategy = ThunderFSDPStrategy(state_dict_type="sharded", broadcast_from=0) fabric = Fabric(accelerator="cuda", devices=2, strategy=strategy) fabric.launch()