diff --git a/litgpt/scripts/__init__.py b/litgpt/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py similarity index 98% rename from scripts/convert_hf_checkpoint.py rename to litgpt/scripts/convert_hf_checkpoint.py index 89a9e7291e..711df4916e 100644 --- a/scripts/convert_hf_checkpoint.py +++ b/litgpt/scripts/convert_hf_checkpoint.py @@ -2,9 +2,7 @@ import gc import json -import sys from collections import defaultdict -from dataclasses import asdict from functools import partial from pathlib import Path from typing import Dict, List, Optional, Tuple, Union @@ -12,10 +10,6 @@ import torch from lightning.fabric.utilities.load import _NotYetLoadedTensor as NotYetLoadedTensor -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) - from litgpt import Config from litgpt.utils import incremental_save, lazy_load, save_config diff --git a/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py similarity index 98% rename from scripts/convert_lit_checkpoint.py rename to litgpt/scripts/convert_lit_checkpoint.py index 1ff58bf9c2..67bd61fe89 100644 --- a/scripts/convert_lit_checkpoint.py +++ b/litgpt/scripts/convert_lit_checkpoint.py @@ -1,7 +1,6 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. import gc -import sys from functools import partial from pathlib import Path from typing import Dict, Optional, Tuple, Union @@ -9,13 +8,9 @@ import torch from lightning.fabric.utilities.load import _NotYetLoadedTensor as NotYetLoadedTensor -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) - from litgpt import Config +from litgpt.scripts.convert_hf_checkpoint import layer_template, load_param from litgpt.utils import CLI, incremental_save, lazy_load -from scripts.convert_hf_checkpoint import layer_template, load_param def copy_weights_falcon( diff --git a/scripts/convert_pretrained_checkpoint.py b/litgpt/scripts/convert_pretrained_checkpoint.py similarity index 91% rename from scripts/convert_pretrained_checkpoint.py rename to litgpt/scripts/convert_pretrained_checkpoint.py index 5aa4f2cd51..bcb0149cbf 100644 --- a/scripts/convert_pretrained_checkpoint.py +++ b/litgpt/scripts/convert_pretrained_checkpoint.py @@ -1,17 +1,9 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. -import json -import shutil -import sys -from dataclasses import asdict from pathlib import Path import torch -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) - from litgpt.utils import CLI, incremental_save, copy_config_files diff --git a/scripts/download.py b/litgpt/scripts/download.py similarity index 95% rename from scripts/download.py rename to litgpt/scripts/download.py index c9711e448a..89614cefac 100644 --- a/scripts/download.py +++ b/litgpt/scripts/download.py @@ -1,7 +1,6 @@ # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file. 
import os -import sys from contextlib import contextmanager from pathlib import Path from typing import Optional, List, Tuple @@ -9,12 +8,8 @@ import torch from lightning_utilities.core.imports import RequirementCache -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) - +from litgpt.scripts.convert_hf_checkpoint import convert_hf_checkpoint from litgpt.utils import CLI -from scripts.convert_hf_checkpoint import convert_hf_checkpoint _SAFETENSORS_AVAILABLE = RequirementCache("safetensors") _HF_TRANSFER_AVAILABLE = RequirementCache("hf_transfer") diff --git a/scripts/merge_lora.py b/litgpt/scripts/merge_lora.py similarity index 94% rename from scripts/merge_lora.py rename to litgpt/scripts/merge_lora.py index 46ed647485..45627cad93 100644 --- a/scripts/merge_lora.py +++ b/litgpt/scripts/merge_lora.py @@ -2,21 +2,15 @@ """This script merges the LoRA weights with the base model""" import os -import sys from pathlib import Path from typing import Optional, Tuple, Dict, Any -import yaml - import lightning as L import torch - -# support running without installing as a package -wd = Path(__file__).parent.parent.resolve() -sys.path.append(str(wd)) +import yaml from litgpt.lora import GPT, Config, lora_filter, merge_lora_weights -from litgpt.utils import CLI, check_valid_checkpoint_dir, get_default_supported_precision, lazy_load +from litgpt.utils import CLI, check_valid_checkpoint_dir, lazy_load def merge_lora( diff --git a/litgpt/utils.py b/litgpt/utils.py index 94ec82ab7f..618775981a 100644 --- a/litgpt/utils.py +++ b/litgpt/utils.py @@ -72,7 +72,7 @@ def check_valid_checkpoint_dir(checkpoint_dir: Path) -> None: error_message = ( f"--checkpoint_dir {str(checkpoint_dir.absolute())!r}{problem}." 
"\nFind download instructions at https://github.com/Lightning-AI/litgpt/blob/main/tutorials\n" - f"{extra}\nSee all download options by running:\n python scripts/download.py" + f"{extra}\nSee all download options by running:\n python litgpt/scripts/download.py" ) print(error_message, file=sys.stderr) raise SystemExit(1) diff --git a/notebooks/falcon-inference.ipynb b/notebooks/falcon-inference.ipynb index 96bd035d4e..30dc390a92 100644 --- a/notebooks/falcon-inference.ipynb +++ b/notebooks/falcon-inference.ipynb @@ -60,7 +60,7 @@ "outputs": [], "source": [ "# download the weights\n", - "!python scripts/download.py --repo_id tiiuae/falcon-7b" + "!python litgpt/scripts/download.py --repo_id tiiuae/falcon-7b" ] }, { diff --git a/tests/test_adapter_v2.py b/tests/test_adapter_v2.py index d7cd1aef74..8bcc0bc9b5 100644 --- a/tests/test_adapter_v2.py +++ b/tests/test_adapter_v2.py @@ -186,7 +186,7 @@ def test_against_hf_mixtral(): from transformers.models.mixtral import MixtralConfig, MixtralForCausalLM from litgpt.adapter_v2 import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama device = torch.device("cpu") dtype = torch.float32 diff --git a/tests/test_convert_hf_checkpoint.py b/tests/test_convert_hf_checkpoint.py index 40ea1e5837..65141e6113 100644 --- a/tests/test_convert_hf_checkpoint.py +++ b/tests/test_convert_hf_checkpoint.py @@ -8,7 +8,7 @@ def test_llama2_70b_conversion(): from litgpt import Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama shapes = { "model.embed_tokens.weight": (32000, 8192), @@ -102,14 +102,14 @@ def test_llama2_70b_conversion(): def test_convert_hf_checkpoint(tmp_path): - from scripts.convert_hf_checkpoint import convert_hf_checkpoint + from litgpt.scripts.convert_hf_checkpoint import convert_hf_checkpoint with pytest.raises(ValueError, match="to contain .bin"): convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m") bin_file = tmp_path / "foo.bin" bin_file.touch() - with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load: + with mock.patch("litgpt.scripts.convert_hf_checkpoint.lazy_load") as load: convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-14m") load.assert_called_with(bin_file) diff --git a/tests/test_convert_lit_checkpoint.py b/tests/test_convert_lit_checkpoint.py index 851bf5928d..2787f78018 100644 --- a/tests/test_convert_lit_checkpoint.py +++ b/tests/test_convert_lit_checkpoint.py @@ -16,7 +16,7 @@ def test_convert_lit_checkpoint(tmp_path): from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import convert_lit_checkpoint + from litgpt.scripts.convert_lit_checkpoint import convert_lit_checkpoint ours_config = Config.from_name("Llama-2-7b-hf", block_size=8, n_layer=2, n_embd=32, n_head=2, padding_multiple=128) ours_model = GPT(ours_config) @@ -43,7 +43,7 @@ def test_against_falcon_40b(): from transformers.models.falcon.modeling_falcon import FalconForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_falcon as copy_to_theirs + from litgpt.scripts.convert_lit_checkpoint import copy_weights_falcon as copy_to_theirs ours_config = Config.from_name("falcon-40b", n_layer=2, n_head=8, n_query_groups=4, n_embd=32) theirs_config = FalconConfig( @@ -78,7 +78,7 @@ def test_against_original_gpt_neox(): from transformers import GPTNeoXConfig, GPTNeoXForCausalLM from 
litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_gpt_neox as copy_to_theirs + from litgpt.scripts.convert_lit_checkpoint import copy_weights_gpt_neox as copy_to_theirs ours_config = Config(block_size=64, vocab_size=100, n_layer=4, n_head=8, n_embd=16) assert ours_config.padded_vocab_size == 512 @@ -123,7 +123,7 @@ def test_against_hf_llama2(ours_kwargs): from transformers.models.llama.modeling_llama import LlamaForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_llama + from litgpt.scripts.convert_lit_checkpoint import copy_weights_llama ours_config = Config.from_name( padded_vocab_size=10000, n_layer=2, n_head=8, n_embd=32, intermediate_size=86, **ours_kwargs @@ -161,7 +161,7 @@ def test_against_mixtral(): from transformers.models.mixtral import MixtralConfig, MixtralForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_llama + from litgpt.scripts.convert_lit_checkpoint import copy_weights_llama ours_config = Config.from_name( "Mixtral-8x7B-Instruct-v0.1", @@ -208,7 +208,7 @@ def test_against_original_open_llama_3b(): from transformers.models.llama.modeling_llama import LlamaForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_llama + from litgpt.scripts.convert_lit_checkpoint import copy_weights_llama ours_config = Config.from_name("open_llama_3b", n_layer=2, n_head=8, n_embd=32, intermediate_size=86) T = 5 @@ -250,7 +250,7 @@ def test_against_hf_phi_1_5(): urlretrieve(url=url, filename=file_path) from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_phi + from litgpt.scripts.convert_lit_checkpoint import copy_weights_phi from reference_models.configuration_phi import PhiConfig from reference_models.original_phi_1_5 import PhiForCausalLM @@ -300,7 +300,7 @@ def test_against_hf_phi_2(): urlretrieve(url=url, filename=file_path) from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_phi + from litgpt.scripts.convert_lit_checkpoint import copy_weights_phi from reference_models.configuration_phi import PhiConfig from reference_models.original_phi_2 import PhiForCausalLM @@ -341,7 +341,7 @@ def test_against_original_stablelm_zephyr_3b(): from transformers import AutoConfig, AutoModelForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_llama + from litgpt.scripts.convert_lit_checkpoint import copy_weights_llama T = 5 ours_config = Config.from_name("stablelm-zephyr-3b", n_layer=2, n_head=16, n_embd=32, intermediate_size=86) @@ -395,7 +395,7 @@ def test_against_original_gemma(model_name, device, dtype): from transformers.models.gemma.modeling_gemma import GemmaForCausalLM from litgpt import GPT, Config - from scripts.convert_lit_checkpoint import copy_weights_llama + from litgpt.scripts.convert_lit_checkpoint import copy_weights_llama torch.set_default_dtype(dtype) @@ -436,7 +436,7 @@ def test_against_original_gemma(model_name, device, dtype): def test_check_conversion_supported_adapter(): - from scripts.convert_lit_checkpoint import check_conversion_supported + from litgpt.scripts.convert_lit_checkpoint import check_conversion_supported lit_weights = {"some.key.name": ANY, "error.key.gating_factor": ANY} with pytest.raises(NotImplementedError, match="Converting adapter"): @@ -448,7 +448,7 @@ def test_check_conversion_supported_adapter(): def test_check_conversion_supported_lora(): - from 
scripts.convert_lit_checkpoint import check_conversion_supported + from litgpt.scripts.convert_lit_checkpoint import check_conversion_supported lit_weights = {"some.key.name": ANY, "error.key.lora": ANY} with pytest.raises(ValueError, match=r"LoRA.*cannot be converted"): @@ -457,7 +457,7 @@ def test_check_conversion_supported_lora(): def test_qkv_split(): from litgpt import Config - from scripts.convert_lit_checkpoint import qkv_split + from litgpt.scripts.convert_lit_checkpoint import qkv_split # MHA config = Config(n_embd=4, n_head=4) diff --git a/tests/test_convert_pretrained_checkpoint.py b/tests/test_convert_pretrained_checkpoint.py index 195d79bc52..d95260f0d6 100644 --- a/tests/test_convert_pretrained_checkpoint.py +++ b/tests/test_convert_pretrained_checkpoint.py @@ -6,7 +6,7 @@ def test_convert_pretrained_checkpoint(tmp_path, fake_checkpoint_dir): - from scripts.convert_pretrained_checkpoint import convert_checkpoint + from litgpt.scripts.convert_pretrained_checkpoint import convert_checkpoint # Pretend we made a checkpoint from pretraining pretrained_checkpoint = { diff --git a/tests/test_generate_sequentially.py b/tests/test_generate_sequentially.py index da0a1507bf..32f41e01d6 100644 --- a/tests/test_generate_sequentially.py +++ b/tests/test_generate_sequentially.py @@ -275,7 +275,7 @@ def test_model_forward_hooks(): @RunIf(min_cuda_gpus=2) def test_base_with_sequentially(tmp_path): from litgpt import GPT, Config - from scripts.download import download_from_hub + from litgpt.scripts.download import download_from_hub # download the tokenizer download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path) diff --git a/tests/test_generate_tp.py b/tests/test_generate_tp.py index 6451f1dd10..f1da3e2f57 100644 --- a/tests/test_generate_tp.py +++ b/tests/test_generate_tp.py @@ -109,7 +109,7 @@ def test_tensor_parallel_llama(name, expected): @RunIf(min_cuda_gpus=2) def test_tp(tmp_path): from litgpt import GPT, Config - from scripts.download import download_from_hub + from litgpt.scripts.download import download_from_hub # download the tokenizer download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=tmp_path) diff --git a/tests/test_lm_eval_harness.py b/tests/test_lm_eval_harness.py index ca738253ec..9acd661e55 100644 --- a/tests/test_lm_eval_harness.py +++ b/tests/test_lm_eval_harness.py @@ -20,7 +20,7 @@ def test_run_eval(tmp_path, float_like): from eval.lm_eval_harness import EvalHarnessBase from litgpt.model import GPT from litgpt.tokenizer import Tokenizer - from scripts.download import download_from_hub + from litgpt.scripts.download import download_from_hub fabric = Fabric(devices=1) with fabric.init_module(): diff --git a/tests/test_lora.py b/tests/test_lora.py index 43318c209d..37c3777c81 100644 --- a/tests/test_lora.py +++ b/tests/test_lora.py @@ -545,7 +545,7 @@ def test_against_hf_mixtral(): from transformers.models.mixtral import MixtralConfig, MixtralForCausalLM from litgpt.lora import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama device = torch.device("cpu") dtype = torch.float32 diff --git a/tests/test_merge_lora.py b/tests/test_merge_lora.py index eec6df0a44..fee5d02b27 100644 --- a/tests/test_merge_lora.py +++ b/tests/test_merge_lora.py @@ -20,7 +20,7 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir): from litgpt.lora import GPT as LoRAGPT from litgpt.lora import lora_filter from litgpt.model 
import GPT - from scripts.merge_lora import merge_lora + from litgpt.scripts.merge_lora import merge_lora pretrained_checkpoint_dir = tmp_path / "pretrained" lora_checkpoint_dir = tmp_path / "lora" @@ -74,7 +74,7 @@ def test_merge_lora(tmp_path, fake_checkpoint_dir): def test_load_lora_metadata(fake_checkpoint_dir): - from scripts.merge_lora import load_lora_metadata + from litgpt.scripts.merge_lora import load_lora_metadata assert not (fake_checkpoint_dir / "hyperparameters.yaml").is_file() with pytest.raises(FileNotFoundError, match="missing a `hyperparameters.yaml` file"): diff --git a/tests/test_model.py b/tests/test_model.py index 6a176008f5..2919e51fe2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -44,7 +44,7 @@ def test_against_gpt_neox_model(rotary_pct, batch_size, n_embd, parallel_residua from transformers import GPTNeoXConfig, GPTNeoXForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_gpt_neox + from litgpt.scripts.convert_hf_checkpoint import copy_weights_gpt_neox torch.set_default_dtype(dtype) @@ -117,7 +117,7 @@ def test_against_hf_falcon(kwargs, device, dtype): from transformers.models.falcon import FalconConfig, FalconForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_falcon + from litgpt.scripts.convert_hf_checkpoint import copy_weights_falcon torch.set_default_dtype(dtype) @@ -169,7 +169,7 @@ def test_against_original_open_llama_3b(device, dtype): from transformers.models.llama.modeling_llama import LlamaForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama torch.set_default_dtype(dtype) @@ -225,7 +225,7 @@ def test_against_hf_llama2(ours_kwargs, device, dtype): from transformers.models.llama.modeling_llama import LlamaForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama torch.set_default_dtype(dtype) @@ -290,7 +290,7 @@ def test_against_hf_phi_1_5(device, dtype): from reference_models.original_phi_1_5 import PhiForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_phi + from litgpt.scripts.convert_hf_checkpoint import copy_weights_phi torch.set_default_dtype(dtype) @@ -352,7 +352,7 @@ def test_against_hf_phi_2(device, dtype): from reference_models.original_phi_2 import PhiForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_phi + from litgpt.scripts.convert_hf_checkpoint import copy_weights_phi torch.set_default_dtype(dtype) @@ -408,7 +408,7 @@ def test_against_hf_mistral(device, dtype): from transformers.models.mistral.modeling_mistral import MistralForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama torch.set_default_dtype(dtype) @@ -455,7 +455,7 @@ def test_against_hf_mixtral(): from transformers.models.mixtral import MixtralConfig, MixtralForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama device = torch.device("cpu") dtype = torch.float32 @@ -520,7 +520,7 @@ def test_against_original_stablelm_zephyr_3b(device, dtype): from transformers import AutoConfig, 
AutoModelForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama torch.set_default_dtype(dtype) @@ -577,7 +577,7 @@ def test_against_original_gemma(model_name, device, dtype): from transformers.models.gemma.modeling_gemma import GemmaForCausalLM from litgpt import GPT, Config - from scripts.convert_hf_checkpoint import copy_weights_hf_llama + from litgpt.scripts.convert_hf_checkpoint import copy_weights_hf_llama torch.set_default_dtype(dtype) diff --git a/tests/test_utils.py b/tests/test_utils.py index d39f1d1162..8e4068fe72 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -43,7 +43,7 @@ def test_check_valid_checkpoint_dir(tmp_path): Find download instructions at https://github.com/Lightning-AI/litgpt/blob/main/tutorials See all download options by running: - python scripts/download.py + python litgpt/scripts/download.py """.strip() assert out == expected @@ -57,7 +57,7 @@ def test_check_valid_checkpoint_dir(tmp_path): Find download instructions at https://github.com/Lightning-AI/litgpt/blob/main/tutorials See all download options by running: - python scripts/download.py + python litgpt/scripts/download.py """.strip() assert out == expected @@ -75,7 +75,7 @@ def test_check_valid_checkpoint_dir(tmp_path): --checkpoint_dir '{str(checkpoint_dir.absolute())}' See all download options by running: - python scripts/download.py + python litgpt/scripts/download.py """.strip() assert out == expected diff --git a/tutorials/convert_hf_checkpoint.md b/tutorials/convert_hf_checkpoint.md index 6c1e45e36c..cc26a46587 100644 --- a/tutorials/convert_hf_checkpoint.md +++ b/tutorials/convert_hf_checkpoint.md @@ -1,9 +1,9 @@ # Converting Hugging Face Transformers to LitGPT weights -By default, the `scripts/download.py` script converts the downloaded HF checkpoint files into a LitGPT compatible format after downloading. For example, +By default, the `litgpt/scripts/download.py` script converts the downloaded HF checkpoint files into a LitGPT compatible format after downloading. For example, ```bash -python scripts/download.py --repo_id EleutherAI/pythia-14m +python litgpt/scripts/download.py --repo_id EleutherAI/pythia-14m ``` creates the following files: @@ -23,12 +23,12 @@ checkpoints/ -To disable the automatic conversion, which is useful for development and debugging purposes, you can run the `scripts/download.py` with the `--convert_checkpoint false` flag. This will only download the checkpoint files but do not convert them for use in LitGPT: +To disable the automatic conversion, which is useful for development and debugging purposes, you can run the `litgpt/scripts/download.py` with the `--convert_checkpoint false` flag. 
This will only download the checkpoint files but do not convert them for use in LitGPT: ```bash rm -rf checkpoints/EleutherAI/pythia-14m -python scripts/download.py \ +python litgpt/scripts/download.py \ --repo_id EleutherAI/pythia-14m \ --convert_checkpoint false @@ -46,10 +46,10 @@ ls checkpoints/EleutherAI/pythia-14m └── tokenizer_config.json ``` -The required files `lit_config.json` and `lit_model.pth` files can then be manually generated via the `scripts/convert_hf_checkpoint.py` script: +The required files `lit_config.json` and `lit_model.pth` files can then be manually generated via the `litgpt/scripts/convert_hf_checkpoint.py` script: ```bash -python scripts/convert_hf_checkpoint.py \ +python litgpt/scripts/convert_hf_checkpoint.py \ --checkpoint_dir checkpoints/EleutherAI/pythia-14m ``` diff --git a/tutorials/convert_lit_models.md b/tutorials/convert_lit_models.md index 53ba412798..d4b88cf6fd 100644 --- a/tutorials/convert_lit_models.md +++ b/tutorials/convert_lit_models.md @@ -1,11 +1,11 @@ ## Converting LitGPT weights to Hugging Face Transformers -LitGPT weights need to be converted to a format that Hugging Face understands with a [conversion script](../scripts/convert_lit_checkpoint.py) before our scripts can run. +LitGPT weights need to be converted to a format that Hugging Face understands with a [conversion script](../litgpt/scripts/convert_lit_checkpoint.py) before our scripts can run. We provide a helpful script to convert models LitGPT models back to their equivalent Hugging Face Transformers format: ```sh -python scripts/convert_lit_checkpoint.py \ +python litgpt/scripts/convert_lit_checkpoint.py \ --checkpoint_path checkpoints/repo_id/lit_model.pth \ --output_path output_path/converted.pth \ --config_path checkpoints/repo_id/config.json @@ -46,7 +46,7 @@ model = AutoModel.from_pretrained("online_repo_id", state_dict=state_dict) ### Merging LoRA weights -Please note that if you want to convert a model that has been fine-tuned using an adapter like LoRA, these weights should be [merged](../scripts/merge_lora.py) to the checkpoint prior to converting. +Please note that if you want to convert a model that has been fine-tuned using an adapter like LoRA, these weights should be [merged](../litgpt/scripts/merge_lora.py) to the checkpoint prior to converting. ```sh python scripts/merge_lora.py \ @@ -70,12 +70,12 @@ export repo_id=TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T Instead of using TinyLlama, you can replace the `repo_id` target with any other model repository specifier that is currently supported by LitGPT. You can get a list of supported repository specifier -by running `scripts/download.py` without any additional arguments. +by running `litgpt/scripts/download.py` without any additional arguments. Then, we download the model we specified via `$repo_id` above: ```bash -python scripts/download.py --repo_id $repo_id +python litgpt/scripts/download.py --repo_id $repo_id ``` 2. Finetune the model: @@ -104,7 +104,7 @@ python scripts/merge_lora.py \ 4. 
Convert the finetuning model back into a HF format: ```bash -python scripts/convert_lit_checkpoint.py \ +python litgpt/scripts/convert_lit_checkpoint.py \ --checkpoint_path $finetuned_dir/final/lit_model.pth \ --output_path out/hf-tinyllama/converted_model.pth \ --config_path checkpoints/$repo_id/lit_config.json diff --git a/tutorials/download_code_llama.md b/tutorials/download_code_llama.md index a5075a1086..fb467557cb 100644 --- a/tutorials/download_code_llama.md +++ b/tutorials/download_code_llama.md @@ -13,7 +13,7 @@ All models were trained on 16,000 token contexts and support generations with u To see all the available checkpoints, run: ```bash -python scripts/download.py | grep CodeLlama +python litgpt/scripts/download.py | grep CodeLlama ``` which will print @@ -38,7 +38,7 @@ In order to use a specific checkpoint, for instance [CodeLlama-7b-Python-hf](htt ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id codellama/CodeLlama-7b-Python-hf +python litgpt/scripts/download.py --repo_id codellama/CodeLlama-7b-Python-hf ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_dolly.md b/tutorials/download_dolly.md index 1a17dd5d31..103d322f0c 100644 --- a/tutorials/download_dolly.md +++ b/tutorials/download_dolly.md @@ -12,7 +12,7 @@ For detailed info on the models, their training, and their behavior, please see To see all the available checkpoints for Dolly, run: ```bash -python scripts/download.py | grep dolly +python litgpt/scripts/download.py | grep dolly ``` which will print @@ -28,7 +28,7 @@ In order to use a specific Dolly checkpoint, for instance [dolly-v2-3b](https:// ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id databricks/dolly-v2-3b +python litgpt/scripts/download.py --repo_id databricks/dolly-v2-3b ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_falcon.md b/tutorials/download_falcon.md index 18c28b4d26..4346ff1ea5 100644 --- a/tutorials/download_falcon.md +++ b/tutorials/download_falcon.md @@ -9,7 +9,7 @@ The first Falcon release includes a base model and an instruction tuned model of To see all the available checkpoints for Falcon, run: ```bash -python scripts/download.py | grep falcon +python litgpt/scripts/download.py | grep falcon ``` which will print @@ -28,7 +28,7 @@ In order to use a specific Falcon checkpoint, for instance [falcon-7b](https://h ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id tiiuae/falcon-7b +python litgpt/scripts/download.py --repo_id tiiuae/falcon-7b ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. 
In cases where RAM diff --git a/tutorials/download_freewilly_2.md b/tutorials/download_freewilly_2.md index f228533ec6..50fd020941 100644 --- a/tutorials/download_freewilly_2.md +++ b/tutorials/download_freewilly_2.md @@ -7,7 +7,7 @@ FreeWilly2 leverages the Llama 2 70B foundation model to reach a performance tha ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id stabilityai/FreeWilly2 +python litgpt/scripts/download.py --repo_id stabilityai/FreeWilly2 ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_function_calling_llama_2.md b/tutorials/download_function_calling_llama_2.md index a275205bd4..c084f1b815 100644 --- a/tutorials/download_function_calling_llama_2.md +++ b/tutorials/download_function_calling_llama_2.md @@ -10,7 +10,7 @@ In order to use the checkpoint, download the weights and convert the checkpoint ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id Trelis/Llama-2-7b-chat-hf-function-calling-v2 +python litgpt/scripts/download.py --repo_id Trelis/Llama-2-7b-chat-hf-function-calling-v2 ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_gemma.md b/tutorials/download_gemma.md index d929c4662d..b7089e8c5b 100644 --- a/tutorials/download_gemma.md +++ b/tutorials/download_gemma.md @@ -8,7 +8,7 @@ For more information, please see the [technical report](https://storage.googleap To see all the available checkpoints, run: ```bash -python scripts/download.py | grep gemma +python litgpt/scripts/download.py | grep gemma ``` which will print @@ -30,7 +30,7 @@ After access is granted, you can find your HF hub token in [!WARNING] @@ -61,7 +61,7 @@ In order to use the phi-1.5 model checkpoint, which requires about 3 Gb of disk ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id microsoft/phi-1_5 +python litgpt/scripts/download.py --repo_id microsoft/phi-1_5 ``` You're done! To execute the model just run: diff --git a/tutorials/download_pythia.md b/tutorials/download_pythia.md index 653b16c980..b5c158b575 100644 --- a/tutorials/download_pythia.md +++ b/tutorials/download_pythia.md @@ -8,7 +8,7 @@ It includes a suite of 8 checkpoints (weights) on 2 different datasets: [The Pil To see all the available checkpoints for Pythia, run: ```bash -python scripts/download.py | grep pythia +python litgpt/scripts/download.py | grep pythia ``` which will print @@ -39,7 +39,7 @@ In order to use a specific Pythia checkpoint, for instance [pythia-1b](https://h ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id EleutherAI/pythia-1b +python litgpt/scripts/download.py --repo_id EleutherAI/pythia-1b ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. 
In cases where RAM diff --git a/tutorials/download_redpajama_incite.md b/tutorials/download_redpajama_incite.md index 456d704024..c7648e5f45 100644 --- a/tutorials/download_redpajama_incite.md +++ b/tutorials/download_redpajama_incite.md @@ -7,7 +7,7 @@ The release includes a base model, a chat fine-tuned model, and an instruction t To see all the available checkpoints for RedPajama-INCITE, run: ```bash -python scripts/download.py | grep RedPajama +python litgpt/scripts/download.py | grep RedPajama ``` which will print @@ -29,7 +29,7 @@ In order to use a specific RedPajama-INCITE checkpoint, for instance [RedPajama- ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id togethercomputer/RedPajama-INCITE-Base-3B-v1 +python litgpt/scripts/download.py --repo_id togethercomputer/RedPajama-INCITE-Base-3B-v1 ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_stablecode.md b/tutorials/download_stablecode.md index 734c30d0c5..1e6a9b7b4b 100644 --- a/tutorials/download_stablecode.md +++ b/tutorials/download_stablecode.md @@ -11,7 +11,7 @@ For more info on the models, please visit the [StableCode repository](https://hu To see all the available checkpoints for StableCode, run: ```bash -python scripts/download.py | grep -E "stable-?code" +python litgpt/scripts/download.py | grep -E "stable-?code" ``` which will print: @@ -29,7 +29,7 @@ In order to use a specific StableCode checkpoint, for instance [stable-code-3b]( pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' export repo_id=stabilityai/stable-code-3b -python scripts/download.py --repo_id $repo_id +python litgpt/scripts/download.py --repo_id $repo_id ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/download_stablelm.md b/tutorials/download_stablelm.md index 5f8d922eca..5aef6fe93f 100644 --- a/tutorials/download_stablelm.md +++ b/tutorials/download_stablelm.md @@ -5,7 +5,7 @@ StableLM is a family of generative language models trained by StabilityAI. To see all the available checkpoints for StableLM, run: ```bash -python scripts/download.py | grep stablelm +python litgpt/scripts/download.py | grep stablelm ``` which will print: @@ -24,7 +24,7 @@ In order to use a specific StableLM checkpoint, for instance [stablelm-base-alph ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id stabilityai/stablelm-base-alpha-3b +python litgpt/scripts/download.py --repo_id stabilityai/stablelm-base-alpha-3b ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM @@ -56,8 +56,7 @@ In order to use a specific StableLM checkpoint, for instance [StableLM Zephyr 3B pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' export repo_id=stabilityai/stablelm-zephyr-3b -python scripts/download.py --repo_id $repo_id -python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/$repo_id +python litgpt/scripts/download.py --repo_id $repo_id ``` By default, the `convert_hf_checkpoint` step will use the data type of the HF checkpoint's parameters. 
In cases where RAM diff --git a/tutorials/download_tinyllama.md b/tutorials/download_tinyllama.md index 5723d4d6e1..681b8c99cf 100644 --- a/tutorials/download_tinyllama.md +++ b/tutorials/download_tinyllama.md @@ -8,7 +8,7 @@ There are two version of TinyLlama available: a base one and a fine-tuned "Chat" To see all available versions, run: ```bash -python scripts/download.py | grep TinyLlama +python litgpt/scripts/download.py | grep TinyLlama ``` which will print @@ -23,7 +23,7 @@ In order to use a specific checkpoint, for instance [TinyLlama 1.1B base model]( ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T +python litgpt/scripts/download.py --repo_id TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T ``` ----- @@ -32,13 +32,13 @@ With the `Chat` version of the model, the download and conversion procedures are As this version of the model is stored in `safetensor` format, to download it an additional flag is required: ```bash -python scripts/download.py --repo_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 +python litgpt/scripts/download.py --repo_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 ``` The model is shipped in `bfloat16` format, so if your hardware doesn't support it, you can provide `--dtype` argument during model conversion. For example we can convert the weights into `float32` format: ```bash -python scripts/download.py \ +python litgpt/scripts/download.py \ --repo_id checkpoints/TinyLlama/TinyLlama-1.1B-Chat-v1.0 --dtype=float32 ``` diff --git a/tutorials/download_vicuna.md b/tutorials/download_vicuna.md index 97e73091e4..11d24a33eb 100644 --- a/tutorials/download_vicuna.md +++ b/tutorials/download_vicuna.md @@ -5,7 +5,7 @@ Vicuna is an open-source family of chatbots trained by fine-tuning LLaMA on user To see all the available checkpoints for Vicuna, run: ```bash -python scripts/download.py | grep vicuna +python litgpt/scripts/download.py | grep vicuna ``` which will print @@ -25,7 +25,7 @@ In order to use a specific Vicuna checkpoint, for instance [vicuna-7b-v1.5](http ```bash pip install 'huggingface_hub[hf_transfer] @ git+https://github.com/huggingface/huggingface_hub' -python scripts/download.py --repo_id lmsys/vicuna-7b-v1.5 +python litgpt/scripts/download.py --repo_id lmsys/vicuna-7b-v1.5 ``` By default, the checkpoint conversion step will use the data type of the HF checkpoint's parameters. In cases where RAM diff --git a/tutorials/pretrain_tinyllama.md b/tutorials/pretrain_tinyllama.md index 8438fe3a64..8fb6c2a7fd 100644 --- a/tutorials/pretrain_tinyllama.md +++ b/tutorials/pretrain_tinyllama.md @@ -55,7 +55,7 @@ pip install 'lightning[data]' torchmetrics tensorboard sentencepiece zstandard p You will need to have the tokenizer config available: ```bash -python scripts/download.py \ +python litgpt/scripts/download.py \ --repo_id meta-llama/Llama-2-7b-hf \ --access_token your_hf_token \ --tokenizer_only true @@ -118,7 +118,7 @@ The currently supported model names are contained in the [config.py](https://git You can 1) either search this file for lines containing "name =", -2) or run `python scripts/download.py` without additional command line arguments +2) or run `python litgpt/scripts/download.py` without additional command line arguments Keep in mind that training with a single machine will take weeks. To speed up the process, you'll need access to a cluster. 
Once you're in a cluster, you can follow [these instructions](https://lightning.ai/docs/fabric/stable/fundamentals/launch.html#launch-on-a-cluster) @@ -156,7 +156,7 @@ python litgpt/pretrain.py \ After training is completed, you can convert the checkpoint to a format that can be loaded for evaluation, inference, finetuning etc. ```bash -python scripts/convert_pretrained_checkpoint.py \ +python litgpt/scripts/convert_pretrained_checkpoint.py \ --checkpoint_dir out/pretrain/tiny-llama/step-00060500 \ --output_dir checkpoints/tiny-llama/final ```
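
With this reorganization, the download and conversion helpers become regular modules inside the `litgpt` package, so the `sys.path` shim removed above is no longer needed and callers can import them directly. A minimal sketch, assuming `litgpt` is installed as a package (for example via `pip install -e .`); the import paths and the commented call shapes mirror the updated tests in this diff, and the concrete checkpoint paths are illustrative only:

```python
# Minimal sketch: after this refactor the former standalone scripts are importable
# as package modules (no sys.path manipulation required).
from litgpt.scripts.convert_hf_checkpoint import convert_hf_checkpoint
from litgpt.scripts.download import download_from_hub
from litgpt.scripts.merge_lora import merge_lora

# Call shapes as exercised by the updated tests (directories are illustrative):
# download_from_hub(repo_id="EleutherAI/pythia-14m", tokenizer_only=True, checkpoint_dir=Path("checkpoints"))
# convert_hf_checkpoint(checkpoint_dir=Path("checkpoints/EleutherAI/pythia-14m"), model_name="pythia-14m")
```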