diff --git a/litgpt/data/lit_data.py b/litgpt/data/lit_data.py
index 1350c16dbd..8347215fbd 100644
--- a/litgpt/data/lit_data.py
+++ b/litgpt/data/lit_data.py
@@ -52,7 +52,11 @@ def _dataloader(self, input_dir: str, train: bool):
         from litdata.streaming import StreamingDataset, TokensLoader
 
         dataset = StreamingDataset(
-            input_dir=input_dir, item_loader=TokensLoader(block_size=self.seq_length), shuffle=train, drop_last=True
+            input_dir=input_dir,
+            item_loader=TokensLoader(block_size=self.seq_length),
+            shuffle=train,
+            drop_last=True,
+            seed=self.seed,
         )
         dataloader = DataLoader(
             dataset, batch_size=self.batch_size, pin_memory=True, num_workers=self.num_workers, drop_last=True
diff --git a/litgpt/finetune/adapter.py b/litgpt/finetune/adapter.py
index aa02977fb9..2ec71784e7 100644
--- a/litgpt/finetune/adapter.py
+++ b/litgpt/finetune/adapter.py
@@ -145,7 +145,6 @@ def main(
 
     model = fabric.setup_module(model)
 
-    trainable_params = [p for p in model.parameters() if p.requires_grad]
     if isinstance(fabric.strategy.precision, BitsandbytesPrecision):
         import bitsandbytes as bnb
 
@@ -153,7 +152,7 @@
     else:
         optimizer_cls = torch.optim.AdamW
     optimizer = optimizer_cls(
-        trainable_params, lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
+        model.parameters(), lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
     )
     optimizer = fabric.setup_optimizers(optimizer)
     scheduler = get_lr_scheduler(optimizer, warmup_steps=train.lr_warmup_steps, max_steps=lr_max_steps)
diff --git a/litgpt/finetune/adapter_v2.py b/litgpt/finetune/adapter_v2.py
index 197bcd0bba..86526a58e5 100644
--- a/litgpt/finetune/adapter_v2.py
+++ b/litgpt/finetune/adapter_v2.py
@@ -145,7 +145,6 @@ def main(
 
     model = fabric.setup_module(model)
 
-    trainable_params = [p for p in model.parameters() if p.requires_grad]
     if isinstance(fabric.strategy.precision, BitsandbytesPrecision):
         import bitsandbytes as bnb
 
@@ -153,7 +152,7 @@
     else:
         optimizer_cls = torch.optim.AdamW
     optimizer = optimizer_cls(
-        trainable_params, lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
+        model.parameters(), lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
     )
     optimizer = fabric.setup_optimizers(optimizer)
     scheduler = get_lr_scheduler(optimizer, warmup_steps=train.lr_warmup_steps, max_steps=lr_max_steps)
diff --git a/litgpt/finetune/lora.py b/litgpt/finetune/lora.py
index bed58862bb..a2c3ef07b2 100644
--- a/litgpt/finetune/lora.py
+++ b/litgpt/finetune/lora.py
@@ -175,7 +175,6 @@ def main(
 
     model = fabric.setup_module(model)
 
-    trainable_params = [p for p in model.parameters() if p.requires_grad]
     if isinstance(fabric.strategy.precision, BitsandbytesPrecision):
         import bitsandbytes as bnb
 
@@ -183,7 +182,7 @@
     else:
         optimizer_cls = torch.optim.AdamW
     optimizer = optimizer_cls(
-        trainable_params, lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
+        model.parameters(), lr=train.learning_rate, weight_decay=train.weight_decay, betas=(train.beta1, train.beta2)
     )
     optimizer = fabric.setup_optimizers(optimizer)
     scheduler = get_lr_scheduler(optimizer, warmup_steps=train.lr_warmup_steps, max_steps=lr_max_steps)
diff --git a/litgpt/prompts.py b/litgpt/prompts.py
index a0e515c3f8..34f20ad541 100644
--- a/litgpt/prompts.py
+++ b/litgpt/prompts.py
@@ -318,6 +318,7 @@ def apply(self, prompt: str, **kwargs: str) -> str:
     "tinyllama": TinyLlama,
     "gemma": Gemma,
     "h2oai": H2Oai,
+    "llama3": Llama3,
 }
 
 
diff --git a/tests/data/test_lit_data.py b/tests/data/test_lit_data.py
index e5c1ea1716..a2c221c119 100644
--- a/tests/data/test_lit_data.py
+++ b/tests/data/test_lit_data.py
@@ -1,6 +1,7 @@
 # Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 import sys
 from unittest import mock
+from unittest.mock import ANY
 
 import pytest
 
@@ -34,3 +35,17 @@ def test_input_dir_and_splits(dl_mock, tmp_path):
     dl_mock.assert_called_with(input_dir=str("s3://mydataset/data/train"), train=True)
     data.val_dataloader()
     dl_mock.assert_called_with(input_dir=str("s3://mydataset/data/val"), train=False)
+
+
+@pytest.mark.skipif(sys.platform == "win32", reason="Needs to implement platform agnostic path/url joining")
+@mock.patch("litdata.streaming.StreamingDataset")
+def test_dataset_args(streaming_dataset_mock, tmp_path):
+    data = LitData(data_path=tmp_path, seed=1000)
+    data.train_dataloader()
+    streaming_dataset_mock.assert_called_with(
+        input_dir=str(tmp_path),
+        item_loader=ANY,
+        shuffle=True,
+        drop_last=True,
+        seed=1000,
+    )
diff --git a/tutorials/examples/minimal-generate-scripts/README.md b/tutorials/examples/minimal-generate-scripts/README.md
new file mode 100644
index 0000000000..5f3beaa726
--- /dev/null
+++ b/tutorials/examples/minimal-generate-scripts/README.md
@@ -0,0 +1,30 @@
+## Minimal LitGPT Generate Examples in Python
+
+
+
+The scripts in this folder provide minimal examples showing how to use LitGPT from within Python without the CLI.
+
+- `generate.py` is a minimal script that uses the `main` function from LitGPT's `generate` utilities
+- `generate-step-by-step.py` is a lower-level script using LitGPT utility functions directly instead of relying on the `main` function mentioned above.
+
+Assuming you downloaded the checkpoint files via
+
+```bash
+litgpt download --repo_id EleutherAI/pythia-1b
+```
+
+you can run the scripts as follows:
+
+
+```bash
+python generate-step-by-step.py
+```
+
+or
+
+```bash
+python generate.py
+```
+
+
+
diff --git a/tutorials/examples/minimal-generate-scripts/generate-step-by-step.py b/tutorials/examples/minimal-generate-scripts/generate-step-by-step.py
new file mode 100644
index 0000000000..b95f5cf4ff
--- /dev/null
+++ b/tutorials/examples/minimal-generate-scripts/generate-step-by-step.py
@@ -0,0 +1,84 @@
+# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
+
+from pathlib import Path
+
+import lightning as L
+import torch
+
+from litgpt.prompts import PromptStyle
+from litgpt.tokenizer import Tokenizer
+from litgpt.utils import load_checkpoint, get_default_supported_precision
+from litgpt.generate.base import generate
+from litgpt.model import GPT
+from litgpt.config import Config
+
+
+def use_model():
+
+    ###################
+    # Load model
+    ###################
+
+    # run `litgpt download --repo_id EleutherAI/pythia-1b` to download the checkpoint first
+    checkpoint_dir = Path("checkpoints") / "EleutherAI" / "pythia-1b"
+    config = Config.from_file(checkpoint_dir / "model_config.yaml")
+
+    precision = get_default_supported_precision(training=False)
+    device = torch.device("cuda")
+
+    fabric = L.Fabric(
+        accelerator=device.type,
+        devices=1,
+        precision=precision,
+    )
+
+    checkpoint_path = checkpoint_dir / "lit_model.pth"
+    tokenizer = Tokenizer(checkpoint_dir)
+
+    prompt_style = PromptStyle.from_config(config)
+
+    with fabric.init_module(empty_init=True):
+        model = GPT(config)
+    with fabric.init_tensor():
+        model.set_kv_cache(batch_size=1)
+
+    model.eval()
+    model = fabric.setup_module(model)
+    load_checkpoint(fabric, model, checkpoint_path)
+
+    device = fabric.device
+
+    ###################
+    # Predict
+    ###################
+
+    prompt = "What do Llamas eat?"
+    max_new_tokens = 50
+
+    prompt = prompt_style.apply(prompt)
+    encoded = tokenizer.encode(prompt, device=device)
+
+    prompt_length = encoded.size(0)
+    max_returned_tokens = prompt_length + max_new_tokens
+
+    torch.manual_seed(123)
+
+    y = generate(
+        model,
+        encoded,
+        max_returned_tokens,
+        temperature=0.5,
+        top_k=200,
+        top_p=1.0,
+        eos_id=tokenizer.eos_id
+    )
+
+    for block in model.transformer.h:
+        block.attn.kv_cache.reset_parameters()
+
+    decoded_output = tokenizer.decode(y)
+    print(decoded_output)
+
+
+if __name__ == "__main__":
+    use_model()
\ No newline at end of file
diff --git a/tutorials/examples/minimal-generate-scripts/generate.py b/tutorials/examples/minimal-generate-scripts/generate.py
new file mode 100644
index 0000000000..72c7b24b79
--- /dev/null
+++ b/tutorials/examples/minimal-generate-scripts/generate.py
@@ -0,0 +1,29 @@
+# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
+
+from pathlib import Path
+import torch
+from litgpt.generate.base import main
+from litgpt.utils import get_default_supported_precision
+
+
+def use_model():
+
+    # run `litgpt download --repo_id EleutherAI/pythia-1b` to download the checkpoint first
+    checkpoint_dir = Path("checkpoints") / "EleutherAI" / "pythia-1b"
+
+    torch.manual_seed(123)
+
+    main(
+        prompt="What food do llamas eat?",
+        max_new_tokens=50,
+        temperature=0.5,
+        top_k=200,
+        top_p=1.0,
+        checkpoint_dir=checkpoint_dir,
+        precision=get_default_supported_precision(training=False),
+        compile=False
+    )
+
+
+if __name__ == "__main__":
+    use_model()
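
As a quick usage sketch (not part of the diff, mirroring what `test_dataset_args` above exercises): the new `seed` argument is simply forwarded from `LitData` to litdata's `StreamingDataset`, making shuffling reproducible across runs. The `"data/"` path below is a placeholder for a directory previously prepared with litdata.

```python
from litgpt.data import LitData

# seed is passed through to StreamingDataset(..., shuffle=True, drop_last=True, seed=1000)
data = LitData(data_path="data/", seed=1000)  # "data/" is a placeholder path
train_loader = data.train_dataloader()
```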