From b985c4ac350e22141974e9fcc502a5de29bdc621 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Sat, 13 Apr 2024 06:51:04 +0000
Subject: [PATCH] Mistral tokenizer to avoid the HF token

---
 extensions/thunder/README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions/thunder/README.md b/extensions/thunder/README.md
index f7a2e6f886..b66034a484 100644
--- a/extensions/thunder/README.md
+++ b/extensions/thunder/README.md
@@ -567,13 +567,15 @@ Config:
 ```yaml
 out_dir: out/pretrain-thunder
 data: TinyStories
-tokenizer_dir: checkpoints/meta-llama/Llama-2-7b-hf
+tokenizer_dir: checkpoints/mistralai/Mistral-7B-v0.1
 logger_name: csv
 ```
 
 Commands:
 
 ```bash
+litgpt download --repo_id mistralai/Mistral-7B-v0.1 --tokenizer_only true
+
 python extensions/thunder/pretrain.py --config config.yaml --compiler null --train.global_batch_size 32
 python extensions/thunder/pretrain.py --config config.yaml --executors '[torchcompile_complete]' --train.global_batch_size 32
 python extensions/thunder/pretrain.py --config config.yaml --executors '[sdpa, torchcompile, nvfuser, torch]' --train.global_batch_size 32