From 4f711659c0b1f1cac449b3ff54a1047f2db90227 Mon Sep 17 00:00:00 2001 From: plusbang Date: Fri, 13 Dec 2024 14:01:02 +0800 Subject: [PATCH] fix: move tokenizer.save_pretrained after model.save_low_bit --- .../llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py index d3abd13a6e6..6eaee048af7 100644 --- a/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py +++ b/python/llm/example/NPU/HF-Transformers-AutoModels/LLM/generate.py @@ -52,7 +52,6 @@ attn_implementation="eager" ) tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) - tokenizer.save_pretrained(args.lowbit_path) else: model = AutoModelForCausalLM.load_low_bit( args.lowbit_path, @@ -66,6 +65,7 @@ if args.lowbit_path and not os.path.exists(args.lowbit_path): model.save_low_bit(args.lowbit_path) + tokenizer.save_pretrained(args.lowbit_path) with torch.inference_mode(): input_ids = tokenizer.encode(args.prompt, return_tensors="pt")