forked from NVIDIA/ChatRTX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
124 lines (106 loc) · 4.85 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from pathlib import Path
from typing import Optional
from transformers import AutoTokenizer, T5Tokenizer
from tensorrt_llm.builder import get_engine_version
# TODO(enweiz): Update for refactored models
# Architecture name -> default model identifier. The values look like
# Hugging Face Hub repo ids (presumably used as the default tokenizer/model
# location for each architecture -- confirm against callers).
DEFAULT_HF_MODEL_DIRS = {
    'BaichuanForCausalLM': 'baichuan-inc/Baichuan-13B-Chat',
    'BloomForCausalLM': 'bigscience/bloom-560m',
    'ChatGLMForCausalLM': 'THUDM/chatglm3-6b',
    'FalconForCausalLM': 'tiiuae/falcon-rw-1b',
    'GPTForCausalLM': 'gpt2-medium',
    'GPTJForCausalLM': 'EleutherAI/gpt-j-6b',
    'GPTNeoXForCausalLM': 'EleutherAI/gpt-neox-20b',
    'InternLMForCausalLM': 'internlm/internlm-chat-7b',
    'LlamaForCausalLM': 'meta-llama/Llama-2-7b-hf',
    'MPTForCausalLM': 'mosaicml/mpt-7b',
    'PhiForCausalLM': 'microsoft/phi-2',
    'OPTForCausalLM': 'facebook/opt-350m',
    'QWenForCausalLM': 'Qwen/Qwen-7B',
}
# Architecture name -> prompt template. Each template carries an
# ``{input_text}`` placeholder (presumably filled with ``str.format`` by the
# caller -- confirm against usage). Architectures without an entry have no
# default template here.
DEFAULT_PROMPT_TEMPLATES = {
    'InternLMForCausalLM': "<|User|>:{input_text}<eoh>\n<|Bot|>:",
    # Adjacent literals are concatenated at compile time into one string.
    'QWenForCausalLM': (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        "<|im_start|>user\n{input_text}<|im_end|>\n"
        "<|im_start|>assistant\n"
    ),
}
def read_model_name(engine_dir: str):
    """Return ``(model_name, model_version)`` for the engine in *engine_dir*.

    The engine's ``config.json`` is parsed and interpreted according to what
    ``get_engine_version`` reports for the directory:

    * ``None`` -- the name is taken from ``config['builder_config']['name']``
      and the version is ``None`` (presumably an older engine layout --
      confirm against TensorRT-LLM).
    * anything else -- the name is
      ``config['pretrained_config']['architecture']``; the version is
      ``config['pretrained_config']['chatglm_version']`` when the
      architecture is ``'ChatGLMForCausalLM'`` and ``None`` otherwise.

    Args:
        engine_dir: Directory containing the engine's ``config.json``.

    Returns:
        A 2-tuple ``(name_or_architecture, version_or_None)``.

    Raises:
        OSError: If ``config.json`` cannot be opened.
        KeyError: If an expected key is missing from the config.
    """
    version = get_engine_version(engine_dir)

    config_path = Path(engine_dir) / "config.json"
    with open(config_path, 'r') as fp:
        cfg = json.load(fp)

    if version is None:
        return cfg['builder_config']['name'], None

    pretrained = cfg['pretrained_config']
    arch = pretrained['architecture']
    # Only ChatGLM engines carry a separate version field.
    arch_version = (pretrained['chatglm_version']
                    if arch == 'ChatGLMForCausalLM' else None)
    return arch, arch_version
def throttle_generator(generator, stream_interval):
    """Yield every ``stream_interval``-th item of *generator*, plus the last.

    Items at indices ``0, stream_interval, 2 * stream_interval, ...`` are
    passed through. After the input is exhausted, the final item is yielded
    as well if it was not already emitted, so the consumer always sees the
    last output.

    An empty *generator* yields nothing. (Previously the loop variable was
    never bound in that case and the trailing check raised
    ``UnboundLocalError``.)

    Args:
        generator: Any iterable of outputs.
        stream_interval: Spacing between emitted items. A value of ``0``
            raises ``ZeroDivisionError`` on the first item.

    Yields:
        The selected items, in their original order.
    """
    # Sentinel so that the post-loop check is safe when nothing was iterated.
    last_index = None
    out = None
    for last_index, out in enumerate(generator):
        if last_index % stream_interval == 0:
            yield out

    # Flush the final item when it fell between two emission points.
    if last_index is not None and last_index % stream_interval:
        yield out
def load_tokenizer(tokenizer_dir: Optional[str] = None,
                   vocab_file: Optional[str] = None,
                   model_name: str = 'GPTForCausalLM',
                   model_version: Optional[str] = None,
                   tokenizer_type: Optional[str] = None):
    """Create a tokenizer and resolve its pad / end token ids.

    Tokenizer construction:

    * ``vocab_file`` is ``None`` -- ``AutoTokenizer.from_pretrained`` is
      called on ``tokenizer_dir``; the fast tokenizer is requested unless
      ``tokenizer_type == "llama"``.
    * ``vocab_file`` given and ``model_name == 'GemmaForCausalLM'`` -- a
      ``GemmaTokenizer`` is built from the vocab file.
    * ``vocab_file`` given otherwise -- a ``T5Tokenizer`` is built from the
      vocab file.

    Every path uses left padding and left truncation with ``legacy=False``.

    Pad / end id resolution:

    * ``'QWenForCausalLM'`` -- ids are read from ``generation_config.json``
      inside ``tokenizer_dir``; only the ``'raw'`` and ``'chatml'`` chat
      formats are accepted.
    * ``'ChatGLMForCausalLM'`` with ``model_version == 'glm'`` -- the
      tokenizer's ``pad_token_id`` and ``eop_token_id`` are used.
    * otherwise -- the tokenizer's ``pad_token_id`` / ``eos_token_id`` are
      used; a missing pad token id is first set to the EOS id on the
      tokenizer object itself.

    Args:
        tokenizer_dir: Directory (or identifier) handed to ``AutoTokenizer``;
            also where ``generation_config.json`` is looked up for QWen.
        vocab_file: Optional vocab file; when set, ``AutoTokenizer`` is
            bypassed.
        model_name: Architecture name selecting the branches above.
        model_version: Only consulted for ``'ChatGLMForCausalLM'``.
        tokenizer_type: Forwarded to ``AutoTokenizer.from_pretrained``.

    Returns:
        A 3-tuple ``(tokenizer, pad_id, end_id)``.

    Raises:
        Exception: If a QWen ``generation_config.json`` declares a chat
            format other than ``'raw'`` or ``'chatml'``.
    """
    # Options common to every construction path: both padding_side and
    # truncation_side should be 'left'.
    common_opts = dict(padding_side='left',
                       truncation_side='left',
                       legacy=False)

    if vocab_file is not None:
        if model_name == 'GemmaForCausalLM':
            from transformers import GemmaTokenizer

            # Initialize tokenizer from vocab file.
            tokenizer = GemmaTokenizer(vocab_file=vocab_file, **common_opts)
        else:
            # For gpt-next, directly load from tokenizer.model
            tokenizer = T5Tokenizer(vocab_file=vocab_file, **common_opts)
    else:
        # NOTE(review): trust_remote_code=True lets the tokenizer source
        # supply custom Python code that gets executed -- only point this at
        # trusted directories / repositories.
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_dir,
            trust_remote_code=True,
            tokenizer_type=tokenizer_type,
            use_fast=tokenizer_type != "llama",
            **common_opts)

    if model_name == 'QWenForCausalLM':
        with open(Path(tokenizer_dir) / "generation_config.json") as fp:
            generation_cfg = json.load(fp)
        fmt = generation_cfg['chat_format']
        if fmt not in ('raw', 'chatml'):
            raise Exception(f"unknown chat format: {fmt}")
        pad_id = generation_cfg['pad_token_id']
        end_id = generation_cfg['eos_token_id']
    elif model_name == 'ChatGLMForCausalLM' and model_version == 'glm':
        pad_id = tokenizer.pad_token_id
        end_id = tokenizer.eop_token_id
    else:
        # Fall back to EOS as the pad token when none is configured.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
        pad_id = tokenizer.pad_token_id
        end_id = tokenizer.eos_token_id

    return tokenizer, pad_id, end_id