[Qwen] add test_net_with_mask in export_onnx.py
chuxiaoyi2023 committed Jun 15, 2024
1 parent db160b9 commit 418b6a2
Showing 3 changed files with 75 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -8,3 +8,6 @@ __pycache__
core
*.so
*.log
*.zip
*.tar.gz
*.deb
2 changes: 2 additions & 0 deletions README.md
@@ -160,4 +160,6 @@ echo "setr vpll_clock 100000000"> /sys/kernel/debug/top/clock

Power off for a few minutes, then run echo 3 > /proc/sys/vm/drop_caches to clear the caches and it goes back to normal; some operation may have caused memory stomping.

### Q4: Running python_demo fails with ValueError: vector::_M_default_append

A: This is caused by the CMake version requirement. Edit CMakeLists.txt and change its first line to cmake_minimum_required(VERSION 3.10), as sketched below.
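
For reference, a minimal sketch of what the top of CMakeLists.txt would look like after this fix; only the first line changes, and the placeholder comment stands in for the rest of your existing file:

```cmake
cmake_minimum_required(VERSION 3.10)
# ... the rest of the original CMakeLists.txt (project(), targets, etc.) stays as-is
```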
70 changes: 70 additions & 0 deletions models/Qwen1_5/compile/export_onnx.py
@@ -258,6 +258,76 @@ def convert_penalty_sample_head():
do_constant_folding=True,
opset_version=15)

def build_prompt(query):
    return f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n'

def test_net_with_mask():
    embed = Embedding()
    blocks = [QwenBlock(i) for i in range(NUM_LAYERS)]
    block_kvs = [QwenBlockCache(i) for i in range(NUM_LAYERS)]
    query = """tell me about sophgo in ten word"""
    print(query)
    prompt = build_prompt(query)
    import numpy as np
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    ids = tokenizer.encode(prompt)
    print("input ids:{}".format(ids))
    token_len = len(ids)
    ori_token_len = token_len
    # pad the prompt up to SEQ_LENGTH
    ids = ids + (SEQ_LENGTH - token_len) * [0]
    input_ids = torch.tensor(ids).view(SEQ_LENGTH).to(device)
    out = embed(input_ids).view(1, SEQ_LENGTH, HIDDEN_SIZE)
    position_ids = list(range(token_len)) + (SEQ_LENGTH - token_len) * [0]
    position_ids = torch.tensor([position_ids]).to(device)
    # causal mask over the real tokens; padded positions stay fully masked
    attention_mask = torch.ones((SEQ_LENGTH, SEQ_LENGTH)).float() * -10000.0
    for i in range(token_len):
        for j in range(token_len):
            if j <= i:
                attention_mask[i][j] = 0.0
    attention_mask = attention_mask.view(
        1, 1, SEQ_LENGTH, SEQ_LENGTH).to(device)
    k_cache = []
    v_cache = []
    # prefill: run the padded sequence through every block and keep each layer's kv cache
    for i in range(NUM_LAYERS):
        # breakpoint()
        out[:, token_len:] = 0
        out, k, v = blocks[i](out, position_ids, attention_mask)
        # k[:, SEQ_LENGTH - token_len:] = k[:, :token_len]
        # v[:, SEQ_LENGTH - token_len:] = v[:, :token_len]
        # k[:, :SEQ_LENGTH - token_len] = 0
        # v[:, :SEQ_LENGTH - token_len] = 0
        k_cache.append(k)
        v_cache.append(v)
    out = out[:, token_len - 1:token_len].view(1, 1, HIDDEN_SIZE)
    lm = LmHead()
    greedy_head = GreedyHead()
    token = greedy_head(lm(out)).view(1)
    out_ids = [int(token)]
    word = tokenizer.decode([int(token)])
    print(word, end="")
    # decode: greedily generate up to 10 further tokens, one at a time, with the kv cache
    while int(token) != tokenizer.eos_token_id and token_len < ori_token_len + 10:
        token_len += 1
        input_ids = torch.tensor([token]).to(device)
        out = embed(input_ids).view(1, 1, HIDDEN_SIZE)
        position_ids = torch.tensor([[token_len - 1]]).to(device)
        # 1 x (SEQ_LENGTH + 1) mask: positions past the current length are masked out
        attention_mask = torch.zeros((1, 1, 1, SEQ_LENGTH + 1)).float().to(device)
        attention_mask[:, :, :, token_len:SEQ_LENGTH] = -10000.0
        for i in range(NUM_LAYERS):
            # breakpoint()
            out, k, v = block_kvs[i](out, position_ids, attention_mask, k_cache[i], v_cache[i])
            k_cache[i][:, token_len:token_len+1] = k
            v_cache[i][:, token_len:token_len+1] = v
        print(out)
        token = greedy_head(lm(out)).view(1)
        out_ids.append(int(token))
        word = tokenizer.decode([int(token)])
        print(word, end="")
        # np.save(f'torch_{token_len}.npy', out)
    print("\noutput_ids:{}".format(out_ids))


# test_net_with_mask()

# create folder to store onnx
if not os.path.exists(folder):
