Skip to content

Commit

Permalink
fix gpu-onnx infer
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown authored and unknown committed Jul 2, 2024
1 parent dec409b commit dfa11bb
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
26 changes: 19 additions & 7 deletions runtime/gpu/model_repo/scoring/1/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def initialize(self, args):
def init_ctc_rescore(self, parameters):
num_processes = multiprocessing.cpu_count()
cutoff_prob = 0.9999
blank_id = 0

alpha = 2.0
beta = 1.0
bidecoder = 0
Expand Down Expand Up @@ -104,8 +104,13 @@ def init_ctc_rescore(self, parameters):

self.num_processes = num_processes
self.cutoff_prob = cutoff_prob
self.blank_id = blank_id
_, vocab = self.load_vocab(vocab_path)

ret = self.load_vocab(vocab_path)
id2vocab, vocab, space_id, blank_id, sos_eos = ret
self.space_id = space_id if space_id else -1
self.blank_id = blank_id if blank_id else 0
self.eos = self.sos = sos_eos if sos_eos else len(vocab) - 1

if lm_path and os.path.exists(lm_path):
self.lm = Scorer(alpha, beta, lm_path, vocab)
print("Successfully load language model!")
Expand All @@ -125,24 +130,31 @@ def init_ctc_rescore(self, parameters):
)
self.vocabulary = vocab
self.bidecoder = bidecoder
sos = eos = len(vocab) - 1
self.sos = sos
self.eos = eos




def load_vocab(self, vocab_file):
    """Parse a ``lang_char.txt``-style vocabulary file.

    Each non-empty line holds ``<token> <integer-id>``. Returns a 5-tuple
    ``(id2vocab, vocab, space_id, blank_id, sos_eos)`` where ``id2vocab``
    maps int id -> token, ``vocab`` is the token list indexed by id, and
    the three special ids are ``None`` when the corresponding token
    (" ", "<blank>", "<sos/eos>") is absent from the file.
    """
    token_by_id = {}
    space_id = None
    blank_id = None
    sos_eos = None
    with open(vocab_file, "r", encoding="utf-8") as fin:
        for raw_line in fin:
            token, idx_text = raw_line.strip().split()
            idx = int(idx_text)
            token_by_id[idx] = token
            # Record the ids of the special tokens as we encounter them.
            if token == " ":
                space_id = idx
            elif token == "<blank>":
                blank_id = idx
            elif token == "<sos/eos>":
                sos_eos = idx
    # Dense list indexed by id; assumes ids fall in [0, len) like the
    # original implementation (out-of-range ids would raise IndexError).
    vocab = [0] * len(token_by_id)
    for idx, token in token_by_id.items():
        vocab[idx] = token
    return (token_by_id, vocab, space_id, blank_id, sos_eos)

def load_hotwords(self, hotwords_file):
"""
Expand Down
2 changes: 1 addition & 1 deletion wenet/bin/export_onnx_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1200,7 +1200,7 @@ def export_rescoring_decoder(model, configs, args, logger, decoder_onnx_path,
configs['cmvn_conf'] = {}
else:
assert configs['cmvn'] == "global_cmvn"
assert configs['cmvn']['cmvn_conf'] is not None
assert configs['cmvn_conf'] is not None
configs['cmvn_conf']["cmvn_file"] = args.cmvn_file
if (args.reverse_weight != -1.0
and "reverse_weight" in configs["model_conf"]):
Expand Down
13 changes: 12 additions & 1 deletion wenet/bin/recognize_onnx_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def main():
configs = override_config(configs, args.override_config)

reverse_weight = configs["model_conf"].get("reverse_weight", 0.0)
special_tokens=configs.get('tokenizer_conf',{}).get('special_tokens', None)
test_conf = copy.deepcopy(configs['dataset_conf'])
test_conf['filter_conf']['max_length'] = 102400
test_conf['filter_conf']['min_length'] = 0
Expand All @@ -138,6 +139,8 @@ def main():
test_conf['fbank_conf']['dither'] = 0.0
test_conf['batch_conf']['batch_type'] = "static"
test_conf['batch_conf']['batch_size'] = args.batch_size



tokenizer = init_tokenizer(configs)
test_dataset = Dataset(args.data_type,
Expand Down Expand Up @@ -165,13 +168,21 @@ def main():
# Load dict
vocabulary = []
char_dict = {}


with open(args.dict, 'r') as fin:
for line in fin:
arr = line.strip().split()
assert len(arr) == 2
char_dict[int(arr[1])] = arr[0]
vocabulary.append(arr[0])
eos = sos = len(char_dict) - 1

vocab_size = len(char_dict)
sos = (vocab_size - 1 if special_tokens is None else
special_tokens.get("<sos>", vocab_size - 1))
eos = (vocab_size - 1 if special_tokens is None else
special_tokens.get("<eos>", vocab_size - 1))

with torch.no_grad(), open(args.result_file, 'w') as fout:
for _, batch in enumerate(test_data_loader):
keys, feats, _, feats_lengths, _ = batch
Expand Down

0 comments on commit dfa11bb

Please sign in to comment.