forked from mhezarei/ai-bot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline_sentence.py
28 lines (22 loc) · 1.02 KB
/
pipeline_sentence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from hazm import word_tokenize, Lemmatizer, Normalizer
from transformers import pipeline
from capitals import capital_dictionary_keys, country_to_capital
def pipeline_sentence(sentence, model, tokenizer):
    """Run NER over a sentence and over its lemmatized form.

    The sentence is first rewritten by ``change_words`` and normalized with
    hazm's ``Normalizer``. A lemmatized variant is then built by tokenizing
    the normalized text and lemmatizing each token. Both variants are passed
    through a ``transformers`` ``"ner"`` pipeline built from ``model`` and
    ``tokenizer``.

    Args:
        sentence: The input text.
        model: Forwarded as ``model=`` to ``transformers.pipeline``.
        tokenizer: Forwarded as ``tokenizer=`` to ``transformers.pipeline``.

    Returns:
        A 4-tuple ``(sentence_ner, sentence_ner_lem, sentence_lem, sentence)``:
        the NER output for the normalized sentence, the NER output for the
        lemmatized sentence, the lemmatized sentence itself, and the
        normalized sentence.
    """
    sentence = change_words(sentence)
    normalizer = Normalizer()
    sentence = normalizer.normalize(sentence)

    # The text is normalized once more before tokenizing. This is redundant
    # if normalize() is idempotent, but it is kept so the lemmatized output
    # is unchanged.
    tokens = word_tokenize(normalizer.normalize(sentence))
    # Build the lemmatizer once instead of once per token.
    lemmatizer = Lemmatizer()
    sentence_lem = ' '.join(lemmatizer.lemmatize(token) for token in tokens)

    nlp = pipeline("ner", model=model, tokenizer=tokenizer)
    sentence_ner = nlp(sentence)
    sentence_ner_lem = nlp(sentence_lem)
    return sentence_ner, sentence_ner_lem, sentence_lem, sentence
def change_words(sentence):
    """Rewrite selected words in a sentence and return the result.

    Two independent substitutions are applied:

    * If the sentence contains "پایتخت" ("capital"), every key from
      ``capital_dictionary_keys()`` that occurs in the sentence is replaced
      by ``country_to_capital(key)`` (presumably country name -> capital
      name, going by the helper names).
    * If the sentence contains "دبی" ("Dubai") surrounded by spaces and does
      not already contain "شهر دبی" ("city of Dubai"), every occurrence of
      "دبی" is replaced by "شهر دبی".
    """
    mentions_capital = "پایتخت" in sentence
    if mentions_capital:
        for country in capital_dictionary_keys():
            if country not in sentence:
                continue
            sentence = sentence.replace(country, country_to_capital(country))

    # Only expand a standalone " دبی " that has not been expanded already.
    if " دبی " in sentence and "شهر دبی" not in sentence:
        sentence = sentence.replace("دبی", "شهر دبی")

    return sentence