Skip to content

Commit

Permalink
refactor: Model updated | 중복 코드 함수화
Browse files Browse the repository at this point in the history
  • Loading branch information
yujin37 committed Nov 4, 2023
1 parent cf95050 commit 4ea323c
Showing 1 changed file with 9 additions and 16 deletions.
25 changes: 9 additions & 16 deletions Model/lsa_Similar.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,20 @@
# Module-level TF-IDF vectorizer instance.
tfidf_vectorizer = TfidfVectorizer()
# Module-level Komoran instance; preprocess_text calls its get_plain_text().
# NOTE(review): 'STABLE' presumably selects which analyzer model is loaded —
# confirm against the Komoran documentation.
komoran = Komoran('STABLE')

def preprocess_text(text):
    """Return *text* as space-separated morphemes with their tags removed.

    The text is passed to the module-level ``komoran`` object's
    ``get_plain_text``; the result is split on single spaces and each token
    is expected to look like ``morpheme/TAG``.

    Args:
        text: The raw sentence to analyse.

    Returns:
        The tokens joined by single spaces, each with its trailing
        ``/TAG`` suffix dropped.
    """
    tagged_tokens = komoran.get_plain_text(text).split(' ')
    # Cut at the LAST slash only: a morpheme that is itself a slash (or
    # contains one) would otherwise be truncated to an empty/partial string.
    words = [token.rsplit('/', 1)[0] for token in tagged_tokens]
    return ' '.join(words)

def lsa_Similar(contents, answer):
test = (komoran.get_plain_text(contents[0])).split(' ')
for j in range(len(test)):
temp = test[j].split('/')
test[j] = temp[0]
#print('여기',test)
test = ' '.join(test)
test2 = (komoran.get_plain_text(answer[0])).split(' ')
for j in range(len(test2)):
temp = test2[j].split('/')
test2[j] = temp[0]
#print('여기',test)
test2 = ' '.join(test2)
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform([test, test2])
contents_preprocessed = preprocess_text(contents[0])
answer_preprocessed = preprocess_text(answer[0])

tfidf_matrix = tfidf_vectorizer.fit_transform([contents_preprocessed, answer_preprocessed])

# LSA를 사용하여 차원 축소
lsa = TruncatedSVD(n_components=2)
lsa_matrix = lsa.fit_transform(tfidf_matrix)

# 문장 간 유사도 계산
similarity_matrix = cosine_similarity(lsa_matrix)

response = {
Expand Down

0 comments on commit 4ea323c

Please sign in to comment.