From 33eb1e9045f40f5a1e518d0a7e93e02c0835e040 Mon Sep 17 00:00:00 2001 From: Advaith Rao Date: Tue, 7 Nov 2023 23:57:14 -0500 Subject: [PATCH] Final fix for word2vec + svm input formatting --- utils/util_modeler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/util_modeler.py b/utils/util_modeler.py index 7825910..a5e1f7b 100644 --- a/utils/util_modeler.py +++ b/utils/util_modeler.py @@ -114,12 +114,15 @@ def fit_transform( np.ndarray: Word2Vec embeddings for the input text. """ + embedding = [] + # Initialize an array to store Word2Vec embeddings for the input text words = self.tokenizer.tokenize(text) # Tokenize the document word_vectors = [self.model[word] if word in self.model else np.zeros(self.model.vector_size) for word in words] document_embedding = np.mean(word_vectors, axis=0) # Calculate the mean of word embeddings for the document + embedding.append(document_embedding) - return document_embedding + return np.array(embedding) class TPSampler: