From b2a9a0e69919b93734993afc91196ddf53c557d4 Mon Sep 17 00:00:00 2001 From: Vanshita Gupta Date: Thu, 9 Nov 2023 02:28:27 -0500 Subject: [PATCH] Fixed pytest for word2vec embedding-using list of lists --- utils/util_modeler.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/utils/util_modeler.py b/utils/util_modeler.py index a5e1f7b..d23da3e 100644 --- a/utils/util_modeler.py +++ b/utils/util_modeler.py @@ -114,15 +114,12 @@ def fit_transform( np.ndarray: Word2Vec embeddings for the input text. """ - embedding = [] - # Initialize an array to store Word2Vec embeddings for the input text words = self.tokenizer.tokenize(text) # Tokenize the document word_vectors = [self.model[word] if word in self.model else np.zeros(self.model.vector_size) for word in words] document_embedding = np.mean(word_vectors, axis=0) # Calculate the mean of word embeddings for the document - embedding.append(document_embedding) - return np.array(embedding) + return document_embedding.tolist() class TPSampler: