diff --git a/django/curator/management/commands/curator_spam_detection.py b/django/curator/management/commands/curator_spam_detection.py index 2552f678a..e3c90903d 100644 --- a/django/curator/management/commands/curator_spam_detection.py +++ b/django/curator/management/commands/curator_spam_detection.py @@ -12,7 +12,7 @@ class Command(BaseCommand): def __init__(self): self.detection = SpamDetector() self.processor = self.detection.processor - self.user_meta_classifier = self.detection.user_metadata_classifier + self.user_meta_classifier = self.detection.usermeta_classifier self.text_classifier = self.detection.text_classifier def add_arguments(self, parser): diff --git a/django/curator/spam_classifiers.py b/django/curator/spam_classifiers.py index b57baf675..f88c36f0f 100644 --- a/django/curator/spam_classifiers.py +++ b/django/curator/spam_classifiers.py @@ -176,6 +176,7 @@ def __init__(self): def fit(self): print("Training TextSpamClassifier...") + model_metrics = None model = Pipeline( [ ("cleaner", FunctionTransformer(self.preprocess)), @@ -187,15 +188,15 @@ def fit(self): all_df = self.processor.get_all_users_df() if all_df.empty: - return None + return model_metrics # = None data_x, data_y = self.concat_pd(all_df) if data_x.empty: - return None + return model_metrics # = None if len(data_y.value_counts()) != 2: print("Cannot create a binary classifier!!") - return None + return model_metrics # = None ( train_x, @@ -218,14 +219,16 @@ def fit(self): def predict(self): print("TextSpamClassifier is making predictions...") + evaluated_user_ids = [] + spam_user_ids = [] df = self.processor.get_unlabelled_by_curator_df() if df.empty: # no-op if no data found - return [] + return evaluated_user_ids, spam_user_ids model = self.load_model(self.MODEL_FILE_PATH) data_x, data_y = self.concat_pd(df) if data_x.empty: - return [] + return evaluated_user_ids, spam_user_ids predictions, confidences = self.get_predictions(model, data_x["text"]) @@ -282,6 +285,7 @@ def __init__(self): def fit(self): print("Training UserMetadataSpamClassifier...") + model_metrics = None model = Pipeline( [ ("cleaner", FunctionTransformer(self.preprocess)), @@ -292,11 +296,11 @@ def fit(self): # obtain df from pipleline df = self.processor.get_all_users_df() if df.empty: - return None # if no untrained data found + return model_metrics # None if no untrained data found if len(df["labelled_by_curator"].value_counts()) != 2: print("Cannot create a binary classifier!!") - return None + return model_metrics feats, targets = self.__input_df_transformation(df) ( @@ -321,9 +325,11 @@ def fit(self): def predict(self): print("UserMetadataSpamClassifier is making predictions...") + evaluated_user_ids = [] + spam_user_ids = [] df = self.processor.get_unlabelled_by_curator_df() if df.empty: # no-op if no data found - return [] + return evaluated_user_ids, spam_user_ids model = self.load_model(self.MODEL_FILE_PATH) @@ -396,6 +402,7 @@ def __input_df_transformation(self, df: pd.DataFrame): ].fillna( "" ) + df.loc[:, ["user_id", "labelled_by_curator"]] = df[ ["user_id", "labelled_by_curator"] ].fillna(0)