diff --git a/notebooks/homomorphic (1).ipynb b/notebooks/homomorphic (1).ipynb deleted file mode 100644 index aa0b71e..0000000 --- a/notebooks/homomorphic (1).ipynb +++ /dev/null @@ -1,705 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "source": [ - "# !pip3 install concrete-ml" - ], - "metadata": { - "id": "rQ53bof5WlEL" - }, - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# !pip3 install nlpaug\n", - "# !pip3 install wandb" - ], - "metadata": { - "id": "u2YAx99DY1JG" - }, - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import gensim.downloader\n", - "import nltk\n", - "from nltk.tokenize import RegexpTokenizer\n", - "from nltk.corpus import stopwords\n", - "import string\n", - "import wandb\n", - "# from torch.utils.data import Sampler\n", - "from sklearn.utils.class_weight import compute_sample_weight\n", - "from sklearn.base import BaseEstimator, TransformerMixin\n", - "from sklearn.utils import shuffle as shuffler\n", - "import random\n", - "\n", - "import nlpaug.augmenter.word as naw" - ], - "metadata": { - "id": "x_hMPWSvXYmc" - }, - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "def get_f1_score(\n", - " y_true: list[int],\n", - " y_pred: list[int],\n", - " average: str = 'weighted'\n", - " ):\n", - " \"\"\"Returns the F1 score.\n", - "\n", - " Args:\n", - " y_true (list[int]): The true labels.\n", - " y_pred (list[int]): The predicted labels.\n", - " average (str, optional): The averaging method. Defaults to 'weighted'.\n", - "\n", - " Returns:\n", - " float: The F1 score.\n", - " \"\"\"\n", - "\n", - " return f1_score(y_true, y_pred, average='weighted')\n", - "def calculate_document_embedding(doc, model, tokenizer, embed_size):\n", - " \"\"\"Calculates the document embedding for the given document.\n", - "\n", - " Utility function for below class - Word2VecEmbedder\n", - "\n", - " Args:\n", - " doc (str): The document.\n", - " model (gensim.models.keyedvectors.Word2VecKeyedVectors): The Word2Vec model.\n", - " tokenizer (nltk.tokenize.regexp.RegexpTokenizer): The tokenizer.\n", - " embed_size (int): The embedding size.\n", - "\n", - " Returns:\n", - " np.ndarray: The document embedding.\n", - " \"\"\"\n", - "\n", - " doc_embed = np.zeros(embed_size)\n", - " words = tokenizer.tokenize(doc)\n", - " stopset = stopwords.words('english') + list(string.punctuation)\n", - "\n", - " #we lowercase the words specifically for OOV embeddings to be same for same words different case\n", - " words = [word.lower() for word in words]\n", - " words = [word for word in words if word not in stopset]\n", - "\n", - " word_count = 0\n", - " for word in words:\n", - " if word in model:\n", - " doc_embed += model[word]\n", - " word_count += 1\n", - "\n", - " return doc_embed / word_count if word_count != 0 else doc_embed\n", - "def evaluate_and_log(\n", - " x: list[str],\n", - " y_true: list[int],\n", - " y_pred: list[int],\n", - " filename: str,\n", - " experiment: wandb = None,\n", - " id: list[str] = None\n", - " ):\n", - " \"\"\"Evaluates the model's performance and logs the results.\n", - "\n", - " Args:\n", - " x (list[str]): The texts used for evaluation.\n", - " y_true (list[int]): The actual labels.\n", - " y_pred (list[int]): The predicted labels.\n", - " filename (str): The name of the log file.\n", - " \"\"\"\n", - "\n", - " if id is None:\n", - " id = [str(i) for i in range(len(x))]\n", - "\n", - " if len(x) != len(y_true) or len(x) != len(y_pred):\n", - " raise 
ValueError(\"Input lists (x, y_true, and y_pred) must have the same length.\")\n", - "\n", - " # Calculate the classification report and confusion matrix\n", - " class_report, conf_matrix = get_classification_report_confusion_matrix(y_true, y_pred)\n", - "\n", - " # Find mismatched examples -> indices from y_pred and y_true where they are not the same\n", - " mismatched_indices = np.where(np.array(y_true) != np.array(y_pred))[0]\n", - " mismatched_examples = []\n", - "\n", - " if experiment is not None:\n", - " table = wandb.Table(columns=[\"Actual\", \"Predicted\", \"Text\"])\n", - "\n", - " for i in mismatched_indices:\n", - " # Format the mismatched example in a code block\n", - " mismatched_example = f\"\\nMail ID: {id[i]}\\nActual: {y_true[i]}\\nPredicted: {y_pred[i]}\\n\\nText: {x[i]}\\n\\n\"\n", - " mismatched_examples.append(mismatched_example)\n", - "\n", - " if experiment is not None:\n", - " table.add_data(y_true[i], y_pred[i], x[i])\n", - "\n", - " # Format the results for logging\n", - " log_content = f\"---------Classification Report---------\\n{classification_report(y_true, y_pred)}\\n\\n\"\n", - " log_content += f\"---------Confusion Matrix---------\\n{conf_matrix}\\n\\n\"\n", - " log_content += \"---------Mismatched Examples---------\\n\\n\"\n", - " log_content += \"\\n\\n\".join(mismatched_examples)\n", - "\n", - " # Log the table\n", - " if experiment is not None:\n", - " wandb.log({\"Mismatched_Examples\": table})\n", - "\n", - " # Save the results to the log file\n", - " with open(filename, 'w') as log_file:\n", - " log_file.write(log_content)\n", - "\n", - "\n", - "class Word2VecEmbedder(BaseEstimator, TransformerMixin):\n", - " def __init__(\n", - " self,\n", - " model_name: str = 'word2vec-google-news-300',\n", - " tokenizer=RegexpTokenizer(r'\\w+')\n", - " ):\n", - " self.model = gensim.downloader.load(model_name)\n", - " self.tokenizer = tokenizer\n", - " self.embed_size = 300\n", - "\n", - " def fit(\n", - " self,\n", - " X,\n", - " y=None\n", - " ):\n", - " return self\n", - "\n", - "\n", - " def transform(\n", - " self,\n", - " X\n", - " ):\n", - " \"\"\"Calculate Word2Vec embeddings for the given text.\n", - "\n", - " Args:\n", - " X (list): List of text documents.\n", - "\n", - " Returns:\n", - " np.ndarray: Word2Vec embeddings for the input text.\n", - " \"\"\"\n", - "\n", - " if isinstance(X, str):\n", - " X = [X]\n", - "\n", - " return np.vstack([calculate_document_embedding(doc, self.model, self.tokenizer, self.embed_size) for doc in X])\n", - "\n", - "\n", - "class TPSampler:\n", - " def __init__(\n", - " self,\n", - " class_labels,\n", - " tp_ratio=0.1,\n", - " batch_size=32\n", - " ):\n", - " \"\"\"A custom sampler to sample the training data.\n", - "\n", - " Args:\n", - " class_labels (list[int]): The class labels of the training data.\n", - " tp_ratio (float, optional): The ratio of true positives to sample. Defaults to 0.1.\n", - " batch_size (int, optional): The batch size. 
Defaults to 32.\n", - "\n", - " Returns:\n", - " iter: The indices of the sampled data.\n", - " \"\"\"\n", - "\n", - " self.tp_indices = [i for i, label in enumerate(class_labels) if label == 1]\n", - " self.non_tp_indices = [i for i, label in enumerate(class_labels) if label == 0]\n", - " self.tp_ratio = tp_ratio\n", - " self.batch_size = batch_size\n", - "\n", - " def __iter__(self):\n", - " \"\"\"Iterate through the sampled indices.\n", - "\n", - " Returns:\n", - " iter: The indices of the sampled data.\n", - " \"\"\"\n", - "\n", - " num_samples = len(self.tp_indices)\n", - " tp_batch_size = int(self.tp_ratio * self.batch_size)\n", - " non_tp_batch_size = self.batch_size - tp_batch_size\n", - " sampled_indices = []\n", - "\n", - " while len(sampled_indices) < num_samples:\n", - " tp_indices = np.random.choice(self.tp_indices, tp_batch_size, replace=False)\n", - " non_tp_indices = np.random.choice(self.non_tp_indices, non_tp_batch_size, replace=False)\n", - " batch_indices = np.concatenate((tp_indices, non_tp_indices))\n", - " np.random.shuffle(batch_indices)\n", - " sampled_indices.extend(batch_indices)\n", - "\n", - " return iter(sampled_indices)\n", - "\n", - " def __len__(\n", - " self\n", - " ):\n", - " \"\"\"Returns the total number of samples for the dataloader.\n", - "\n", - " Returns:\n", - " int: The total number of samples for the dataloader.\n", - " \"\"\"\n", - "\n", - " return len(self.tp_indices) # This defines the total number of samples for the dataloader\n", - "\n", - "\n", - "class Augmentor:\n", - " def __init__(\n", - " self,\n", - " augmentor = None\n", - " ):\n", - " \"\"\"A custom augmentor to augment the training data.\n", - "\n", - " Args:\n", - " augmentor (albumentations.core.composition.Compose): The augmentor to use.\n", - " \"\"\"\n", - "\n", - " if augmentor is None:\n", - " augmentor = naw.SynonymAug()\n", - "\n", - " def __call__(\n", - " self,\n", - " X,\n", - " y,\n", - " aug_label = 1,\n", - " num_aug_per_label_1 = 10,\n", - " shuffle=True\n", - " ):\n", - " \"\"\"Augment the training data.\n", - "\n", - " Args:\n", - " X (list): The input data.\n", - " y (list): The labels.\n", - " aug_label (int, optional): The label to augment. Defaults to 1.\n", - " num_aug_per_label_1 (int, optional): The number of augmentations to apply to the label. Defaults to 10.\n", - " shuffle (bool, optional): Whether to shuffle the data. 
Defaults to True.\n", - "\n", - " Returns:\n", - " tuple: The augmented data and labels.\n", - " \"\"\"\n", - "\n", - " if isinstance(X, str):\n", - " X = [X]\n", - " elif isinstance(X, pd.Series):\n", - " X = X.tolist()\n", - "\n", - " if isinstance(y, str):\n", - " y = [y]\n", - " elif isinstance(y, pd.Series):\n", - " y = y.tolist()\n", - "\n", - " X, y = self.augment_data(X, y, aug_label, num_aug_per_label_1=num_aug_per_label_1)\n", - "\n", - " if shuffle:\n", - " X, y = shuffler(X, y, random_state=42)\n", - "\n", - " return X, y\n", - "\n", - " def augment_data(\n", - " self,\n", - " input_text,\n", - " input_labels,\n", - " aug_label=1,\n", - " num_aug_per_label_1=10\n", - " ):\n", - "\n", - " augmented_texts = []\n", - " augmented_labels = []\n", - "\n", - " for text, lbl in zip(input_text, input_labels):\n", - " augmented_texts.append(text)\n", - " augmented_labels.append(lbl)\n", - "\n", - " # Apply augmentation only to instances with label 1\n", - " if float(lbl) == float(aug_label):\n", - " for _ in range(num_aug_per_label_1):\n", - " augmented_text = self.apply_augmentation(text)\n", - " augmented_texts.append(augmented_text)\n", - " augmented_labels.append(lbl)\n", - "\n", - " return augmented_texts, augmented_labels\n", - "\n", - " def apply_augmentation(\n", - " self,\n", - " text\n", - " ):\n", - "\n", - " # Choose an augmentation technique (you can explore different techniques)\n", - " aug = naw.SynonymAug()\n", - "\n", - " # Augment the text\n", - " augmented_text = aug.augment(text)[0]\n", - "\n", - " return augmented_text\n", - "\n", - "" - ], - "metadata": { - "id": "orw9VDGjXPQw" - }, - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "\n", - "\n", - "from google.colab import drive\n", - "drive.mount('/content/drive/', force_remount=True)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "DlZaYdEAYFKL", - "outputId": "4f00a84a-dfdc-4611-cd7e-ec33e0388426" - }, - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Mounted at /content/drive/\n" - ] - } - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "QsT7TK2gWfRc", - "outputId": "6928619d-0f12-4bfd-97c5-c0f5a3086058" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import sys\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.pipeline import Pipeline\n", - "from concrete.ml.sklearn.rf import RandomForestClassifier\n", - "# sys.path.append('../')\n", - "# from utils.util_modeler import Word2VecEmbedder" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "JVNJyVqUWfRd" - }, - "outputs": [], - "source": [ - "data = pd.read_csv('/content/drive/MyDrive/fraud_detector_data.csv', sep=',' , lineterminator='\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "Rl5316KsWfRd" - }, - "outputs": [], - "source": [ - "train_data = data[data.Split == 'Train']\n", - "sanity_data = data[data.Split == 'Sanity']\n", - "gold_fraud_data = data[data.Split == 'Gold Fraud']" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "dcupfyUeWfRd" - }, - "outputs": [], - "source": [ - "# X = train_data.Body\n", - "# y = 
train_data.Label\n", - "\n", - "# X = X.values\n", - "# y = y.values" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "suiVfemxWfRd" - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "L6HbnTxT1O3R" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "4732BAuHWfRe" - }, - "outputs": [], - "source": [ - "init_params_rf = {\"max_depth\": 7, \"n_estimators\": 100}\n", - "init_params_cml = {\"n_bits\": 3}" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "nOW_AyIvWfRe", - "outputId": "22b296f8-a5c4-4742-866b-d0fb7aa4568c" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[==================================================] 100.0% 1662.8/1662.8MB downloaded\n" - ] - } - ], - "source": [ - "model = Pipeline([\n", - " ('vectorizer', Word2VecEmbedder()),\n", - " ('classifier', RandomForestClassifier(**init_params_rf, **init_params_cml))\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H-fcjO82WfRe" - }, - "outputs": [], - "source": [ - "augmentor = Augmentor()\n", - "\n", - "train_body, train_labels = augmentor(\n", - " train_data['Body'].tolist(),\n", - " train_data['Label'].tolist(),\n", - " aug_label=1,\n", - " num_aug_per_label_1=9,\n", - " shuffle=True\n", - ")\n", - "\n", - "train_data = pd.DataFrame(\n", - " {\n", - " 'Body': train_body,\n", - " 'Label': train_labels\n", - " }\n", - ")\n", - "\n", - "train_data.drop_duplicates(subset=['Body'], inplace=True)\n", - "train_data.reset_index(drop=True, inplace=True)\n", - "\n", - "# Call your code that produces output\n", - "model.fit(train_data['Body'], train_data['Label'])\n" - ] - }, - { - "cell_type": "code", - "source": [ - "!pip3 install mlflow\n", - "import os\n", - "from mlflow.sklearn import save_model\n", - "save_model(model,'/content/drive/MyDrive/')" - ], - "metadata": { - "id": "uMgQvxnC9WIJ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "WANDB_API_KEY=\"f2341bb05edcb6d412894698013186646aefa0e2\"\n", - "WANDB_PROJECT=\"Fraud-Detector\"\n", - "WANDB_ENTITY=\"regressors\"\n", - "\n", - "wandbdict = {\n", - " 'key': WANDB_API_KEY,\n", - " 'entity': WANDB_PROJECT,\n", - " 'project': WANDB_ENTITY,\n", - "}\n", - "wandb.login(key=wandbdict['key'])\n", - "run = wandb.init(project=wandbdict['project'], entity=wandbdict['entity'])" - ], - "metadata": { - "id": "6mKt3tUE-U5q" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "f1_scores = {}\n", - "os.makedirs('/content/drive/MyDrive/logs')\n", - "save_path='/content/drive/MyDrive/'\n" - ], - "metadata": { - "id": "D_c9C2Mq3Qld" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [], - "metadata": { - "id": "pH2qWaNw6dxB" - } - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "hngi7nsOWfRe", - "outputId": "cc909b03-3ffe-407c-82b0-58fa63cf22ec" - }, - "outputs": [ - { - "output_type": "error", - "ename": "LookupError", - "evalue": "ignored", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mLookupError\u001b[0m Traceback 
(most recent call last)", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/corpus/util.py\u001b[0m in \u001b[0;36m__load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mroot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{self.subdir}/{zip_name}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/data.py\u001b[0m in \u001b[0;36mfind\u001b[0;34m(resource_name, paths)\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0mresource_not_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"\\n{sep}\\n{msg}\\n{sep}\\n\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 583\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresource_not_found\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 584\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mLookupError\u001b[0m: \n**********************************************************************\n Resource \u001b[93mstopwords\u001b[0m not found.\n Please use the NLTK Downloader to obtain the resource:\n\n \u001b[31m>>> import nltk\n >>> nltk.download('stopwords')\n \u001b[0m\n For more information see: https://www.nltk.org/data.html\n\n Attempted to load \u001b[93mcorpora/stopwords.zip/stopwords/\u001b[0m\n\n Searched in:\n - '/root/nltk_data'\n - '/usr/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/lib/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/local/share/nltk_data'\n - '/usr/lib/nltk_data'\n - '/usr/local/lib/nltk_data'\n**********************************************************************\n", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mLookupError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 376\u001b[0m \"\"\"\n\u001b[1;32m 377\u001b[0m \u001b[0mfit_params_steps\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_fit_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 378\u001b[0;31m \u001b[0mXt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params_steps\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 379\u001b[0m 
\u001b[0;32mwith\u001b[0m \u001b[0m_print_elapsed_time\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Pipeline\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_log_message\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"passthrough\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36m_fit\u001b[0;34m(self, X, y, **fit_params_steps)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0mcloned_transformer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclone\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransformer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m \u001b[0;31m# Fit or load from cache the current transformer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 336\u001b[0;31m X, fitted_transformer = fit_transform_one_cached(\n\u001b[0m\u001b[1;32m 337\u001b[0m \u001b[0mcloned_transformer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/joblib/memory.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 352\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 353\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 354\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 355\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcall_and_shelve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36m_fit_transform_one\u001b[0;34m(transformer, X, y, weight, message_clsname, message, **fit_params)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_print_elapsed_time\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_clsname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 869\u001b[0m 
\u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransformer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"fit_transform\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 870\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransformer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 871\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransformer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/sklearn/base.py\u001b[0m in \u001b[0;36mfit_transform\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 868\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 869\u001b[0m \u001b[0;31m# fit method of arity 2 (supervised transformation)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 870\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 871\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mtransform\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcalculate_document_embedding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membed_size\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcalculate_document_embedding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membed_size\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mdoc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mcalculate_document_embedding\u001b[0;34m(doc, model, tokenizer, embed_size)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mdoc_embed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membed_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mwords\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtokenize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mstopset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstopwords\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'english'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstring\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpunctuation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m#we lowercase the words specifically for OOV embeddings to be same for same words different case\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/corpus/util.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, attr)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"LazyCorpusLoader object has no attribute '__bases__'\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 121\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__load\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 122\u001b[0m \u001b[0;31m# This looks circular, but its not, since __load() changes our\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;31m# __class__ to something new:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/corpus/util.py\u001b[0m in \u001b[0;36m__load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 84\u001b[0m \u001b[0mroot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{self.subdir}/{zip_name}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 86\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;31m# Load the corpus.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/corpus/util.py\u001b[0m in \u001b[0;36m__load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 79\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 81\u001b[0;31m \u001b[0mroot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnltk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfind\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{self.subdir}/{self.__name}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 82\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mLookupError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/nltk/data.py\u001b[0m in \u001b[0;36mfind\u001b[0;34m(resource_name, paths)\u001b[0m\n\u001b[1;32m 581\u001b[0m \u001b[0msep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"*\"\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;36m70\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0mresource_not_found\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf\"\\n{sep}\\n{msg}\\n{sep}\\n\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 583\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mLookupError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresource_not_found\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 584\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mLookupError\u001b[0m: \n**********************************************************************\n Resource \u001b[93mstopwords\u001b[0m not found.\n Please use the NLTK Downloader to obtain the resource:\n\n \u001b[31m>>> import nltk\n >>> nltk.download('stopwords')\n \u001b[0m\n For more information see: https://www.nltk.org/data.html\n\n Attempted to load \u001b[93mcorpora/stopwords\u001b[0m\n\n Searched in:\n - '/root/nltk_data'\n - '/usr/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/lib/nltk_data'\n - '/usr/share/nltk_data'\n - '/usr/local/share/nltk_data'\n - '/usr/lib/nltk_data'\n - 
'/usr/local/lib/nltk_data'\n**********************************************************************\n" - ] - } - ], - "source": [ - "train_data['Prediction'] = model.predict(body=train_data['Body'])\n", - "evaluate_and_log(x=train_data['Body'].tolist(), y_true=train_data['Label'].tolist(), y_pred=train_data['Prediction'].tolist(), filename=os.path.join(save_path,'logs/train.log'), experiment=run, id = train_data['Mail-ID'].tolist())\n", - "f1_scores['train'] = get_f1_score(y_true=train_data['Label'].tolist(), y_pred=train_data['Prediction'].tolist())" - ] - }, - { - "cell_type": "code", - "source": [ - "sanity_data['Prediction'] = model.predict(body=sanity_data['Body'])\n", - "evaluate_and_log(x=sanity_data['Body'].tolist(), y_true=sanity_data['Label'].tolist(), y_pred=sanity_data['Prediction'].tolist(), filename=os.path.join(save_path,'logs/sanity.log'), experiment=run, id = sanity_data['Mail-ID'].tolist())\n", - "f1_scores['sanity'] = get_f1_score(y_true=sanity_data['Label'].tolist(), y_pred=sanity_data['Prediction'].tolist())\n", - "\n" - ], - "metadata": { - "id": "zCaQWTjf_UQ8" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MOXRQFjZWfRe" - }, - "outputs": [], - "source": [ - "gold_fraud_data['Prediction'] = model.predict(body=gold_fraud_data['Body'])\n", - "evaluate_and_log(x=gold_fraud_data['Body'].tolist(), y_true=gold_fraud_data['Label'].tolist(), y_pred=gold_fraud_data['Prediction'].tolist(), filename=os.path.join(save_path,'logs/gold_fraud.log'), experiment=run, id = gold_fraud_data['Mail-ID'].tolist())\n", - "f1_scores['gold_fraud'] = get_f1_score(y_true=gold_fraud_data['Label'].tolist(), y_pred=gold_fraud_data['Prediction'].tolist())\n", - "\n" - ] - }, - { - "cell_type": "code", - "source": [ - "#save mismatch data into a csv file\n", - "mismatch_data = pd.concat(\n", - " [\n", - " train_data[train_data['Prediction'] != train_data['Label']],\n", - " sanity_data[sanity_data['Prediction'] != sanity_data['Label']],\n", - " gold_fraud_data[gold_fraud_data['Prediction'] != gold_fraud_data['Label']]\n", - " ],\n", - " axis=0,\n", - " ignore_index=True\n", - ")\n", - "\n", - "mismatch_data.to_csv(os.path.join(save_path,'logs/mismatch_data.csv'), index=False)" - ], - "metadata": { - "id": "G4s7RYRV_a9p" - }, - "execution_count": null, - "outputs": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Fraud-Detector-env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "colab": { - "provenance": [] - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file
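
Note on the removed notebook: the error output captured in its final executed cell shows the run failing inside calculate_document_embedding when stopwords.words('english') raised a LookupError, because the NLTK 'stopwords' corpus was never downloaded in the Colab runtime. A minimal setup-cell sketch that would have avoided this is below; the stopwords download is exactly what the traceback itself prescribes, while the wordnet and averaged_perceptron_tagger downloads are an assumption, added only because the notebook's nlpaug SynonymAug augmenter is WordNet-based.

    # Hedged sketch of a setup cell for the removed notebook: fetch the NLTK
    # resources its helper functions depend on before calling model.fit().
    import nltk

    nltk.download("stopwords")                   # required by calculate_document_embedding;
                                                 # this is the resource the traceback reports missing
    nltk.download("wordnet")                     # assumption: WordNet synonyms for nlpaug SynonymAug
    nltk.download("averaged_perceptron_tagger")  # assumption: POS tagging used by SynonymAug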