diff --git a/ch06/01_main-chapter-code/ch06.ipynb b/ch06/01_main-chapter-code/ch06.ipynb index 856d6182..9299460e 100644 --- a/ch06/01_main-chapter-code/ch06.ipynb +++ b/ch06/01_main-chapter-code/ch06.ipynb @@ -79,6 +79,28 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "946c3e56-b04b-4b0f-b35f-b485ce5b28df", + "metadata": {}, + "outputs": [], + "source": [ + "# Utility to prevent certain cells from being executed twice\n", + "\n", + "from IPython.core.magic import register_line_cell_magic\n", + "\n", + "executed_cells = set()\n", + "\n", + "@register_line_cell_magic\n", + "def run_once(line, cell):\n", + " if line not in executed_cells:\n", + " get_ipython().run_cell(cell)\n", + " executed_cells.add(line)\n", + " else:\n", + " print(f\"Cell '{line}' has already been executed.\")" + ] + }, { "cell_type": "markdown", "id": "3a84cf35-b37f-4c15-8972-dfafc9fadc1c", @@ -167,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "def7c09b-af9c-4216-90ce-5e67aed1065c", "metadata": { "colab": { @@ -181,7 +203,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n" + "sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n" ] } ], @@ -230,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "da0ed4da-ac31-4e4d-8bdd-2153be4656a4", "metadata": { "colab": { @@ -344,7 +366,7 @@ "[5572 rows x 2 columns]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "495a5280-9d7c-41d4-9719-64ab99056d4c", "metadata": { "colab": { @@ -406,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "7be4a0a2-9704-4a96-b38f-240339818688", "metadata": { "colab": { @@ -428,6 +450,9 @@ } ], "source": [ + "%%run_once balance_df\n", + "\n", + "\n", "def create_balanced_dataset(df):\n", " \n", " # Count the instances of \"spam\"\n", @@ -441,6 +466,7 @@ "\n", " return balanced_df\n", "\n", + "\n", "balanced_df = create_balanced_dataset(df)\n", "print(balanced_df[\"Label\"].value_counts())" ] @@ -457,14 +483,133 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd", "metadata": { "id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd" }, "outputs": [], "source": [ - "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})" + "%%run_once label_mapping\n", + "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1}) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e6f7f062-ef4e-4020-8275-71990cab4414", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Label | \n", + "Text | \n", + "
---|---|---|
4307 | \n", + "0 | \n", + "Awww dat is sweet! We can think of something t... | \n", + "
4138 | \n", + "0 | \n", + "Just got to <#> | \n", + "
4831 | \n", + "0 | \n", + "The word \"Checkmate\" in chess comes from the P... | \n", + "
4461 | \n", + "0 | \n", + "This is wishing you a great day. Moji told me ... | \n", + "
5440 | \n", + "0 | \n", + "Thank you. do you generally date the brothas? | \n", + "
... | \n", + "... | \n", + "... | \n", + "
5537 | \n", + "1 | \n", + "Want explicit SEX in 30 secs? Ring 02073162414... | \n", + "
5540 | \n", + "1 | \n", + "ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ... | \n", + "
5547 | \n", + "1 | \n", + "Had your contract mobile 11 Mnths? Latest Moto... | \n", + "
5566 | \n", + "1 | \n", + "REMINDER FROM O2: To get 2.50 pounds free call... | \n", + "
5567 | \n", + "1 | \n", + "This is the 2nd time we have tried 2 contact u... | \n", + "
1494 rows × 2 columns
\n", + "