diff --git a/ch06/01_main-chapter-code/ch06.ipynb b/ch06/01_main-chapter-code/ch06.ipynb index 856d6182..9299460e 100644 --- a/ch06/01_main-chapter-code/ch06.ipynb +++ b/ch06/01_main-chapter-code/ch06.ipynb @@ -79,6 +79,28 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "946c3e56-b04b-4b0f-b35f-b485ce5b28df", + "metadata": {}, + "outputs": [], + "source": [ + "# Utility to prevent certain cells from being executed twice\n", + "\n", + "from IPython.core.magic import register_line_cell_magic\n", + "\n", + "executed_cells = set()\n", + "\n", + "@register_line_cell_magic\n", + "def run_once(line, cell):\n", + " if line not in executed_cells:\n", + " get_ipython().run_cell(cell)\n", + " executed_cells.add(line)\n", + " else:\n", + " print(f\"Cell '{line}' has already been executed.\")" + ] + }, { "cell_type": "markdown", "id": "3a84cf35-b37f-4c15-8972-dfafc9fadc1c", @@ -167,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "def7c09b-af9c-4216-90ce-5e67aed1065c", "metadata": { "colab": { @@ -181,7 +203,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n" + "sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n" ] } ], @@ -230,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "da0ed4da-ac31-4e4d-8bdd-2153be4656a4", "metadata": { "colab": { @@ -344,7 +366,7 @@ "[5572 rows x 2 columns]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "495a5280-9d7c-41d4-9719-64ab99056d4c", "metadata": { "colab": { @@ -406,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "7be4a0a2-9704-4a96-b38f-240339818688", "metadata": { "colab": { @@ -428,6 +450,9 @@ } ], "source": [ + "%%run_once balance_df\n", + "\n", + "\n", "def create_balanced_dataset(df):\n", " \n", " # Count the instances of \"spam\"\n", @@ -441,6 +466,7 @@ "\n", " return balanced_df\n", "\n", + "\n", "balanced_df = create_balanced_dataset(df)\n", "print(balanced_df[\"Label\"].value_counts())" ] @@ -457,14 +483,133 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd", "metadata": { "id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd" }, "outputs": [], "source": [ - "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})" + "%%run_once label_mapping\n", + "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1}) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e6f7f062-ef4e-4020-8275-71990cab4414", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LabelText
43070Awww dat is sweet! We can think of something t...
41380Just got to <#>
48310The word \"Checkmate\" in chess comes from the P...
44610This is wishing you a great day. Moji told me ...
54400Thank you. do you generally date the brothas?
.........
55371Want explicit SEX in 30 secs? Ring 02073162414...
55401ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ...
55471Had your contract mobile 11 Mnths? Latest Moto...
55661REMINDER FROM O2: To get 2.50 pounds free call...
55671This is the 2nd time we have tried 2 contact u...
\n", + "

1494 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Label Text\n", + "4307 0 Awww dat is sweet! We can think of something t...\n", + "4138 0 Just got to <#>\n", + "4831 0 The word \"Checkmate\" in chess comes from the P...\n", + "4461 0 This is wishing you a great day. Moji told me ...\n", + "5440 0 Thank you. do you generally date the brothas?\n", + "... ... ...\n", + "5537 1 Want explicit SEX in 30 secs? Ring 02073162414...\n", + "5540 1 ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ...\n", + "5547 1 Had your contract mobile 11 Mnths? Latest Moto...\n", + "5566 1 REMINDER FROM O2: To get 2.50 pounds free call...\n", + "5567 1 This is the 2nd time we have tried 2 contact u...\n", + "\n", + "[1494 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "balanced_df" ] }, {