From bc3c492d3f8a8a44c14fd2d090d149c146a4ec97 Mon Sep 17 00:00:00 2001 From: Aurelien Geron Date: Mon, 21 Jan 2019 18:13:10 +0800 Subject: [PATCH] Create image directory and check for sklearn >= 0.20 and TensorFlow >= 2.0-preview --- 06_decision_trees.ipynb | 317 +++++++++++++++--- 07_ensemble_learning_and_random_forests.ipynb | 33 +- 08_dimensionality_reduction.ipynb | 31 +- 09_unsupervised_learning.ipynb | 14 +- 10_neural_nets_with_keras.ipynb | 18 +- 5 files changed, 313 insertions(+), 100 deletions(-) diff --git a/06_decision_trees.ipynb b/06_decision_trees.ipynb index c94e9b5a9..f84774dee 100644 --- a/06_decision_trees.ipynb +++ b/06_decision_trees.ipynb @@ -25,7 +25,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20." ] }, { @@ -38,6 +38,10 @@ "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", "# Common imports\n", "import numpy as np\n", "import os\n", @@ -56,32 +60,15 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"decision_trees\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def image_path(fig_id):\n", - " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", - "\n", - "def save_fig(fig_id, tight_layout=True):\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook assumes you have installed Scikit-Learn ≥0.20." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import sklearn\n", - "assert sklearn.__version__ >= \"0.20\"" + " plt.savefig(path, format=fig_extension, dpi=resolution)" ] }, { @@ -93,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -107,7 +94,7 @@ " splitter='best')" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -126,25 +113,127 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Tree\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "petal length (cm) <= 2.45\n", + "gini = 0.667\n", + "samples = 150\n", + "value = [50, 50, 50]\n", + "class = setosa\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "gini = 0.0\n", + "samples = 50\n", + "value = [50, 0, 0]\n", + "class = setosa\n", + "\n", + "\n", + "\n", + "0->1\n", + "\n", + "\n", + "True\n", + "\n", + "\n", + "\n", + "2\n", + "\n", + "petal width (cm) <= 1.75\n", + "gini = 0.5\n", + "samples = 100\n", + "value = [0, 50, 50]\n", + "class = versicolor\n", + "\n", + "\n", + "\n", + "0->2\n", + "\n", + "\n", + "False\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "gini = 0.168\n", + "samples = 54\n", + "value = [0, 49, 5]\n", + "class = versicolor\n", + "\n", + "\n", + "\n", + "2->3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "4\n", + "\n", + "gini = 0.043\n", + "samples = 46\n", + "value = [0, 1, 45]\n", + "class = virginica\n", + "\n", + "\n", + "\n", + "2->4\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "from graphviz import Source\n", "from sklearn.tree import export_graphviz\n", "\n", "export_graphviz(\n", " tree_clf,\n", - " out_file=image_path(\"iris_tree.dot\"),\n", + " out_file=os.path.join(IMAGES_PATH, \"iris_tree.dot\"),\n", " feature_names=iris.feature_names[2:],\n", " class_names=iris.target_names,\n", " rounded=True,\n", " filled=True\n", - " )" + " )\n", + "\n", + "Source.from_file(os.path.join(IMAGES_PATH, \"iris_tree.dot\"))" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -218,7 +307,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -227,7 +316,7 @@ "array([[0. , 0.90740741, 0.09259259]])" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -238,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -247,7 +336,7 @@ "array([1])" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -265,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -274,7 +363,7 @@ "array([[4.8, 1.8]])" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -285,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -299,7 +388,7 @@ " splitter='best')" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -315,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -352,7 +441,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -398,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -430,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -486,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -500,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -513,7 +602,7 @@ " presort=False, random_state=42, splitter='best')" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -527,7 +616,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -594,19 +683,151 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "export_graphviz(\n", " tree_reg1,\n", - " out_file=image_path(\"regression_tree.dot\"),\n", + " out_file=os.path.join(IMAGES_PATH, \"regression_tree.dot\"),\n", " feature_names=[\"x1\"],\n", " rounded=True,\n", " filled=True\n", " )" ] }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Tree\n", + "\n", + "\n", + "\n", + "0\n", + "\n", + "x1 <= 0.197\n", + "mse = 0.098\n", + "samples = 200\n", + "value = 0.354\n", + "\n", + "\n", + "\n", + "1\n", + "\n", + "x1 <= 0.092\n", + "mse = 0.038\n", + "samples = 44\n", + "value = 0.689\n", + "\n", + "\n", + "\n", + "0->1\n", + "\n", + "\n", + "True\n", + "\n", + "\n", + "\n", + "4\n", + "\n", + "x1 <= 0.772\n", + "mse = 0.074\n", + "samples = 156\n", + "value = 0.259\n", + "\n", + "\n", + "\n", + "0->4\n", + "\n", + "\n", + "False\n", + "\n", + "\n", + "\n", + "2\n", + "\n", + "mse = 0.018\n", + "samples = 20\n", + "value = 0.854\n", + "\n", + "\n", + "\n", + "1->2\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "3\n", + "\n", + "mse = 0.013\n", + "samples = 24\n", + "value = 0.552\n", + "\n", + "\n", + "\n", + "1->3\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "5\n", + "\n", + "mse = 0.015\n", + "samples = 110\n", + "value = 0.111\n", + "\n", + "\n", + "\n", + "4->5\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "6\n", + "\n", + "mse = 0.036\n", + "samples = 46\n", + "value = 0.615\n", + "\n", + "\n", + "\n", + "4->6\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Source.from_file(os.path.join(IMAGES_PATH, \"regression_tree.dot\"))" + ] + }, { "cell_type": "code", "execution_count": 18, diff --git a/07_ensemble_learning_and_random_forests.ipynb b/07_ensemble_learning_and_random_forests.ipynb index 717a34fd1..afc2709b2 100644 --- a/07_ensemble_learning_and_random_forests.ipynb +++ b/07_ensemble_learning_and_random_forests.ipynb @@ -25,7 +25,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20." ] }, { @@ -38,6 +38,10 @@ "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", "# Common imports\n", "import numpy as np\n", "import os\n", @@ -56,32 +60,15 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"ensembles\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def image_path(fig_id):\n", - " return os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id)\n", - "\n", - "def save_fig(fig_id, tight_layout=True):\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(image_path(fig_id) + \".png\", format='png', dpi=300)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook assumes you have installed Scikit-Learn ≥0.20." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import sklearn\n", - "assert sklearn.__version__ >= \"0.20\"" + " plt.savefig(path, format=fig_extension, dpi=resolution)" ] }, { diff --git a/08_dimensionality_reduction.ipynb b/08_dimensionality_reduction.ipynb index 6251a1ff5..578feaa3d 100644 --- a/08_dimensionality_reduction.ipynb +++ b/08_dimensionality_reduction.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20." ] }, { @@ -33,6 +33,10 @@ "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", "# Common imports\n", "import numpy as np\n", "import os\n", @@ -51,36 +55,21 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"dim_reduction\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)\n", + " plt.savefig(path, format=fig_extension, dpi=resolution)\n", "\n", "# Ignore useless warnings (see SciPy issue #5998)\n", "import warnings\n", "warnings.filterwarnings(action=\"ignore\", message=\"^internal gelsd\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook assumes you have installed Scikit-Learn ≥0.20." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import sklearn\n", - "assert sklearn.__version__ >= \"0.20\"" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/09_unsupervised_learning.ipynb b/09_unsupervised_learning.ipynb index 6303661d0..d23b00ea9 100644 --- a/09_unsupervised_learning.ipynb +++ b/09_unsupervised_learning.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20." ] }, { @@ -33,6 +33,10 @@ "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", "# Common imports\n", "import numpy as np\n", "import os\n", @@ -51,13 +55,15 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"unsupervised_learning\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)\n", + " plt.savefig(path, format=fig_extension, dpi=resolution)\n", "\n", "# Ignore useless warnings (see SciPy issue #5998)\n", "import warnings\n", diff --git a/10_neural_nets_with_keras.ipynb b/10_neural_nets_with_keras.ipynb index 7cd95032c..8fae35422 100644 --- a/10_neural_nets_with_keras.ipynb +++ b/10_neural_nets_with_keras.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead)." + "First, let's import a few common modules, ensure MatplotLib plots figures inline and prepare a function to save the figures. We also check that Python 3.5 or later is installed (although Python 2.x may work, it is deprecated so we strongly recommend you use Python 3 instead), as well as Scikit-Learn ≥0.20 and TensorFlow ≥2.0-preview." ] }, { @@ -33,6 +33,14 @@ "import sys\n", "assert sys.version_info >= (3, 5)\n", "\n", + "# Scikit-Learn ≥0.20 is required\n", + "import sklearn\n", + "assert sklearn.__version__ >= \"0.20\"\n", + "\n", + "# TensorFlow ≥2.0-preview is required\n", + "import tensorflow as tf\n", + "assert hasattr(tf.compat, \"v1\")\n", + "\n", "# Common imports\n", "import numpy as np\n", "import os\n", @@ -51,13 +59,15 @@ "# Where to save the figures\n", "PROJECT_ROOT_DIR = \".\"\n", "CHAPTER_ID = \"ann\"\n", + "IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID)\n", + "os.makedirs(IMAGES_PATH, exist_ok=True)\n", "\n", - "def save_fig(fig_id, tight_layout=True):\n", - " path = os.path.join(PROJECT_ROOT_DIR, \"images\", CHAPTER_ID, fig_id + \".png\")\n", + "def save_fig(fig_id, tight_layout=True, fig_extension=\"png\", resolution=300):\n", + " path = os.path.join(IMAGES_PATH, fig_id + \".\" + fig_extension)\n", " print(\"Saving figure\", fig_id)\n", " if tight_layout:\n", " plt.tight_layout()\n", - " plt.savefig(path, format='png', dpi=300)\n", + " plt.savefig(path, format=fig_extension, dpi=resolution)\n", "\n", "# Ignore useless warnings (see SciPy issue #5998)\n", "import warnings\n",