From cfde8ba787cfff7f9d67ca5f3212677e75bdd651 Mon Sep 17 00:00:00 2001
From: ZenithClown
Date: Wed, 12 Apr 2023 14:06:21 +0530
Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=89=20bump=20code=20to=20v0.1.1=20rele?=
 =?UTF-8?q?ase,=20update=20boilerplate?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* remove long markdown sections from the notebook boilerplate - reduces
  notebook length and overhead
* fix imports and comment out unnecessary import statements
* add imports for tensorflow and sklearn.metrics as an example
* minor changes to pandas settings on import
---
 VERSION                     |   2 +-
 notebooks/BOILERPLATE.ipynb | 172 ++++++++++++++++++++++++++++++----------
 2 files changed, 132 insertions(+), 42 deletions(-)

diff --git a/VERSION b/VERSION
index b82608c..8308b63 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-v0.1.0
+v0.1.1
diff --git a/notebooks/BOILERPLATE.ipynb b/notebooks/BOILERPLATE.ipynb
index a1ab830..2632d4a 100644
--- a/notebooks/BOILERPLATE.ipynb
+++ b/notebooks/BOILERPLATE.ipynb
@@ -8,17 +8,16 @@
    "\n",
    "---\n",
    "\n",
-    "\n",
    "**Objective:** The file provides a simple *boilerplate* to concentrate on what is necessary, and stop doing same tasks! The boilerplate is also configured with certain [**nbextensions**](https://gitlab.com/ZenithClown/computer-configurations-and-setups) that I personally use. Install them, if required, else ignore them as they do not participate in any type of code-optimizations. For any new project *edit* this file or `File > Make a Copy` to get started with the project. Some settings and configurations are already provided, as mentioned below."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2023-01-11T15:50:13.490916Z",
-     "start_time": "2023-01-11T15:50:13.479904Z"
+     "end_time": "2023-04-12T08:16:36.922254Z",
+     "start_time": "2023-04-12T08:16:36.904106Z"
    }
   },
   "outputs": [
@@ -26,7 +25,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "Current Code Version: v0.1.0-beta\n"
+     "Current Code Version: v0.1.1\n",
+     "\n"
     ]
    }
   ],
@@ -59,11 +59,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2023-01-11T15:50:34.230463Z",
-     "start_time": "2023-01-11T15:50:34.217051Z"
+     "end_time": "2023-04-12T08:16:55.883978Z",
+     "start_time": "2023-04-12T08:16:55.867980Z"
    }
   },
   "outputs": [],
@@ -75,11 +75,11 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2023-01-11T15:51:07.931774Z",
-     "start_time": "2023-01-11T15:51:07.926776Z"
+     "end_time": "2023-04-12T08:28:01.539456Z",
+     "start_time": "2023-04-12T08:28:01.534825Z"
    }
   },
   "outputs": [],
@@ -100,7 +100,7 @@
   },
   "outputs": [],
   "source": [
-    "from copy import deepcopy # dataframe is mutable\n",
+    "# from copy import deepcopy # dataframe is mutable\n",
    "# from tqdm import tqdm as TQ # progress bar for loops\n",
    "# from uuid import uuid4 as UUID # unique identifier for objs"
   ]
@@ -133,7 +133,7 @@
   },
   "outputs": [],
   "source": [
-    "import logging # configure logging on `global arguments` section, as file path is required"
+    "# import logging # configure logging in the `global arguments` section, as the log-file path is required there"
   ]
  },
@@ -167,9 +167,59 @@
    "plt.style.use('default-style');\n",
    "\n",
    "pd.set_option('display.max_rows', 50) # max. rows to show\n",
-    "pd.set_option('display.max_columns', 15) # max. cols to show\n",
+    "pd.set_option('display.max_columns', 17) # max. cols to show\n",
    "np.set_printoptions(precision = 3, threshold = 15) # set np options\n",
-    "pd.options.display.float_format = '{:,.2f}'.format # float precisions"
+    "pd.options.display.float_format = '{:,.3f}'.format # float precision"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# sklearn metrics for analysis can be imported as below -\n",
+    "# for a `regression` problem, rmse is a typical choice of metric\n",
+    "# for rmse, use `squared = False` : https://stackoverflow.com/a/18623635/\n",
+    "# from sklearn.metrics import mean_squared_error as MSE"
+   ]
+  },
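+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative usage sketch, kept commented: `y_true` and `y_pred` are\n",
+    "# hypothetical arrays, and `squared = False` needs scikit-learn >= 0.22\n",
+    "# from sklearn.metrics import mean_squared_error as MSE\n",
+    "# rmse = MSE(y_true, y_pred, squared = False)"
+   ]
+  },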
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import tensorflow as tf\n",
+    "print(f\"Tensorflow Version: {tf.__version__}\", end = \"\\n\") # required >= 2.8\n",
+    "\n",
+    "# check physical devices, and gpu compute capability (if available)\n",
+    "if len(tf.config.list_physical_devices(device_type = \"GPU\")):\n",
+    "    # https://stackoverflow.com/q/38009682/6623589\n",
+    "    # https://stackoverflow.com/a/59179238/6623589\n",
+    "    print(\"GPU Computing Available.\", end = \" \")\n",
+    "    \n",
+    "    # experimentally, get the gpu details and computation power\n",
+    "    # https://www.tensorflow.org/api_docs/python/tf/config/experimental/get_device_details\n",
+    "    devices = tf.config.list_physical_devices(device_type = \"GPU\")[0] # first\n",
+    "    details = tf.config.experimental.get_device_details(devices) # only first\n",
+    "    # `details` is a dict with keys like 'device_name' and 'compute_capability'\n",
+    "    print(f\"EXPERIMENTAL : {details}\")\n",
+    "else:\n",
+    "    print(\"GPU Computing Not Available. If `GPU` is present, check configuration. Detected Devices:\")\n",
+    "    print(\" > \", tf.config.list_physical_devices())"
   ]
  },
  {
@@ -192,7 +242,23 @@
    "echo %VARNAME%\n",
    "```\n",
    "\n",
-    "Once you've setup your system with [`PYTHONPATH`](https://bic-berkeley.github.io/psych-214-fall-2016/using_pythonpath.html) as per [*python documentation*](https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH) is an important directory where any `import` statements looks for based on their order of importance. If a source code/module is not available check necessary environment variables and/or ask the administrator for the source files. For testing purpose, the module boasts the use of `src`, `utils` and `config` directories. However, these directories are available at `ROOT` level, and thus using `sys.path.append()` to add directories while importing."
+    "[`PYTHONPATH`](https://bic-berkeley.github.io/psych-214-fall-2016/using_pythonpath.html), as described in the [*python documentation*](https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH), lists the directories that `import` statements search, in their order of importance - set it up on your system first. If a source code/module is not available, check the necessary environment variables and/or ask the administrator for the source files. For testing purposes, the module uses the `src`, `utils` and `config` directories. However, these directories are available at the `ROOT` level, and are therefore added with `sys.path.append()` while importing.\n",
+    "\n",
+    "**Getting Started** with **`submodules`**\n",
+    "\n",
+    "A [`submodule`](https://git-scm.com/book/en/v2/Git-Tools-Submodules) provides functionality to integrate a separate project into the current repository - this is typically useful to avoid code duplication and to keep a central repository that controls dependent modules. More information on initializing and using a submodule is available [here](https://www.youtube.com/watch?v=gSlXo2iLBro); also check [Github-GISTS/ZenithClown](https://gist.github.com/ZenithClown)."
   ]
  },
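+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an illustrative sketch (the repository URL and target path below are placeholders, not actual dependencies of this project), a submodule is typically added and initialized as:\n",
+    "\n",
+    "```bash\n",
+    "git submodule add https://github.com/<user>/<module>.git src/<module>\n",
+    "git submodule update --init --recursive\n",
+    "```"
+   ]
+  },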
  {
@@ -203,12 +269,21 @@
   "source": [
    "# append `src` and sub-modules to call additional files these directory are\n",
    "# project specific and not to be added under environment or $PATH variable\n",
-    "sys.path.append(os.path.join(\"..\", \"src\")) # parent/source files directory\n",
    "sys.path.append(os.path.join(\"..\", \"src\", \"agents\")) # agents for reinforcement modelling\n",
    "sys.path.append(os.path.join(\"..\", \"src\", \"engine\")) # derivative engines for model control\n",
    "sys.path.append(os.path.join(\"..\", \"src\", \"models\")) # actual models for decision making tools"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# also append the `utilities` directory for additional helper code\n",
+    "sys.path.append(os.path.join(\"..\", \"utilities\"))"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -230,29 +305,37 @@
   "outputs": [],
   "source": [
    "ROOT = \"..\" # the document root is one level up, that contains all code structure\n",
-    "DATA = join(ROOT, \"data\") # the directory contains all data files, subdirectory (if any) can also be used/defined\n",
+    "DATA = os.path.join(ROOT, \"data\") # the directory contains all data files, subdirectory (if any) can also be used/defined\n",
    "\n",
    "# processed data directory can be used, such that preprocessing steps is not\n",
    "# required to run again-and-again each time on kernel restart\n",
-    "PROCESSED_DATA = join(DATA, \"processed\")"
+    "PROCESSED_DATA = os.path.join(DATA, \"processed\")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2022-05-07T12:02:38.898998Z",
-     "start_time": "2022-05-07T12:02:38.888970Z"
+     "end_time": "2023-04-12T08:28:13.816861Z",
+     "start_time": "2023-04-12T08:28:13.803865Z"
    }
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Code Execution Started on: Wed, Apr 12 2023\n"
+     ]
+    }
+   ],
   "source": [
    "# long projects can be overwhelming, and keeping track of files, outputs and\n",
    "# saved models can be intriguing! to help this out, `today` can be used. for\n",
    "# instance output can be stored at `output/<today>/` etc.\n",
    "# `today` is so configured that it permits windows/*.nix file/directory names\n",
-    "today = dt.strftime(dt.strptime(ctime(), \"%a %b %d %H:%M:%S %Y\"), \"%a, %b %d %Y\")\n",
+    "today = dt.datetime.strftime(dt.datetime.strptime(time.ctime(), \"%a %b %d %H:%M:%S %Y\"), \"%a, %b %d %Y\")\n",
    "print(f\"Code Execution Started on: {today}\") # only date, name of the sub-directory"
   ]
  },
@@ -267,12 +350,24 @@
   },
   "outputs": [],
   "source": [
-    "OUTPUT_DIR = join(ROOT, \"output\", today)\n",
-    "makedirs(OUTPUT_DIR, exist_ok = True) # create dir if not exist\n",
+    "OUTPUT_DIR = os.path.join(ROOT, \"output\", today)\n",
+    "os.makedirs(OUTPUT_DIR, exist_ok = True) # create dir if not exist\n",
    "\n",
    "# also create directory for `logs`\n",
-    "LOGS_DIR = join(ROOT, \"logs\", open(\"../VERSION\", 'rt').read())\n",
-    "makedirs(LOGS_DIR, exist_ok = True)"
+    "# LOGS_DIR = os.path.join(ROOT, \"logs\", open(\"../VERSION\", 'rt').read())\n",
+    "# os.makedirs(LOGS_DIR, exist_ok = True)"
   ]
  },
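+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# illustrative usage sketch: with `today` and `OUTPUT_DIR` defined above,\n",
+    "# dated artifacts can be saved as below (`frame` is a hypothetical dataframe;\n",
+    "# the same pattern applies to saved models, figures and reports)\n",
+    "# frame.to_csv(os.path.join(OUTPUT_DIR, \"results.csv\"), index = False)"
+   ]
+  },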
  {
@@ -286,26 +381,21 @@
   },
   "outputs": [],
   "source": [
-    "logging.basicConfig(\n",
-    "    filename = join(LOGS_DIR, f\"{today}.log\"), # change `reports` file name\n",
-    "    filemode = \"a\", # append logs to existing file, if file exists\n",
-    "    format = \"%(asctime)s - %(name)s - CLASS:%(levelname)s:%(levelno)s:L#%(lineno)d - %(message)s\",\n",
-    "    level = logging.DEBUG\n",
-    ")"
+    "# logging.basicConfig(\n",
+    "#     filename = os.path.join(LOGS_DIR, f\"{today}.log\"), # change `reports` file name\n",
+    "#     filemode = \"a\", # append logs to existing file, if file exists\n",
+    "#     format = \"%(asctime)s - %(name)s - CLASS:%(levelname)s:%(levelno)s:L#%(lineno)d - %(message)s\",\n",
+    "#     level = logging.DEBUG\n",
+    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Read Input File(s)\n",
-    "\n",
-    "A typical machine learning project revolves around six important stages (as available in [Amazon ML Life Cycle Documentation](https://docs.aws.amazon.com/wellarchitected/latest/machine-learning-lens/well-architected-machine-learning-lifecycle.html)). The notebook boilerplate is provided to address two pillars:\n",
-    "\n",
-    " 1. **Data Processing:** An integral part of any machine learning project, which is the most time consuming step! A brief introduction and best practices is available [here](https://towardsdatascience.com/introduction-to-data-preprocessing-in-machine-learning-a9fa83a5dc9d).\n",
-    " 2. **Model Development:** From understanding to deployment, this section address development (training, validating and testing) of an machine learning model.\n",
-    "\n",
-    "![ML Life Cycle](https://docs.aws.amazon.com/wellarchitected/latest/machine-learning-lens/images/ml-lifecycle.png)"
+    "## Model Development & PoC Section\n",
+    "\n",
+    "A typical machine learning project revolves around six important stages (as available in the [Amazon ML Life Cycle Documentation](https://docs.aws.amazon.com/wellarchitected/latest/machine-learning-lens/well-architected-machine-learning-lifecycle.html)). This notebook boilerplate can be used to understand the data file, perform statistical tests and carry out any other EDA required for an AI/ML application. Later, the study below can be turned into a *full-fledged* application using the other sections of the boilerplate."
   ]
  }
 ],