From aae77d9feb1a597b926e738ef02f6d8a8c97787a Mon Sep 17 00:00:00 2001
From: huseinzol05 <husein.zol05@gmail.com>
Date: Thu, 17 Mar 2022 19:59:38 +0800
Subject: [PATCH] fix sentiment docs

---
 docs/load-sentiment.ipynb              |   31 +-
 example/sentiment/load-sentiment.ipynb |   31 +-
 load-tokenizer.ipynb                   | 1527 ------------------------
 malaya/__init__.py                     |    2 +-
 setup.py                               |    2 +-
 5 files changed, 62 insertions(+), 1531 deletions(-)
 delete mode 100644 load-tokenizer.ipynb
diff --git a/docs/load-sentiment.ipynb b/docs/load-sentiment.ipynb
index d3bdce75..53ccaaf2 100644
--- a/docs/load-sentiment.ipynb
+++ b/docs/load-sentiment.ipynb
@@ -661,7 +661,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Open emotion visualization dashboard\n",
+    "#### Open sentiment visualization dashboard\n",
     "\n",
     "Default when you call `predict_words` it will open a browser with visualization dashboard, you can disable by `visualization=False`.\n",
     "\n",
@@ -1186,6 +1186,35 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.7"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
   }
  },
  "nbformat": 4,
diff --git a/example/sentiment/load-sentiment.ipynb b/example/sentiment/load-sentiment.ipynb
index d3bdce75..53ccaaf2 100644
--- a/example/sentiment/load-sentiment.ipynb
+++ b/example/sentiment/load-sentiment.ipynb
@@ -661,7 +661,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Open emotion visualization dashboard\n",
+    "#### Open sentiment visualization dashboard\n",
     "\n",
     "Default when you call `predict_words` it will open a browser with visualization dashboard, you can disable by `visualization=False`.\n",
     "\n",
@@ -1186,6 +1186,35 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.7.7"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
   }
  },
  "nbformat": 4,
diff --git a/load-tokenizer.ipynb b/load-tokenizer.ipynb
deleted file mode 100644
index 70d6a7e5..00000000
--- a/load-tokenizer.ipynb
+++ /dev/null
@@ -1,1527 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Word and sentence tokenizer"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "<div class=\"alert alert-info\">\n",
-    "\n",
-    "This tutorial is available as an IPython notebook at [Malaya/example/tokenizer](https://github.com/huseinzol05/Malaya/tree/master/example/tokenizer).\n",
-    "    \n",
-    "</div>"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CPU times: user 6.52 s, sys: 1.42 s, total: 7.94 s\n",
-      "Wall time: 9.94 s\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%time\n",
-    "import malaya"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "string1 = 'xjdi ke, y u xsuke makan HUSEIN kt situ tmpt, i hate it. pelikle, pada'\n",
-    "string2 = 'i mmg2 xske mknn HUSEIN kampng tmpat, i love them. pelikle saye'\n",
-    "string3 = 'perdana menteri ke11 sgt suka makn ayam, harganya cuma rm15.50'\n",
-    "string4 = 'pada 10/4, kementerian mengumumkan, 1/100'\n",
-    "string5 = 'Husein Zolkepli dapat tempat ke-12 lumba lari hari ni'\n",
-    "string6 = 'Husein Zolkepli (2011 - 2019) adalah ketua kampng di kedah sekolah King Edward ke-IV'\n",
-    "string7 = '2jam 30 minit aku tunggu kau, 60.1 kg kau ni, suhu harini 31.2c, aku dahaga minum 600ml'\n",
-    "string8 = 'online & desktop: regexr.com or download the desktop version for Mac'\n",
-    "string9 = 'belajaq unity di google.us.edi?34535/534534?dfg=g&fg unity'\n",
-    "string10 = 'Gambar ni membantu. Gambar tutorial >>. facebook. com/story. story_fbid=10206183032200965&id=1418962070'"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Load word tokenizer\n",
-    "\n",
-    "```python\n",
-    "class Tokenizer:\n",
-    "    def __init__(self, lowercase: bool = False, **kwargs):\n",
-    "        \"\"\"\n",
-    "        Load Tokenizer object.\n",
-    "        Check supported regex pattern at \n",
-    "        https://github.com/huseinzol05/Malaya/blob/master/malaya/text/regex.py#L85\n",
-    "\n",
-    "        Parameters\n",
-    "        ----------\n",
-    "        lowercase: bool, optional (default=False)\n",
-    "            lowercase tokens.\n",
-    "        emojis: bool, optional (default=True)\n",
-    "            True to keep emojis.\n",
-    "        urls: bool, optional (default=True)\n",
-    "            True to keep urls.\n",
-    "        urls_improved: bool, optional (default=True)\n",
-    "            True to keep urls, better version.\n",
-    "        tags: bool, optional (default=True)\n",
-    "            True to keep tags: <tag>.\n",
-    "        emails: bool, optional (default=True)\n",
-    "            True to keep emails.\n",
-    "        users: bool, optional (default=True)\n",
-    "            True to keep users handles: @cbaziotis.\n",
-    "        hashtags: bool, optional (default=True)\n",
-    "            True to keep hashtags.\n",
-    "        phones: bool, optional (default=True)\n",
-    "            True to keep phones.\n",
-    "        percents: bool, optional (default=True)\n",
-    "            True to keep percents.\n",
-    "        money: bool, optional (default=True)\n",
-    "            True to keep money expressions.\n",
-    "        date: bool, optional (default=True)\n",
-    "            True to keep date expressions.\n",
-    "        time: bool, optional (default=True)\n",
-    "            True to keep time expressions.\n",
-    "        acronyms: bool, optional (default=True)\n",
-    "            True to keep acronyms.\n",
-    "        emoticons: bool, optional (default=True)\n",
-    "            True to keep emoticons.\n",
-    "        censored: bool, optional (default=True)\n",
-    "            True to keep censored words: f**k.\n",
-    "        emphasis: bool, optional (default=True)\n",
-    "            True to keep words with emphasis: *very* good.\n",
-    "        numbers: bool, optional (default=True)\n",
-    "            True to keep numbers.\n",
-    "        temperature: bool, optional (default=True)\n",
-    "            True to keep temperatures\n",
-    "        distance: bool, optional (default=True)\n",
-    "            True to keep distances.\n",
-    "        volume: bool, optional (default=True)\n",
-    "            True to keep volumes.\n",
-    "        duration: bool, optional (default=True)\n",
-    "            True to keep durations.\n",
-    "        weight: bool, optional (default=True)\n",
-    "            True to keep weights.\n",
-    "        hypen: bool, optional (default=True)\n",
-    "            True to keep hypens.\n",
-    "        \"\"\"\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tokenizer = malaya.tokenizer.Tokenizer()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['xjdi',\n",
-       " 'ke',\n",
-       " ',',\n",
-       " 'y',\n",
-       " 'u',\n",
-       " 'xsuke',\n",
-       " 'makan',\n",
-       " 'HUSEIN',\n",
-       " 'kt',\n",
-       " 'situ',\n",
-       " 'tmpt',\n",
-       " ',',\n",
-       " 'i',\n",
-       " 'hate',\n",
-       " 'it',\n",
-       " '.',\n",
-       " 'pelikle',\n",
-       " ',',\n",
-       " 'pada']"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['i',\n",
-       " 'mmg2',\n",
-       " 'xske',\n",
-       " 'mknn',\n",
-       " 'HUSEIN',\n",
-       " 'kampng',\n",
-       " 'tmpat',\n",
-       " ',',\n",
-       " 'i',\n",
-       " 'love',\n",
-       " 'them',\n",
-       " '.',\n",
-       " 'pelikle',\n",
-       " 'saye']"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['perdana',\n",
-       " 'menteri',\n",
-       " 'ke11',\n",
-       " 'sgt',\n",
-       " 'suka',\n",
-       " 'makn',\n",
-       " 'ayam',\n",
-       " ',',\n",
-       " 'harganya',\n",
-       " 'cuma',\n",
-       " 'rm15.50']"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['pada',\n",
-       " '10',\n",
-       " '/',\n",
-       " '4',\n",
-       " ',',\n",
-       " 'kementerian',\n",
-       " 'mengumumkan',\n",
-       " ',',\n",
-       " '1',\n",
-       " '/',\n",
-       " '100']"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string4)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['Husein',\n",
-       " 'Zolkepli',\n",
-       " '(',\n",
-       " '2011',\n",
-       " '-',\n",
-       " '2019',\n",
-       " ')',\n",
-       " 'adalah',\n",
-       " 'ketua',\n",
-       " 'kampng',\n",
-       " 'di',\n",
-       " 'kedah',\n",
-       " 'sekolah',\n",
-       " 'King',\n",
-       " 'Edward',\n",
-       " 'ke-IV']"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string6)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['2jam',\n",
-       " '30 minit',\n",
-       " 'aku',\n",
-       " 'tunggu',\n",
-       " 'kau',\n",
-       " ',',\n",
-       " '60.1 kg',\n",
-       " 'kau',\n",
-       " 'ni',\n",
-       " ',',\n",
-       " 'suhu',\n",
-       " 'harini',\n",
-       " '31.2c',\n",
-       " ',',\n",
-       " 'aku',\n",
-       " 'dahaga',\n",
-       " 'minum',\n",
-       " '600ml']"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string7)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['online',\n",
-       " '&',\n",
-       " 'desktop',\n",
-       " ':',\n",
-       " 'regexr.com',\n",
-       " 'or',\n",
-       " 'download',\n",
-       " 'the',\n",
-       " 'desktop',\n",
-       " 'version',\n",
-       " 'for',\n",
-       " 'Mac']"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string8)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['belajaq', 'unity', 'di', 'google.us.edi?34535/534534?dfg=g&fg', 'unity']"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize(string9)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "####  url"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['website', 'saya', 'http://huseinhouse.com']"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('website saya http://huseinhouse.com')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['website', 'saya', 'huseinhouse.com']"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('website saya huseinhouse.com')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['website', 'saya', 'huseinhouse.com/pelik?a=1']"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('website saya huseinhouse.com/pelik?a=1')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### tags"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['panggil', 'saya', '<husein>']"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('panggil saya <husein>')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['panggil', 'saya', '<', 'husein', '>']"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('panggil saya <husein >')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### emails"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['email', 'saya', 'husein@rumah.com']"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('email saya husein@rumah.com')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['email', 'saya', 'husein@rumah.com.my']"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('email saya husein@rumah.com.my')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### users"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['twitter', 'saya', '@husein123zolkepli']"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('twitter saya @husein123zolkepli')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['twitter', 'saya', '@', 'husein123zolkepli']"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('twitter saya @ husein123zolkepli')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### hashtags"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['panggil', 'saya', '#huseincomel']"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('panggil saya #huseincomel')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['panggil', 'saya', '#', 'huseincomel']"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('panggil saya # huseincomel')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### phones"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['call', 'sye', 'di', '013-1234567']"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('call sye di 013-1234567')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['call', 'sye', 'di', '013', '-', '1234567']"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('call sye di 013- 1234567')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### percents"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'sokong', '100%']"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya sokong 100%')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'sokong', '100', '%']"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya sokong 100 %')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### money"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', 'rm100']"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal rm100')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', 'rm100k']"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal rm100k')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', 'rm100M']"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal rm100M')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', 'rm100.123M']"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal rm100.123M')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', '40 sen']"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal 40 sen')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['saya', 'tinggal', '21 ringgit', '50 sen']"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('saya tinggal 21 ringgit 50 sen')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### date"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['tarikh', 'perjumpaan', '10/11/2011']"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('tarikh perjumpaan 10/11/2011')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['tarikh', 'perjumpaan', '10-11-2011']"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('tarikh perjumpaan 10-11-2011')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['tarikh', 'perjumpaan', '12 mei 2011']"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('tarikh perjumpaan 12 mei 2011')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['tarikh', 'perjumpaan', 'mei 12 2011']"
-      ]
-     },
-     "execution_count": 36,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('tarikh perjumpaan mei 12 2011')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['jumpa', '3 am']"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('jumpa 3 am')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['jumpa', '22:00']"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('jumpa 22:00')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### censored"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['f**k', 'lah']"
-      ]
-     },
-     "execution_count": 39,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('f**k lah')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### emphasis"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['*damn*', 'good', 'weih']"
-      ]
-     },
-     "execution_count": 40,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('*damn* good weih')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### numbers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['no', 'saya', '123']"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('no saya 123')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### temperature"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['sejuk', 'harini', ',', '31.1c']"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('sejuk harini, 31.1c')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['sejuk', 'harini', ',', '31.1C']"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('sejuk harini, 31.1C')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### distance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['nak', 'sampai', 'lagi', '31km']"
-      ]
-     },
-     "execution_count": 44,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('nak sampai lagi 31km')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['nak', 'sampai', 'lagi', '31 km']"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('nak sampai lagi 31 km')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### volume"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['botol', 'ni', '400ml']"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('botol ni 400ml')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['botol', 'ni', '400 l']"
-      ]
-     },
-     "execution_count": 47,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('botol ni 400 l')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### duration"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['aku', 'dah', 'tunggu', 'kau', '2jam', 'kut']"
-      ]
-     },
-     "execution_count": 48,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('aku dah tunggu kau 2jam kut')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['aku', 'dah', 'tunggu', 'kau', '2 jam', 'kut']"
-      ]
-     },
-     "execution_count": 49,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('aku dah tunggu kau 2 jam kut')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['lagi', '10 minit', '3 jam']"
-      ]
-     },
-     "execution_count": 50,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('lagi 10 minit 3 jam')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### weight"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['berat', 'kau', '60 kg']"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('berat kau 60 kg')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['berat', 'kau', '60kg']"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('berat kau 60kg')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### hypen"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 53,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['sememang-memangnya', 'kau', 'sakai']"
-      ]
-     },
-     "execution_count": 53,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('sememang-memangnya kau sakai')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['sememang', '-', 'memangnya', 'kau', 'sakai']"
-      ]
-     },
-     "execution_count": 54,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tokenizer.tokenize('sememang- memangnya kau sakai')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Sentence tokenizer\n",
-    "\n",
-    "We considered prefixes, suffixes, starters, acronyms, websites, emails, digits, before digits, time and month to split a sentence into multiple sentences.\n",
-    "\n",
-    "```python\n",
-    "class SentenceTokenizer:\n",
-    "    def __init__(self):\n",
-    "        pass\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s = \"\"\"\n",
-    "no.1 polis bertemu dengan suspek di ladang getah. polis tembak pui pui pui bertubi tubi\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s_tokenizer = malaya.tokenizer.SentenceTokenizer()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['no.1 polis bertemu dengan suspek di ladang getah.',\n",
-       " 'polis tembak pui pui pui bertubi tubi.']"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "s_tokenizer.tokenize(s)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s = \"\"\"\n",
-    "email saya di husein.zol01@gmail.com, nanti jom berkopi\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 59,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['email saya di husein.zol01@gmail.com, nanti jom berkopi.']"
-      ]
-     },
-     "execution_count": 59,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "s_tokenizer.tokenize(s)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "s = \"\"\"\n",
-    "ke. 2 cerita nya begini. saya berjalan jalan ditepi muara jumpa anak dara.\n",
-    "\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['ke.2 cerita nya begini.',\n",
-       " 'saya berjalan jalan ditepi muara jumpa anak dara.']"
-      ]
-     },
-     "execution_count": 61,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "s_tokenizer.tokenize(s)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/malaya/__init__.py b/malaya/__init__.py
index d4a55b7c..dbd8ba42 100644
--- a/malaya/__init__.py
+++ b/malaya/__init__.py
@@ -9,7 +9,7 @@
 from malaya_boilerplate.utils import get_home
 
 version = '4.7'
-bump_version = '4.7.2'
+bump_version = '4.7.3'
 __version__ = bump_version
 
 package = 'malaya'
diff --git a/setup.py b/setup.py
index ac8a86fc..7d32563d 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ def readme():
 setuptools.setup(
     name=__packagename__,
     packages=setuptools.find_packages(),
-    version='4.7.2',
+    version='4.7.3',
     python_requires='>=3.6.*',
     description='Natural-Language-Toolkit for bahasa Malaysia, powered by Deep Learning Tensorflow.',
     long_description=readme(),