def Text(text):
    """Wrap raw input as a normalized Python ``str``.

    Bytes input is decoded as UTF-8; the result is normalized to Unicode
    NFC so that equivalent composed/decomposed Vietnamese diacritic
    sequences compare equal before tokenization.

    Args:
        text: a ``str``, or a UTF-8 encoded ``bytes``-like object.

    Returns:
        The NFC-normalized ``str``.

    Raises:
        UnicodeDecodeError: if ``text`` is bytes that are not valid UTF-8.
    """
    # isinstance (not ``type(text) == str``) so str subclasses are accepted
    # as-is; the original type-equality check would route a str subclass to
    # .decode(), which str objects do not have in Python 3.
    if not isinstance(text, str):
        text = text.decode("utf-8")
    return unicodedata.normalize("NFC", text)