MycroftAI · ChanceNCounter · Mar 15, 2020 · Mar 24, 2020 · Mar 30, 2020 · Mar 30, 2020
diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py
@@ -14,16 +14,17 @@
 # limitations under the License.
 #
 from datetime import datetime, timedelta
-
 from dateutil.relativedelta import relativedelta
+from math import ceil, floor
+
+import json
+import re
 
 from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
     invert_dict, ReplaceableNumber, partition_list, tokenize, Token, Normalizer
 from lingua_franca.lang.common_data_en import _ARTICLES_EN, _NUM_STRING_EN, \
     _LONG_ORDINAL_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN
 
-import re
-import json
 from lingua_franca import resolve_resource_file
 from lingua_franca.time import now_local
 
@@ -77,14 +78,22 @@ def generate_plurals_en(originals):
 _STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN)
 
 
-def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False):
+def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False,
+                                    decimal_places=None):
     """
     Convert words in a string into their equivalent numbers.
     Args:
-        text str:
-        short_scale boolean: True if short scale numbers should be used.
-        ordinals boolean: True if ordinals (e.g. first, second, third) should
+        text (str):
+        short_scale (bool): True if short scale numbers should be used.
+        ordinals (bool): True if ordinals (e.g. first, second, third) should
                           be parsed to their number values (1, 2, 3...)
+        decimal_places (int or None): Positive value will round to X places.
+                                      Val of 0 will round up to nearest int,
+                                        equivalent to `math.ceil(result)`
+                                      Val of -1 will round down to nearest int,
+                                        equivalent to `math.floor(result)`
+                                      Val of None will perform no rounding,
+                                      potentially returning a very long string.
 
     Returns:
         str
@@ -94,7 +103,8 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False):
     text = text.lower()
     tokens = tokenize(text)
     numbers_to_replace = \
-        _extract_numbers_with_text_en(tokens, short_scale, ordinals)
+        _extract_numbers_with_text_en(
+            tokens, short_scale, ordinals, places=decimal_places)
     numbers_to_replace.sort(key=lambda number: number.start_index)
 
     results = []
@@ -114,7 +124,8 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False):
 
 
 def _extract_numbers_with_text_en(tokens, short_scale=True,
-                                  ordinals=False, fractional_numbers=True):
+                                  ordinals=False, fractional_numbers=True,
+                                  places=None):
     """
     Extract all numbers from a list of Tokens, with the words that
     represent them.
@@ -138,7 +149,8 @@ def _extract_numbers_with_text_en(tokens, short_scale=True,
     while True:
         to_replace = \
             _extract_number_with_text_en(tokens, short_scale,
-                                         ordinals, fractional_numbers)
+                                         ordinals, fractional_numbers,
+                                         places=places)
 
         if not to_replace:
             break
@@ -156,7 +168,8 @@ def _extract_numbers_with_text_en(tokens, short_scale=True,
 
 
 def _extract_number_with_text_en(tokens, short_scale=True,
-                                 ordinals=False, fractional_numbers=True):
+                                 ordinals=False, fractional_numbers=True,
+                                 places=None):
     """
     This function extracts a number from a list of Tokens.
 
@@ -172,15 +185,17 @@ def _extract_number_with_text_en(tokens, short_scale=True,
     """
     number, tokens = \
         _extract_number_with_text_en_helper(tokens, short_scale,
-                                            ordinals, fractional_numbers)
+                                            ordinals, fractional_numbers,
+                                            places=places)
     while tokens and tokens[0].word in _ARTICLES_EN:
         tokens.pop(0)
     return ReplaceableNumber(number, tokens)
 
 
 def _extract_number_with_text_en_helper(tokens,
                                         short_scale=True, ordinals=False,
-                                        fractional_numbers=True):
+                                        fractional_numbers=True,
+                                        places=None):
     """
     Helper for _extract_number_with_text_en.
 
@@ -205,7 +220,8 @@ def _extract_number_with_text_en_helper(tokens,
             return fraction, fraction_text
 
         decimal, decimal_text = \
-            _extract_decimal_with_text_en(tokens, short_scale, ordinals)
+            _extract_decimal_with_text_en(
+                tokens, short_scale, ordinals, places=places)
         if decimal:
             return decimal, decimal_text
 
@@ -254,7 +270,7 @@ def _extract_fraction_with_text_en(tokens, short_scale, ordinals):
     return None, None
 
 
-def _extract_decimal_with_text_en(tokens, short_scale, ordinals):
+def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None):
     """
     Extract decimal numbers from a string.
 
@@ -264,13 +280,16 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals):
         While this is a helper for extractnumber_en, it also depends on
         extractnumber_en, to parse out the components of the decimal.
 
-        This does not currently handle things like:
-            number dot number number number
-
     Args:
         tokens [Token]: The text to parse.
         short_scale boolean:
         ordinals boolean:
+        places int or None: Number of decimal places to return
+                            None performs no rounding
+                            Positive int rounds to that many places
+                            0 rounds up to nearest int (math.ceil)
+                            -1 rounds down to nearest int (math.floor)
+                            values below -1 are truncated by int(), not rejected
 
     Returns:
         (float, [Token])
@@ -281,24 +300,58 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals):
     for c in _DECIMAL_MARKER:
         partitions = partition_list(tokens, lambda t: t.word == c)
 
-        if len(partitions) == 3:
+        if len(partitions) >= 3:
             numbers1 = \
                 _extract_numbers_with_text_en(partitions[0], short_scale,
-                                              ordinals, fractional_numbers=False)
+                                              ordinals, fractional_numbers=False,
+                                              places=places)
             numbers2 = \
                 _extract_numbers_with_text_en(partitions[2], short_scale,
-                                              ordinals, fractional_numbers=False)
-
+                                              ordinals, fractional_numbers=False,
+                                              places=places)
             if not numbers1 or not numbers2:
                 return None, None
 
+            # `numbers2` may have caught numbers which are part of the
+            # input string, but which are not part of *this* number.
+            # For example, for "a ratio of one point five to one",
+            # `numbers2` might be `[5, 1]`.
+            #
+            # Keep only the leading run of numbers whose first-token indices
+            # are consecutive. NOTE(review): a number right after a multi-word
+            # number is dropped too -- confirm that is intended.
+            idx = 1
+            stop = False
+            while idx < len(numbers2) and not stop:
+                if numbers2[idx].tokens[0].index != numbers2[idx-1].tokens[0].index + 1 or \
+                        numbers2[idx].value is None:
+                    stop = True
+                else:
+                    idx += 1
+            numbers2 = numbers2[:idx]
+
             number = numbers1[-1]
-            decimal = numbers2[0]
 
-            # TODO handle number dot number number number
-            if "." not in str(decimal.text):
-                return number.value + float('0.' + str(decimal.value)), \
-                    number.tokens + partitions[1] + decimal.tokens
+
+            if "." not in str(numbers2[0].text):
+                return_value = float('0.' + "".join([str(
+                    decimal.value) for decimal in numbers2]))
+                return_value = number.value + return_value
+                if places is not None:
+                    if places == 0:
+                        return_value = ceil(return_value)
+                    elif places == -1:
+                        return_value = floor(return_value)
+                    if places < 1:
+                        return_value = int(return_value)
+                return_tokens = number.tokens + partitions[1]
+                for n in numbers2:
+                    return_tokens += n.tokens
+                if not places:
+                    return return_value, return_tokens
+
+                return (round(return_value, places) if places > 0
+                        else return_value), return_tokens
     return None, None
 
 
@@ -319,8 +372,8 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
         The value parsed, and tokens that it corresponds to.
 
     """
-    multiplies, string_num_ordinal, string_num_scale = \
-        _initialize_number_data(short_scale)
+    multiplies, string_num_ordinal, string_num_scale = _initialize_number_data(
+        short_scale)
 
     number_words = []  # type: [Token]
     val = False
@@ -560,7 +613,7 @@ def _initialize_number_data(short_scale):
     return multiplies, string_num_ordinal_en, string_num_scale_en
 
 
-def extractnumber_en(text, short_scale=True, ordinals=False):
+def extractnumber_en(text, short_scale=True, ordinals=False, decimal_places=None):
     """
     This function extracts a number from a text string,
     handles pronunciations in long scale and short scale
@@ -571,13 +624,15 @@ def extractnumber_en(text, short_scale=True, ordinals=False):
         text (str): the string to normalize
         short_scale (bool): use short scale if True, long scale if False
         ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
+        decimal_places (int or None): N>0 rounds to N places, 0 ceils, -1 floors, None: no rounding
     Returns:
         (int) or (float) or False: The extracted number or False if no number
                                    was found
 
     """
     return _extract_number_with_text_en(tokenize(text.lower()),
-                                        short_scale, ordinals).value
+                                        short_scale, ordinals,
+                                        places=decimal_places).value
 
 
 def extract_duration_en(text):
@@ -1476,7 +1531,7 @@ def isFractional_en(input_str, short_scale=True):
     return False
 
 
-def extract_numbers_en(text, short_scale=True, ordinals=False):
+def extract_numbers_en(text, short_scale=True, ordinals=False, decimal_places=None):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -1487,11 +1542,12 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
             is now common in most English speaking countries.
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
+        decimal_places (int or None): N>0 rounds to N places, 0 ceils, -1 floors, None: no rounding
     Returns:
         list: list of extracted numbers as floats
     """
     results = _extract_numbers_with_text_en(tokenize(text),
-                                            short_scale, ordinals)
+                                            short_scale, ordinals, places=decimal_places)
     return [float(result.value) for result in results]
 
 

diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py
@@ -77,8 +77,12 @@ def match_one(query, choices):
     else:
         return best
 
+# TODO update these docstrings when decimal_places has been implemented
+#     in all parsers
 
-def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
+
+def extract_numbers(text, short_scale=True, ordinals=False, lang=None,
+                    decimal_places=None):
     """
         Takes in a string and extracts a list of numbers.
 
@@ -90,12 +94,19 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str): the BCP-47 code for the language to use, None uses default
+        decimal_places (int or None): Positive value will round to X places.
+                                      Val of 0 will round up to nearest int,
+                                        equivalent to `math.ceil(result)`
+                                      Val of -1 will round down to nearest int,
+                                        equivalent to `math.floor(result)`
+                                      Val of None will perform no rounding.
+                                      Only some parsers (e.g. "en") honor this.
     Returns:
         list: list of extracted numbers as floats, or empty list if none found
     """
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
-        return extract_numbers_en(text, short_scale, ordinals)
+        return extract_numbers_en(text, short_scale, ordinals, decimal_places)
     elif lang_code == "de":
         return extract_numbers_de(text, short_scale, ordinals)
     elif lang_code == "fr":
@@ -112,7 +123,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None):
     return []
 
 
-def extract_number(text, short_scale=True, ordinals=False, lang=None):
+def extract_number(text, short_scale=True, ordinals=False, lang=None,
+                   decimal_places=None):
     """Takes in a string and extracts a number.
 
     Args:
@@ -123,14 +135,21 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None):
             See https://en.wikipedia.org/wiki/Names_of_large_numbers
         ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
         lang (str): the BCP-47 code for the language to use, None uses default
+        decimal_places (int or None): Positive value will round to X places.
+                                      Val of 0 will round up to nearest int,
+                                        equivalent to `math.ceil(result)`
+                                      Val of -1 will round down to nearest int,
+                                        equivalent to `math.floor(result)`
+                                      Val of None will perform no rounding.
+                                      Only some parsers (e.g. "en") honor this.
     Returns:
         (int, float or False): The number extracted or False if the input
                                text contains no numbers
     """
     lang_code = get_primary_lang_code(lang)
     if lang_code == "en":
         return extractnumber_en(text, short_scale=short_scale,
-                                ordinals=ordinals)
+                                ordinals=ordinals, decimal_places=decimal_places)
     elif lang_code == "es":
         return extractnumber_es(text)
     elif lang_code == "pt":

diff --git a/test/test_parse.py b/test/test_parse.py
@@ -151,10 +151,12 @@ def test_extract_number(self):
         self.assertEqual(extract_number("eight hundred trillion two hundred \
                                         fifty seven"), 800000000000257.0)
 
-        # TODO handle this case
-        # self.assertEqual(
-        #    extract_number("6 dot six six six"),
-        #    6.666)
+        self.assertEqual(extract_number("6 dot six six six"), 6.666)
+        self.assertEqual(extract_number(
+            "6 dot six six six", decimal_places=2), round(6.666, 2))
+        self.assertEqual(extract_number(
+            "6 point seventy", decimal_places=2), 6.7)
+
         self.assertTrue(extract_number("The tennis player is fast") is False)
         self.assertTrue(extract_number("fraggle") is False)
 
@@ -735,6 +737,24 @@ def test_multiple_numbers(self):
         self.assertEqual(extract_numbers("this is a seven eight nine and a"
                                          " half test"),
                          [7.0, 8.0, 9.5])
+        self.assertEqual(extract_numbers("this is a six point five seven nine"
+                                         " bingo ten nancy forty six test"),
+                         [6.579, 10.0, 46.0])
+        self.assertEqual(extract_numbers("this is a six point five seven nine"
+                                         " bingo ten nancy forty six test"
+                                         " with decimal rounding", decimal_places=2),
+                         [round(6.579, 2), 10, 46])
+        # test integer rounding, multiple decimals in string
+        self.assertEqual(extract_numbers(
+            "five hundred seventy point seven two and thirty one point eight"),
+            [570.72, 31.8])
+        self.assertEqual(extract_numbers(
+            "five hundred seventy point seven two and thirty one point eight",
+            decimal_places=0), [571, 32])
+        self.assertEqual(extract_numbers(
+            "five hundred seventy point seven two and thirty one point eight",
+            decimal_places=-1), [570, 31])
+
 
     def test_contractions(self):
         self.assertEqual(normalize("ain't"), "is not")