From d04c6d228955be284f2dfc8ae489ecce3a170b8d Mon Sep 17 00:00:00 2001 From: Mika Date: Thu, 5 Aug 2021 20:50:20 +0300 Subject: [PATCH] hfst-dev --- setup.py | 6 +++--- test_uralicnlp.py | 7 ++++--- uralicNLP/uralicApi.py | 9 +++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 4b61c08..3757ef2 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ # Versions should comply with PEP440. For a discussion on single-sourcing # the version across setup.py and the project code, see # https://packaging.python.org/en/latest/single_source_version.html - version='1.3.0', + version='1.3.1', description='An NLP library for Uralic languages such as Finnish and Sami. Also supports Arabic, Russian etc.', long_description=long_description, @@ -34,7 +34,7 @@ zip_safe=False, # Author details author='Mika Hämäläinen', - author_email='mika.hamalainen@helsinki.fi', + author_email='mika@rootroo.com', # Choose your license license='Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License', @@ -69,7 +69,7 @@ # your project is installed. For an analysis of "install_requires" vs pip's # requirements files see: # https://packaging.python.org/en/latest/requirements.html - install_requires=["requests", "hfst", "mikatools>=0.0.6", "argparse", "future>=0.18.2", "tinydb"], + install_requires=["requests", "hfst-dev", "mikatools>=0.0.6", "argparse", "future>=0.18.2", "tinydb"], # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, diff --git a/test_uralicnlp.py b/test_uralicnlp.py index 551b388..3a0d751 100644 --- a/test_uralicnlp.py +++ b/test_uralicnlp.py @@ -8,7 +8,7 @@ import re from mikatools import * -#uralicApi.get_all_forms("kissa", "N", "fin") +print(uralicApi.get_all_forms("kissa", "N", "fin")) #uralicApi.get_transducer("spa", analyzer=True).lookup_optimize() #print(uralicApi.analyze("hola", "spa")) @@ -123,9 +123,10 @@ print(word[0].get_attribute("form")) """ - +""" print(uralicApi.analyze("hörpähdin", "fin", neural_fallback=True)) print(uralicApi.lemmatize("nirhautan", "fin", neural_fallback=True)) print(uralicApi.generate("hömpötti+N+Sg+Gen", "fin", neural_fallback=True)) print(uralicApi.generate("koirailla+V+Act+Ind+Prs+Sg1", "fin", neural_fallback=True)) -print(uralicApi.analyze("juoksen", "fin", neural_fallback=True)) \ No newline at end of file +print(uralicApi.analyze("juoksen", "fin", neural_fallback=True)) +""" \ No newline at end of file diff --git a/uralicNLP/uralicApi.py b/uralicNLP/uralicApi.py index b1c7f39..b2facd6 100644 --- a/uralicNLP/uralicApi.py +++ b/uralicNLP/uralicApi.py @@ -21,7 +21,8 @@ # Fall back to Python 2's urllib2 from urllib2 import urlopen new_python = False -import hfst + +import hfst_dev as hfst api_url = "https://akusanat.com/smsxml/" download_server_url = "https://models.uralicnlp.com/nightly/" @@ -224,9 +225,9 @@ def get_all_forms(word, pos, language, descriptive=True, limit_forms=-1, filter_ analyzer2 = analyzer.copy() analyzer2.compose(reg) output = analyzer2.extract_paths(max_cycles=1, max_number=limit_forms,output='text').replace("@_EPSILON_SYMBOL_@","").split("\n") - output = filter(lambda x: x, output) - output = list(map(lambda x: x.split('\t'), output)) - return list(map(lambda x: (x[0], float(x[1]),), output)) + output = [_o.split('\t') for _o in output if _o] + output = [(":".join(_o[:-1]), float(_o[-1]), ) for _o in output] + return output def generate(query, language, force_local=True, descriptive=False, dictionary_forms=False, remove_symbols=True, filename=None, neural_fallback=False): if force_local or __where_models(language, safe=True):