From de958452a09dce18aa1e5c1fb4e4d10d2119eac6 Mon Sep 17 00:00:00 2001 From: wkyoshida Date: Sat, 5 Nov 2022 22:17:42 -0300 Subject: [PATCH] #14 cldr package downloads --- .gitignore | 5 +++++ package.json | 5 +++++ .../extract_transform/process_unicode.py | 19 +++++++++++++++++- .../load/gen_emoji_suggestions.ipynb | 20 +++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 package.json diff --git a/.gitignore b/.gitignore index 5b933cb1b..61993f2bc 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,8 @@ dist *.egg-info # Caches __pycache__ + +# NPM files +########### +node_modules +package-lock.json diff --git a/package.json b/package.json new file mode 100644 index 000000000..bd182d03a --- /dev/null +++ b/package.json @@ -0,0 +1,5 @@ +{ + "dependencies": { + "cldr-annotations-derived-full": "latest" + } +} diff --git a/src/scribe_data/extract_transform/process_unicode.py b/src/scribe_data/extract_transform/process_unicode.py index 5799303c0..452ac90d9 100644 --- a/src/scribe_data/extract_transform/process_unicode.py +++ b/src/scribe_data/extract_transform/process_unicode.py @@ -8,6 +8,10 @@ gen_emoji_autosuggestions """ +import json + +from scribe_data.load.update_utils import get_language_iso + def gen_emoji_autosuggestions( language="English", num_emojis=500, @@ -42,6 +46,19 @@ def gen_emoji_autosuggestions( autosuggest_dict = {} - # TODO + ### TODO further updates - here for data loading illustration + + language = get_language_iso(language) + + cldr_file_path = f'node_modules/cldr-annotations-derived-full/annotationsDerived/{language}/annotations.json' + + with open(cldr_file_path, 'r') as file: + cldr_data = json.load(file) + + emoji_dict = cldr_data['annotationsDerived']['annotations'] + + print("Number of emojis loaded:", len(emoji_dict)) + + ### return autosuggest_dict diff --git a/src/scribe_data/load/gen_emoji_suggestions.ipynb b/src/scribe_data/load/gen_emoji_suggestions.ipynb index 94a33caff..979b025ab 100644 --- a/src/scribe_data/load/gen_emoji_suggestions.ipynb +++ b/src/scribe_data/load/gen_emoji_suggestions.ipynb @@ -71,6 +71,26 @@ "pwd = pwd.split(\"scribe_data\")[0]\n", "sys.path.append(pwd)" ] + }, + { + "cell_type": "markdown", + "id": "2add942e", + "metadata": {}, + "source": [ + "# Download Latest Unicode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e166da1", + "metadata": {}, + "outputs": [], + "source": [ + "%%bash\n", + ". $HOME/.nvm/nvm.sh # Pick up the 'npm' command\n", + "npm install" + ] } ], "metadata": {