Skip to content

Commit

Permalink
Merge pull request scribe-org#480 from axif0/pyICU
Browse files Browse the repository at this point in the history
(New) Fixed pyICU capabilities for emoji functionalities
  • Loading branch information
andrewtavis authored Oct 24, 2024
2 parents c13f50e + c35989f commit 328d916
Show file tree
Hide file tree
Showing 3 changed files with 264 additions and 34 deletions.
211 changes: 211 additions & 0 deletions src/scribe_data/check/check_pyicu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
"""
Check to see if the requirements of the emoji process are installed.
.. raw:: html
<!--
* Copyright (C) 2024 Scribe
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
-->
"""

import os
import platform # added to check the OS
import subprocess
import sys
from pathlib import Path

import pkg_resources
import requests


def check_if_pyicu_installed():
    """
    Report whether PyICU appears among the distributions known to pkg_resources.

    Returns
    -------
        bool
            True if a distribution whose key is 'pyicu' is present in
            ``pkg_resources.working_set``, False otherwise.
    """
    for distribution in pkg_resources.working_set:
        if distribution.key == "pyicu":
            return True

    return False


def get_python_version_and_architecture():
    """
    Describe the running interpreter using the tags found in Windows wheel names.

    Returns
    -------
        python_version : str
            The tag 'cpXY' built from the interpreter's major and minor version
            (e.g. 'cp311').

        architecture : str
            'win_amd64' when ``sys.maxsize`` exceeds 2**32, otherwise 'win32'.
    """
    major, minor = sys.version_info[:2]

    if sys.maxsize > 2**32:
        platform_tag = "win_amd64"
    else:
        platform_tag = "win32"

    return f"cp{major}{minor}", platform_tag


def fetch_wheel_releases(timeout=30):
    """
    Fetch the release data for PyICU wheels from the GitHub releases API.

    Parameters
    ----------
        timeout : float, optional
            Seconds to wait for the GitHub API before giving up (default 30).
            Without a timeout a stalled connection would block the caller forever.

    Returns
    -------
        available_wheels : list
            A list of (wheel file name, download URL) tuples, in the order the
            API returned the releases and their assets.

        total_size_mb : float
            The combined size in MB of every wheel listed in ``available_wheels``
            (i.e. all wheels of all releases, not the size of a single wheel).

    Raises
    ------
        requests.RequestException
            If the request fails, times out or returns an error status.
    """
    url = "https://api.github.com/repos/cgohlke/pyicu-build/releases"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # raise an error for bad responses

    available_wheels = []
    total_size_bytes = 0

    for release in response.json():
        # A release without uploaded files simply contributes nothing.
        for asset in release.get("assets", []):
            if asset["name"].endswith(".whl"):
                available_wheels.append((asset["name"], asset["browser_download_url"]))
                total_size_bytes += asset["size"]

    total_size_mb = total_size_bytes / (1024 * 1024)  # convert bytes to MB
    return available_wheels, total_size_mb


def download_wheel_file(wheel_url, output_dir, timeout=30):
    """
    Download the wheel file from the given URL into ``output_dir``.

    The file keeps the last path component of the URL as its name, since pip
    needs the original wheel file name to install it.

    Parameters
    ----------
        wheel_url : str
            The URL of the wheel file to download.

        output_dir : str
            The directory to save the downloaded file.

        timeout : float, optional
            Seconds to wait for the server before giving up (default 30).

    Returns
    -------
        str
            Path to the downloaded wheel file.

    Raises
    ------
        requests.RequestException
            If the request fails, times out or returns an error status.
    """
    wheel_filename = os.path.basename(wheel_url)
    wheel_path = os.path.join(output_dir, wheel_filename)

    # Stream the body so the wheel is written in chunks rather than being held
    # in memory in full; the context manager releases the connection afterwards.
    with requests.get(wheel_url, stream=True, timeout=timeout) as response:
        response.raise_for_status()  # raise before creating any file on disk

        with open(wheel_path, "wb") as wheel_file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                wheel_file.write(chunk)

    return wheel_path


def find_matching_wheel(wheels, python_version, architecture):
    """
    Find the matching wheel file based on Python version and architecture.

    Wheel file names are compared tag by tag rather than by substring, so that
    'cp313' does not select a free-threaded 'cp313-cp313t' build and a tag is
    never matched in the middle of a longer one.

    Parameters
    ----------
        wheels : list
            The list of available wheels as (file name, download URL) tuples.

        python_version : str
            The Python version (e.g., 'cp311').

        architecture : str
            The architecture type (e.g., 'win_amd64').

    Returns
    -------
        str
            The download URL of the first matching wheel or None if not found.
    """
    # ABI tag of the free-threaded build of the same version (e.g. 'cp313t'),
    # which a regular interpreter cannot load.
    free_threaded_tag = f"{python_version}t"

    for name, download_url in wheels:
        # Wheel names separate their fields with '-' and compound tags with '.'.
        tags = set(name.replace(".", "-").split("-"))

        if (
            python_version in tags
            and architecture in tags
            and free_threaded_tag not in tags
        ):
            return download_url

    return None


def _confirm_pyicu_installation(package_name, source):
    """Ask the user whether the package may be fetched from ``source``; return True on consent."""
    print(
        f"{package_name} is not installed.\nIt will be downloaded from '{source}'"
        "\nDo you want to proceed? (Y/n)?"
    )

    # An empty answer counts as consent because "Y" is the advertised default.
    if input().strip().lower() in {"", "y", "yes"}:
        print("Proceeding with installation...")
        return True

    print("Installation aborted by the user.")
    return False


def _pip_install(package_name, target):
    """Run ``pip install target`` with the current interpreter; return True on success."""
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", str(target)], check=True
        )

    except subprocess.CalledProcessError as e:
        print(f"Error occurred while installing {package_name}: {e}")
        return False

    print(f"{package_name} has been installed successfully.")
    return True


def check_and_install_pyicu():
    """
    Make sure PyICU is installed, offering to install it when it is missing.

    If PyICU is already listed in ``pkg_resources.working_set`` nothing is done.
    Otherwise the user is asked for confirmation and the package is installed
    with pip: directly by name on non-Windows systems, and from a matching
    pre-built wheel of the cgohlke/pyicu-build GitHub releases on Windows.

    Returns
    -------
        bool
            True if PyICU was already installed or has just been installed
            successfully; False if the user declined, no suitable wheel exists,
            a download failed or pip reported an error.
    """
    import tempfile

    package_name = "PyICU"
    installed_packages = {pkg.key for pkg in pkg_resources.working_set}
    if package_name.lower() in installed_packages:
        return True

    # Check the operating system.
    if platform.system() != "Windows":
        # pip resolves the package by name, so the GitHub wheel listing is not
        # needed here and is deliberately not requested.
        if not _confirm_pyicu_installation(
            package_name, "https://pypi.org/project/PyICU/"
        ):
            return False

        return _pip_install(package_name, package_name)

    # Windows-specific installation using wheel files.
    try:
        # The second value is the combined size of every published wheel, not
        # the size of the one wheel downloaded below, so it is not shown.
        wheels, _ = fetch_wheel_releases()

    except requests.RequestException as e:
        print(f"Could not retrieve the list of {package_name} wheels: {e}")
        return False

    python_version, architecture = get_python_version_and_architecture()

    # Find the matching wheel before asking for consent, so the user is not
    # prompted for an installation that cannot succeed.
    wheel_url = find_matching_wheel(wheels, python_version, architecture)
    if not wheel_url:
        print("No matching wheel file found for your Python version and architecture.")
        return False

    if not _confirm_pyicu_installation(
        package_name, "https://github.com/cgohlke/pyicu-build"
    ):
        return False

    # The temporary directory removes the wheel whether or not pip succeeds and
    # keeps the user's working directory untouched.
    with tempfile.TemporaryDirectory() as download_dir:
        try:
            wheel_path = download_wheel_file(wheel_url, download_dir)

        except (requests.RequestException, OSError) as e:
            print(f"Error occurred while downloading {package_name}: {e}")
            return False

        return _pip_install(package_name, wheel_path)
9 changes: 0 additions & 9 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,3 @@ def get_data(

if interactive:
return True

# Handle emoji keywords process failure.
elif data_type in {"emoji-keywords", "emoji_keywords"}:
print(
"\nThe Scribe-Data emoji functionality is powered by PyICU, which is currently not installed."
)
print(
"Please check the installation guide at https://github.com/scribe-org/Scribe-Data/blob/main/src/scribe_data/unicode/UNICODE_INSTALLTION.md for more information.\n"
)
78 changes: 53 additions & 25 deletions src/scribe_data/unicode/generate_emoji_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
import os
from pathlib import Path

from scribe_data.check.check_pyicu import (
check_and_install_pyicu,
check_if_pyicu_installed,
)
from scribe_data.unicode.process_unicode import gen_emoji_lexicon
from scribe_data.utils import export_formatted_data, get_language_iso

Expand All @@ -31,29 +35,53 @@


def generate_emoji(language, output_dir: str = None):
    """
    Generate emoji keywords for a language and export them to the given directory.

    The function first makes sure PyICU is available via check_and_install_pyicu.
    When PyICU is usable it checks that CLDR annotations exist for the language,
    creates the export directory, builds the emoji lexicon and exports it.

    Parameters
    ----------
        language : str
            The language for which to generate emoji keywords. It is converted
            to an ISO code with get_language_iso and capitalized for the name of
            the export directory.

        output_dir : str, optional
            The directory where the generated data will be saved. A leading
            './' is dropped when creating the language sub-directory.

    Returns
    -------
        None
            The function does not return a value; it writes the data via
            export_formatted_data and prints status messages.
    """
    if not check_and_install_pyicu():
        # The user declined the installation or it failed; messages were printed.
        return

    if not check_if_pyicu_installed():
        # PyICU was installed during this call but is not reported as installed yet.
        # NOTE(review): presumably the package list is a snapshot taken at
        # import time, so the command has to be run again - confirm.
        print("Thank you.")
        return

    iso = get_language_iso(language=language)
    path_to_cldr_annotations = (
        Path(__file__).parent / "cldr-annotations-full" / "annotations"
    )
    if iso in os.listdir(path_to_cldr_annotations):
        print(f"Emoji Generation for language {language} is supported")

    else:
        print(f"Emoji Generation for language {language} is not supported")
        return

    # output_dir defaults to None, which has no startswith(); only prepare the
    # language directory when a directory was actually given.
    # NOTE(review): assumes export_formatted_data falls back to a default
    # location when file_path is None - confirm against its implementation.
    if output_dir is not None:
        updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
        export_dir = Path(updated_path) / language.capitalize()
        export_dir.mkdir(parents=True, exist_ok=True)

    if emoji_keywords_dict := gen_emoji_lexicon(
        language=language,
        emojis_per_keyword=EMOJI_KEYWORDS_DICT,
    ):
        export_formatted_data(
            file_path=output_dir,
            formatted_data=emoji_keywords_dict,
            query_data_in_use=True,
            language=language.capitalize(),
            data_type=DATA_TYPE,
        )

0 comments on commit 328d916

Please sign in to comment.