"""
Check to see if the requirements of the emoji process are installed.

.. raw:: html
    <!-- NOTE(review): the HTML content of this directive is missing from the reviewed source; restore it here. -->
"""

# ---------------------------------------------------------------------------
# src/scribe_data/check/check_pyicu.py (new file)
# ---------------------------------------------------------------------------

import importlib.metadata
import os
import platform  # added to check the OS
import subprocess
import sys
import tempfile
from pathlib import Path

# GitHub API endpoint that lists the releases (and wheel assets) of the prebuilt PyICU wheels.
_WHEEL_RELEASES_API_URL = "https://api.github.com/repos/cgohlke/pyicu-build/releases"

# Human-readable page shown to the user before anything is downloaded.
_WHEEL_RELEASES_PAGE_URL = "https://github.com/cgohlke/pyicu-build/releases"

# Upper bound for each HTTP request so that a stalled connection cannot hang the CLI.
_REQUEST_TIMEOUT_SECONDS = 30

_BYTES_PER_MB = 1024 * 1024


def check_if_pyicu_installed():
    """
    Check whether the PyICU distribution is installed in the current environment.

    Returns
    -------
    bool
        True if PyICU's metadata can be found, False otherwise.
    """
    # The metadata is looked up on every call instead of being read from a set that was built at
    # import time, so an installation made by a pip subprocess during this run is detected.
    importlib.invalidate_caches()
    try:
        importlib.metadata.distribution("PyICU")

    except importlib.metadata.PackageNotFoundError:
        return False

    return True


def get_python_version_and_architecture():
    """
    Get the current Python version and architecture as wheel tags.

    Returns
    -------
    python_version : str
        The Python version in the format 'cpXY'.

    architecture : str
        The Windows platform tag ('win_amd64' or 'win32').
    """
    version = sys.version_info
    python_version = f"cp{version.major}{version.minor}"

    # Only 64-bit and 32-bit interpreters are told apart here.
    architecture = "win_amd64" if sys.maxsize > 2**32 else "win32"

    return python_version, architecture


def _fetch_wheel_assets():
    """
    Fetch the wheel assets of the PyICU releases listed by the GitHub API.

    Returns
    -------
    list
        A list of (file name, download URL, size in bytes) tuples, in the order returned by the API.

    Raises
    ------
    requests.RequestException
        If the request fails, times out or returns an error status.
    """
    # Imported here so that merely checking for PyICU does not require requests.
    import requests

    response = requests.get(_WHEEL_RELEASES_API_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()  # raise an error for bad responses

    return [
        (asset["name"], asset["browser_download_url"], asset["size"])
        for release in response.json()
        for asset in release["assets"]
        if asset["name"].endswith(".whl")
    ]


def fetch_wheel_releases():
    """
    Fetch the release data for PyICU from GitHub.

    Returns
    -------
    available_wheels : list
        A list of tuples containing wheel file names and their download URLs.

    total_size_mb : float
        The total size of all available wheels in MB (the sum over every listed wheel).

    Raises
    ------
    requests.RequestException
        If the request fails, times out or returns an error status.
    """
    assets = _fetch_wheel_assets()

    available_wheels = [(name, download_url) for name, download_url, _ in assets]
    total_size_mb = sum(size for _, _, size in assets) / _BYTES_PER_MB  # convert bytes to MB

    return available_wheels, total_size_mb


def download_wheel_file(wheel_url, output_dir):
    """
    Download the wheel file from the given URL.

    Parameters
    ----------
    wheel_url : str
        The URL of the wheel file to download.

    output_dir : str
        The directory to save the downloaded file.

    Returns
    -------
    str : path to the downloaded wheel file.

    Raises
    ------
    requests.RequestException
        If the download fails, times out or returns an error status.
    """
    # Imported here so that merely checking for PyICU does not require requests.
    import requests

    response = requests.get(wheel_url, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()  # raise an error for bad responses

    wheel_filename = os.path.basename(wheel_url)
    wheel_path = os.path.join(output_dir, wheel_filename)

    with open(wheel_path, "wb") as wheel_file:
        wheel_file.write(response.content)

    return wheel_path


def find_matching_wheel(wheels, python_version, architecture):
    """
    Find the matching wheel file based on Python version and architecture.

    Parameters
    ----------
    wheels : list
        The list of available wheels as (file name, download URL) tuples.

    python_version : str
        The Python version (e.g., 'cp311').

    architecture : str
        The architecture type (e.g., 'win_amd64').

    Returns
    -------
    str : The download URL of the first matching wheel or None if not found.
    """
    # ABI tags an interpreter identified only by `python_version` can load.
    accepted_abi_tags = (python_version, "abi3", "none")

    for name, download_url in wheels:
        if not name.endswith(".whl"):
            continue

        # Wheel names end in "-{python tag}-{abi tag}-{platform tag}.whl". The tags are compared
        # exactly because a substring test would let 'cp313' match a 'cp313-cp313t' wheel.
        fields = name[: -len(".whl")].split("-")
        if len(fields) < 5:
            continue

        python_tag, abi_tag, platform_tag = fields[-3:]
        if (
            python_version in python_tag.split(".")
            and abi_tag in accepted_abi_tags
            and architecture in platform_tag.split(".")
        ):
            return download_url

    return None


def _confirm_installation():
    """Read the user's answer to the installation prompt and return True if they agreed."""
    user_input = input().strip().lower()
    if user_input in ("", "y", "yes"):
        print("Proceeding with installation...")
        return True

    print("Installation aborted by the user.")
    return False


def _pip_install(requirement, package_name):
    """Install `requirement` (a package name or a wheel path) with pip and return True on success."""
    try:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", requirement], check=True
        )

    except subprocess.CalledProcessError as e:
        print(f"Error occurred while installing {package_name}: {e}")
        return False

    print(f"{package_name} has been installed successfully.")
    return True


def check_and_install_pyicu():
    """
    Check whether PyICU is installed and, with the user's consent, install it.

    On Windows a prebuilt wheel matching the running interpreter is downloaded from GitHub and
    installed with pip. On every other system pip installs the package by name.

    Returns
    -------
    bool
        True if PyICU is already installed or was installed successfully, False if the user
        declined, no matching wheel exists, or the download or installation failed.
    """
    package_name = "PyICU"
    if check_if_pyicu_installed():
        return True

    if platform.system() != "Windows":
        # No GitHub lookup is needed here: pip resolves the package by name.
        print(
            f"{package_name} is not installed.\nIt will be installed using pip."
            "\nDo you want to proceed? (Y/n)?"
        )
        if not _confirm_installation():
            return False

        return _pip_install(package_name, package_name)

    # Windows-specific installation using wheel files.
    import requests

    try:
        assets = _fetch_wheel_assets()

    except requests.RequestException as e:
        print(f"Error occurred while fetching the available {package_name} wheels: {e}")
        return False

    # Find the matching wheel for the current Python version and architecture.
    python_version, architecture = get_python_version_and_architecture()
    wheel_url = find_matching_wheel(
        [(name, download_url) for name, download_url, _ in assets],
        python_version,
        architecture,
    )

    if not wheel_url:
        print("No matching wheel file found for your Python version and architecture.")
        return False

    # Report the size of the one wheel that is downloaded, not of every wheel in the listing.
    wheel_size_mb = (
        next(size for _, download_url, size in assets if download_url == wheel_url)
        / _BYTES_PER_MB
    )
    print(
        f"{package_name} is not installed.\nIt will be downloaded from '{_WHEEL_RELEASES_PAGE_URL}'"
        f"\nApproximately {wheel_size_mb:.2f} MB will be downloaded.\nDo you want to proceed? (Y/n)?"
    )
    if not _confirm_installation():
        return False

    # The temporary directory removes the wheel whether or not the installation succeeds.
    with tempfile.TemporaryDirectory() as download_dir:
        try:
            wheel_path = download_wheel_file(wheel_url, download_dir)

        except (requests.RequestException, OSError) as e:
            print(f"Error occurred while downloading {package_name}: {e}")
            return False

        return _pip_install(wheel_path, package_name)


# ---------------------------------------------------------------------------
# src/scribe_data/unicode/generate_emoji_keywords.py (updated generate_emoji)
#
# The emoji-keywords branch of get_data() in src/scribe_data/cli/get.py, which printed a pointer
# to the manual PyICU installation guide, is removed by the same change.
# ---------------------------------------------------------------------------


def generate_emoji(language, output_dir: str = None):
    """
    Generates emoji keywords for a specified language and exports the data to the given directory.

    This function first checks for the PyICU package and offers to install it if it is missing.
    If PyICU is available afterwards, it generates emoji keywords for the specified language.
    The results are then exported to the provided output directory.

    Parameters
    ----------
    language : str
        The language for which to generate emoji keywords.
        It is converted to an ISO code with get_language_iso.

    output_dir : str, optional
        The directory where the generated data will be saved.
        NOTE(review): the value is used without a None check, so callers have to pass it.

    Returns
    -------
    None: The function does not return any value but outputs data to the specified directory.
    """
    # DATA_TYPE and EMOJI_KEYWORDS_DICT are module-level constants of generate_emoji_keywords.py.
    from scribe_data.unicode.process_unicode import gen_emoji_lexicon
    from scribe_data.utils import export_formatted_data, get_language_iso

    if not check_and_install_pyicu():
        # PyICU is missing and was not installed; the reason has already been printed.
        return

    if not check_if_pyicu_installed():
        print(
            "PyICU was installed but cannot be detected yet. Please run the command again."
        )
        return

    iso = get_language_iso(language=language)
    path_to_cldr_annotations = (
        Path(__file__).parent / "cldr-annotations-full" / "annotations"
    )
    if iso in os.listdir(path_to_cldr_annotations):
        print(f"Emoji Generation for language {language} is supported")

    else:
        print(f"Emoji Generation for language {language} is not supported")
        return

    updated_path = output_dir[2:] if output_dir.startswith("./") else output_dir
    export_dir = Path(updated_path) / language.capitalize()
    export_dir.mkdir(parents=True, exist_ok=True)

    if emoji_keywords_dict := gen_emoji_lexicon(
        language=language,
        emojis_per_keyword=EMOJI_KEYWORDS_DICT,
    ):
        export_formatted_data(
            file_path=output_dir,
            formatted_data=emoji_keywords_dict,
            query_data_in_use=True,
            language=language.capitalize(),
            data_type=DATA_TYPE,
        )