def open_pyexasol_connection_with_lang_definitions(conf: Secrets, **kwargs) -> pyexasol.ExaConnection:
    """
    Opens a `pyexasol` connection and applies the `ALTER SESSION` command using all registered languages.

    All keyword arguments are passed through to `open_pyexasol_connection`.
    The caller owns the returned connection and is responsible for closing it.
    If the language activation fails, the connection is closed before the
    error is re-raised, so no half-initialised connection is leaked.
    """
    conn = open_pyexasol_connection(conf, **kwargs)
    try:
        conn.execute(get_activation_sql(conf))
    except Exception:
        # The caller never receives the handle in this case, so release it here.
        conn.close()
        raise
    return conn
REQUIRED_FLAVOR = "template-Exasol-all-python-3.10"

# Path to the used flavor within the script-languages-release repository
FLAVOR_PATH_IN_SLC_REPO = Path("flavors") / REQUIRED_FLAVOR

# One entry of the flavor's custom pip file: package name and version.
PipPackageDefinition = namedtuple('PipPackageDefinition', ['pkg', 'version'])


class SlcDir:
    """
    Locations inside the local clone of the script-languages-release repository.
    The root of the clone is read from the secret store on every access.
    """

    def __init__(self, secrets: "Secrets"):
        self._secrets = secrets

    @property
    def root_dir(self) -> Path:
        """
        Returns the root directory of the clone as configured in the secret store.
        Raises a RuntimeError if no target directory has been stored.
        """
        target_dir = self._secrets.get(AILabConfig.slc_target_dir)
        if not target_dir:
            raise RuntimeError("slc target dir is not defined in secrets.")
        return Path(target_dir)

    @property
    def flavor_dir(self) -> Path:
        """
        Returns the directory of the required flavor inside the clone.
        """
        return self.root_dir / FLAVOR_PATH_IN_SLC_REPO

    @property
    def custom_pip_file(self) -> Path:
        """
        Returns the path to the custom pip file of the flavor
        """
        return self.flavor_dir / "flavor_customization" / "packages" / "python3_pip_packages"

    @contextlib.contextmanager
    def enter(self):
        """Changes working directory and returns to previous on exit."""
        prev_cwd = Path.cwd()
        os.chdir(self.root_dir)
        try:
            yield
        finally:
            os.chdir(prev_cwd)

    def __str__(self):
        return str(self.root_dir)


class WorkingDir:
    """
    Directory which receives the exported containers and the build output.
    Defaults to the current working directory if no path is given.
    """

    def __init__(self, p: Optional[Path]):
        # Path(p) is a no-op for Path objects and additionally accepts plain strings.
        self.root_dir = Path.cwd() if p is None else Path(p)

    @property
    def export_path(self) -> Path:
        """
        Returns the export path for script-languages-container
        """
        return self.root_dir / "container"

    @property
    def output_path(self) -> Path:
        """
        Returns the output path containing caches and logs.
        """
        return self.root_dir / "output"

    @staticmethod
    def _remove_dir(path: Path) -> None:
        """
        Removes the directory recursively; a directory which does not exist is not an error.
        """
        # Only the "nothing to clean" case is ignored, any other failure still propagates.
        with contextlib.suppress(FileNotFoundError):
            shutil.rmtree(path)

    def cleanup_output_path(self):
        """
        Remove the output path recursively. Does nothing if the path does not exist.
        """
        self._remove_dir(self.output_path)

    def cleanup_export_path(self):
        """
        Remove the export path recursively. Does nothing if the path does not exist.
        """
        self._remove_dir(self.export_path)


class SlctManager:
    """
    Clones the script-languages-release repository, customizes the required flavor,
    exports / uploads the resulting script-languages-container via exaslct
    and keeps the language alias and activation string in the secret store.
    """

    def __init__(self, secrets: "Secrets", working_path: Optional[Path] = None):
        self.working_path = WorkingDir(working_path)
        self.slc_dir = SlcDir(secrets)
        self._secrets = secrets

    def check_slc_repo_complete(self) -> bool:
        """
        Checks if the target dir for the script-languages repository is present and correct.
        The check passes if the directory of the required flavor exists.
        """
        print(f"Script-languages repository path is '{self.slc_dir}'")
        return self.slc_dir.flavor_dir.is_dir()

    def clone_slc_repo(self):
        """
        Clones the script-languages-release repository from Github into the target dir configured in the secret store.
        Cloning is skipped (with a warning) if the target dir already exists.
        """
        if self.slc_dir.root_dir.is_dir():
            logging.warning(f"Directory '{self.slc_dir}' already exists. Skipping cloning....")
            return
        logging.info(f"Cloning into {self.slc_dir}...")
        repo = Repo.clone_from("https://github.com/exasol/script-languages-release", self.slc_dir.root_dir)
        logging.info("Fetching submodules...")
        repo.submodule_update(recursive=True)

    def export(self):
        """
        Exports the current script-languages-container to the export directory.
        """
        # The flavor path is relative to the repository root, hence the change of directory.
        with self.slc_dir.enter():
            exaslct_api.export(flavor_path=(str(FLAVOR_PATH_IN_SLC_REPO),),
                               export_path=str(self.working_path.export_path),
                               output_directory=str(self.working_path.output_path),
                               release_name=self.language_alias,)

    def upload(self):
        """
        Uploads the current script-languages-container to the database
        and stores the activation string in the secret store.
        Raises a RuntimeError if the activation statement generated by exaslct
        does not have the expected form.
        """
        bucketfs_name = self._secrets.get(CKey.bfs_service)
        bucket_name = self._secrets.get(CKey.bfs_bucket)
        database_host = self._secrets.get(CKey.bfs_host_name)
        bucketfs_port = self._secrets.get(CKey.bfs_port)
        bucketfs_username = self._secrets.get(CKey.bfs_user)
        bucketfs_password = self._secrets.get(CKey.bfs_password)
        alias = self.language_alias

        # The flavor path is relative to the repository root, hence the change of directory.
        with self.slc_dir.enter():
            exaslct_api.upload(flavor_path=(str(FLAVOR_PATH_IN_SLC_REPO),),
                               database_host=database_host,
                               bucketfs_name=bucketfs_name,
                               bucket_name=bucket_name, bucketfs_port=int(bucketfs_port),
                               bucketfs_username=bucketfs_username,
                               bucketfs_password=bucketfs_password, path_in_bucket=PATH_IN_BUCKET,
                               release_name=alias,
                               output_directory=str(self.working_path.output_path))
            container_name = f"{REQUIRED_FLAVOR}-release-{alias}"
            result = exaslct_api.generate_language_activation(flavor_path=str(FLAVOR_PATH_IN_SLC_REPO),
                                                              bucketfs_name=bucketfs_name,
                                                              bucket_name=bucket_name, container_name=container_name,
                                                              path_in_bucket=PATH_IN_BUCKET)

        # Only the URL part is taken over; the alias generated by exaslct
        # is replaced with the alias configured in the secret store.
        url = self._extract_container_url(result[0])
        self._secrets.save(self._alias_key, f"{alias}={url}")

    @staticmethod
    def _extract_container_url(alter_session_cmd: str) -> str:
        """
        Returns the URL part of the `<alias>=<url>` definition contained in an
        `ALTER SESSION SET SCRIPT_LANGUAGES='...'` statement.
        Raises a RuntimeError if the statement does not have this form.
        """
        match = re.search(r"ALTER SESSION SET SCRIPT_LANGUAGES='(.*)'", alter_session_cmd)
        if match is None:
            raise RuntimeError(f"Unexpected language activation statement: {alter_session_cmd!r}")
        # Split at the first '=' only, the URL itself contains further '=' characters.
        _, separator, url = match.group(1).partition("=")
        if not separator:
            raise RuntimeError(f"No language definition found in activation statement: {alter_session_cmd!r}")
        return url

    @property
    def _alias_key(self):
        # Key under which the activation string of the current alias is stored.
        return SLC_ACTIVATION_KEY_PREFIX + self.language_alias

    @property
    def activation_key(self) -> str:
        """
        Returns the language activation string for the uploaded script-language-container.
        Can be used in `ALTER SESSION` or `ALTER_SYSTEM` SQL commands to activate
        the language of the uploaded script-language-container.
        Raises a RuntimeError if no activation string is stored for the current alias.
        """
        activation_key = self._secrets.get(self._alias_key)
        if not activation_key:
            raise RuntimeError("SLC activation key not defined in secrets.")
        return activation_key

    @property
    def language_alias(self) -> str:
        """
        Returns the stored language alias.
        Falls back to the default alias if none (or an empty one) is stored.
        """
        return self._secrets.get(AILabConfig.slc_alias) or DEFAULT_ALIAS

    @language_alias.setter
    def language_alias(self, alias: str):
        """
        Stores the language alias in the secret store.
        """
        self._secrets.save(AILabConfig.slc_alias, alias)

    def append_custom_packages(self, pip_packages: List[PipPackageDefinition]):
        """
        Appends packages to the custom pip file.
        Note: This method is not idempotent: Multiple calls with the same package definitions will result in duplicated entries.
        """
        pip_file = self.slc_dir.custom_pip_file
        # If the existing content lacks a final newline, the first new entry
        # would otherwise be glued onto the last existing line.
        existing = pip_file.read_bytes() if pip_file.is_file() else b""
        needs_newline = bool(existing) and not existing.endswith(b"\n")
        with open(pip_file, "a") as f:
            if needs_newline:
                f.write("\n")
            for p in pip_packages:
                print(f"{p.pkg}|{p.version}", file=f)

    @property
    def slc_docker_images(self):
        """
        Returns the first tag of every local docker image found
        under the name "exasol/script-language-container".
        """
        with ContextDockerClient() as docker_client:
            images = docker_client.images.list(name="exasol/script-language-container")
            # Images without any tag have an empty tag list and are skipped.
            image_tags = [img.tags[0] for img in images if img.tags]
            return image_tags

    def clean_all_images(self):
        """
        Deletes all local docker images.
        The deletion itself is delegated to exaslct.
        """
        exaslct_api.clean_all_images(output_directory=str(self.working_path.output_path))
"jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "6cc43a6605086a2a64e1f4812d49b176ce33425adb671003d3cf65c4b44e5f2d" +content-hash = "db3d47a8e79f2f260c3eb06165a2f39391a066796aa35f4f2fd83076db194ea6" diff --git a/pyproject.toml b/pyproject.toml index 1275caa..e743c7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,8 @@ types-requests = "^2.31.0.6" ifaddr = "^0.2.0" exasol-saas-api = {git = "https://github.com/exasol/saas-api-python.git", branch = "main"} ibis-framework = {extras = ["exasol"], version = "^9.1.0"} +exasol-script-languages-container-tool = ">=0.19.0" +GitPython = ">=2.1.0" [build-system] @@ -51,6 +53,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.dev-dependencies] pytest = "^7.1.1" pytest-mock = "^3.7.0" +pytest_dependency = ">=0.6.0" exasol-toolbox = "^0.5.0" diff --git a/test/integration/test_slct_manager.py b/test/integration/test_slct_manager.py new file mode 100644 index 0000000..c7f5ca3 --- /dev/null +++ b/test/integration/test_slct_manager.py @@ -0,0 +1,193 @@ +import textwrap +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import List, Tuple + +import pytest +from exasol_integration_test_docker_environment.lib.docker import ContextDockerClient + +from exasol.nb_connector.ai_lab_config import AILabConfig +from exasol.nb_connector.itde_manager import bring_itde_up +from exasol.nb_connector.language_container_activation import open_pyexasol_connection_with_lang_definitions +from exasol.nb_connector.secret_store import Secrets +from exasol.nb_connector.slct_manager import SlctManager, PipPackageDefinition +from test.integration.test_itde_manager import remove_itde + + +@pytest.fixture(scope="module") +def working_path() -> Path: + with TemporaryDirectory() as d: + yield Path(d) + + +@pytest.fixture(scope="module") +def secrets_file(working_path: Path) -> Path: + return working_path / "sample_database.db" + + +@pytest.fixture(scope="module") 
def slc_secrets(secrets_file, working_path) -> Secrets:
    """Secret store which already knows where the script-languages repository is cloned to."""
    secrets = Secrets(secrets_file, master_password="abc")
    secrets.save(AILabConfig.slc_target_dir, str(working_path / "script_languages_release"))
    return secrets


@pytest.fixture(scope="module")
def slct_manager(slc_secrets: Secrets, working_path: Path) -> SlctManager:
    """The manager under test, shared by all tests of this module."""
    return SlctManager(slc_secrets, working_path)


@pytest.fixture(scope="module")
def itde(slc_secrets: Secrets):
    """Brings the integration test docker environment up and removes it after the last test."""
    bring_itde_up(slc_secrets)
    yield
    remove_itde()


@pytest.fixture
def custom_packages() -> List[Tuple[str, str, str]]:
    """Triples of (pip package, version, importable module name)."""
    return [("xgboost", "2.0.3", "xgboost"), ("scikit-learn", "1.5.0", "sklearn")]


def _expected_activation_key(alias: str) -> str:
    """Builds the activation string expected after uploading the container under the given alias."""
    container = f"bfsdefault/default/container/template-Exasol-all-python-3.10-release-{alias}"
    return (f"{alias}=localzmq+protobuf:///{container}?lang=python"
            f"#buckets/{container}/exaudf/exaudfclient_py3")


# The tests below build on each other; pytest-dependency skips a test
# if one of the tests it depends on did not pass.

@pytest.mark.dependency(name="clone")
def test_clone_slc(slct_manager):
    slct_manager.clone_slc_repo()


@pytest.mark.dependency(
    name="check_config", depends=["clone"]
)
def test_check_slc_config(slct_manager):
    config_ok = slct_manager.check_slc_repo_complete()
    assert config_ok


@pytest.mark.dependency(
    name="export_slc", depends=["check_config"]
)
def test_export_slc(slct_manager):
    slct_manager.export()
    export_path = slct_manager.working_path.export_path
    assert export_path.exists()
    # Exactly one archive and one checksum file are expected.
    tgz = list(export_path.glob("*.tar.gz"))
    assert len(tgz) == 1
    assert tgz[0].is_file()
    tgz_sum = list(export_path.glob("*.tar.gz.sha512sum"))
    assert len(tgz_sum) == 1
    assert tgz_sum[0].is_file()


@pytest.mark.dependency(
    name="slc_images", depends=["export_slc"]
)
def test_slc_images(slct_manager):
    images = slct_manager.slc_docker_images
    assert len(images) > 0
    for img in images:
        assert "exasol/script-language-container" in img


@pytest.mark.dependency(
    name="upload_slc", depends=["check_config"]
)
def test_upload(slct_manager: SlctManager, itde):
    slct_manager.language_alias = "my_python"
    slct_manager.upload()
    assert slct_manager.activation_key == _expected_activation_key("my_python")


@pytest.mark.dependency(
    name="append_custom_packages", depends=["upload_slc"]
)
def test_append_custom_packages(slct_manager: SlctManager, custom_packages: List[Tuple[str, str, str]]):
    slct_manager.append_custom_packages([PipPackageDefinition(pkg, version) for pkg, version, _ in custom_packages])
    with open(slct_manager.slc_dir.custom_pip_file, "r") as f:
        pip_content = f.read()
    for custom_package, version, _ in custom_packages:
        assert f"{custom_package}|{version}" in pip_content


@pytest.mark.dependency(
    name="upload_slc_with_new_packages", depends=["append_custom_packages"]
)
def test_upload_slc_with_new_packages(slc_secrets: Secrets, slct_manager: SlctManager,
                                      custom_packages: List[Tuple[str, str, str]]):
    # A new alias keeps the container uploaded by test_upload available under "my_python".
    slct_manager.language_alias = "my_new_python"
    slct_manager.upload()
    assert slct_manager.activation_key == _expected_activation_key("my_new_python")


@pytest.mark.dependency(
    name="udf_with_new_packages", depends=["upload_slc_with_new_packages"]
)
def test_udf_with_new_packages(slc_secrets: Secrets, slct_manager: SlctManager,
                               custom_packages: List[Tuple[str, str, str]]):
    # The UDF only succeeds if every custom package can be imported inside the container.
    import_statements = "\n".join(f"    import {module}" for pkg, version, module in custom_packages)
    udf = textwrap.dedent(f"""
CREATE OR REPLACE {slct_manager.language_alias} SET SCRIPT test_custom_packages(i integer)
EMITS (o VARCHAR(2000000)) AS
def run(ctx):
{import_statements}

    ctx.emit("success")
/
    """)
    con = open_pyexasol_connection_with_lang_definitions(slc_secrets)
    try:
        con.execute("CREATE SCHEMA TEST")
        con.execute(udf)
        res = con.execute("select test_custom_packages(1)")
        rows = res.fetchall()
        assert rows == [('success',)]
    finally:
        con.close()


@pytest.mark.dependency(
    name="test_old_alias", depends=["udf_with_new_packages"]
)
def test_old_alias(slc_secrets: Secrets, slct_manager: SlctManager):
    # The alias of the first upload must still be usable after the second upload.
    udf = textwrap.dedent("""
CREATE OR REPLACE my_python SET SCRIPT test_old_slc(i integer)
EMITS (o VARCHAR(2000000)) AS
def run(ctx):
    ctx.emit("success")
/
    """)
    con = open_pyexasol_connection_with_lang_definitions(slc_secrets, schema='TEST')
    try:
        con.execute(udf)
        res = con.execute("select test_old_slc(1)")
        rows = res.fetchall()
        assert rows == [('success',)]
    finally:
        con.close()


@pytest.mark.dependency(
    name="clean_up_images", depends=["upload_slc_with_new_packages"]
)
def test_clean_up_images(slct_manager: SlctManager):
    slct_manager.clean_all_images()
    with ContextDockerClient() as docker_client:
        images = docker_client.images.list(name="exasol/script-language-container")
        assert len(images) == 0


@pytest.mark.dependency(
    name="clean_up_output_path", depends=["clean_up_images"]
)
def test_clean_output(slct_manager: SlctManager):
    slct_manager.working_path.cleanup_output_path()
    assert not slct_manager.working_path.output_path.is_dir()


@pytest.mark.dependency(
    name="clean_up_export_path", depends=["clean_up_images"]
)
def test_clean_export(slct_manager: SlctManager):
    slct_manager.working_path.cleanup_export_path()
    assert not slct_manager.working_path.export_path.is_dir()