Skip to content

Commit

Permalink
#133 override language def and use internal bfs host/port (#134)
Browse files Browse the repository at this point in the history
* #133 Fixed a couple of defects

* #133 Added tests

* #133 Added tests

* #133 Addressed review comments

* #133 Fixed an error in the language_definition_context

* #133 Fixed an error in the test_sagemaker_extension_wrapper.py
  • Loading branch information
ahsimb authored Sep 5, 2024
1 parent e3a8525 commit c66d2e6
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 39 deletions.
6 changes: 6 additions & 0 deletions doc/changes/changes_0.3.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,9 @@
## Features

* #131: Let the itde_manager use an instance of the DockerDB provided externally.

## Bugfixes

* #133: Fixed the following defects:
  - The `allow_override` option was not passed on when deploying a language container.
  - The internal bucket-fs host and port were not stored in the bucket-fs connection object.
2 changes: 2 additions & 0 deletions exasol/nb_connector/ai_lab_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class AILabConfig(Enum):
client_key = auto()
bfs_host_name = auto()
bfs_port = auto()
bfs_internal_host_name = auto()
bfs_internal_port = auto()
bfs_service = auto()
bfs_bucket = auto()
bfs_user = auto()
Expand Down
8 changes: 6 additions & 2 deletions exasol/nb_connector/extension_wrapper_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,13 @@ def to_json_str(**kwargs) -> str:

backend = get_backend(conf)
if backend == StorageBackend.onprem:
host = conf.get(CKey.bfs_host_name, conf.get(CKey.db_host_name))
# Here we are using the internal bucket-fs host and port, falling back
# to the external parameters if the former are not specified.
host = conf.get(CKey.bfs_internal_host_name,
conf.get(CKey.bfs_host_name, conf.get(CKey.db_host_name)))
port = conf.get(CKey.bfs_internal_port, conf.get(CKey.bfs_port))
protocol = "https" if str_to_bool(conf, CKey.bfs_encryption, True) else "http"
url = f"{protocol}://{host}:{conf.get(CKey.bfs_port)}"
url = f"{protocol}://{host}:{port}"
verify: Optional[bool] = (False if conf.get(CKey.trusted_ca)
else optional_str_to_bool(conf.get(CKey.cert_vld)))
conn_to = to_json_str(backend=bfs.path.StorageBackend.onprem.name,
Expand Down
18 changes: 14 additions & 4 deletions exasol/nb_connector/sagemaker_extension_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@

def deploy_language_container(conf: Secrets,
version: str,
language_alias: str) -> None:
language_alias: str,
allow_override: bool) -> None:
"""
Calls the Sagemaker Extension's language container deployment API.
Downloads the specified released version of the extension from the GitHub
Expand All @@ -56,6 +57,8 @@ def deploy_language_container(conf: Secrets,
Sagemaker Extension version.
language_alias:
The language alias of the extension's language container.
allow_override:
If True allows overriding the language definition.
"""

deployer = SmeLanguageContainerDeployer.create( # pylint: disable=unexpected-keyword-arg
Expand All @@ -65,7 +68,8 @@ def deploy_language_container(conf: Secrets,
)

# Install the language container.
deployer.download_from_github_and_run(version, False)
deployer.download_from_github_and_run(version, alter_system=False,
allow_override=allow_override)

# Save the activation SQL in the secret store.
language_def = deployer.get_language_definition(deployer.SLC_NAME)
Expand Down Expand Up @@ -99,7 +103,8 @@ def initialize_sme_extension(conf: Secrets,
language_alias: str = LANGUAGE_ALIAS,
run_deploy_container: bool = True,
run_deploy_scripts: bool = True,
run_encapsulate_aws_credentials: bool = True) -> None:
run_encapsulate_aws_credentials: bool = True,
allow_override: bool = True) -> None:
"""
Performs all necessary operations to get the Sagemaker Extension
up and running. See the "Getting Started" and "Setup" sections of the
Expand All @@ -122,13 +127,18 @@ def initialize_sme_extension(conf: Secrets,
run_encapsulate_aws_credentials:
If set to False will skip the creation of the database connection
object encapsulating the AWS credentials.
allow_override:
If True allows overriding the language definition. Otherwise, if
the database already has a language definition for the specified
language alias, an attempt to deploy the container will result
in a RuntimeError.
"""

# Make the connection object name
aws_conn_name = "_".join([AWS_CONNECTION_PREFIX, str(conf.get(CKey.db_user))])

if run_deploy_container:
deploy_language_container(conf, version, language_alias)
deploy_language_container(conf, version, language_alias, allow_override)

# Create the required objects in the database
if run_deploy_scripts:
Expand Down
19 changes: 15 additions & 4 deletions exasol/nb_connector/transformers_extension_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@

def deploy_language_container(conf: Secrets,
version: str,
language_alias: str) -> None:
language_alias: str,
allow_override: bool) -> None:
"""
Calls the Transformers Extension's language container deployment API.
Downloads the specified released version of the extension from the GitHub
Expand All @@ -62,12 +63,15 @@ def deploy_language_container(conf: Secrets,
Parameters:
conf:
The secret store. The store must contain the DB connection parameters
and the parameters of the BucketFS service.
version:
Transformers Extension version.
language_alias:
The language alias of the extension's language container.
allow_override:
If True allows overriding the language definition.
"""

deployer = TeLanguageContainerDeployer.create(
Expand All @@ -77,7 +81,8 @@ def deploy_language_container(conf: Secrets,
)

# Install the language container.
deployer.download_from_github_and_run(version, False)
deployer.download_from_github_and_run(version, alter_system=False,
allow_override=allow_override)

# Save the activation SQL in the secret store.
language_def = deployer.get_language_definition(deployer.SLC_NAME)
Expand Down Expand Up @@ -113,7 +118,8 @@ def initialize_te_extension(conf: Secrets,
run_deploy_container: bool = True,
run_deploy_scripts: bool = True,
run_encapsulate_bfs_credentials: bool = True,
run_encapsulate_hf_token: bool = True) -> None:
run_encapsulate_hf_token: bool = True,
allow_override: bool = True) -> None:
"""
Performs all necessary operations to get the Transformers Extension
up and running. See the "Getting Started" and "Setup" sections of the
Expand All @@ -139,6 +145,11 @@ def initialize_te_extension(conf: Secrets,
run_encapsulate_hf_token:
If set to False will skip the creation of the database connection
object encapsulating the Huggingface token.
allow_override:
If True allows overriding the language definition. Otherwise, if
the database already has a language definition for the specified
language alias, an attempt to deploy the container will result
in a RuntimeError.
"""

# Make the connection object names
Expand All @@ -148,7 +159,7 @@ def initialize_te_extension(conf: Secrets,
hf_conn_name = "_".join([HF_CONNECTION_PREFIX, db_user]) if token else ""

if run_deploy_container:
deploy_language_container(conf, version, language_alias)
deploy_language_container(conf, version, language_alias, allow_override)

# Create the required objects in the database
if run_deploy_scripts:
Expand Down
26 changes: 17 additions & 9 deletions test/integration/test_sagemaker_extension_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
activate_languages,
assert_connection_exists,
assert_run_empty_udf,
get_script_counts
get_script_counts,
language_definition_context,
)


Expand All @@ -22,13 +23,20 @@ def test_initialize_sme_extension(
secrets.save(CKey.aws_access_key_id, "FAKEKEYIDDONTUSEIT")
secrets.save(CKey.aws_secret_access_key, "FakeSecretAccessKeyDontTryToUseIt")

# Run the extension deployment.
initialize_sme_extension(secrets)
# At the moment the language alias is hard-coded in the extension,
# so we have to use this exact value.
language_alias = 'PYTHON3_SME'

with open_pyexasol_connection(secrets) as pyexasol_connection:
activate_languages(pyexasol_connection, secrets)
assert_run_empty_udf("PYTHON3_SME", pyexasol_connection, secrets)
script_counts = get_script_counts(pyexasol_connection, secrets)
assert script_counts["SCRIPTING"] >= 4
assert script_counts["UDF"] >= 5
assert_connection_exists(secrets.get(CKey.sme_aws_connection), pyexasol_connection)

with language_definition_context(pyexasol_connection, language_alias):

# Run the extension deployment.
initialize_sme_extension(secrets, language_alias=language_alias)

activate_languages(pyexasol_connection, secrets)
assert_run_empty_udf(language_alias, pyexasol_connection, secrets)
script_counts = get_script_counts(pyexasol_connection, secrets)
assert script_counts["SCRIPTING"] >= 4
assert script_counts["UDF"] >= 5
assert_connection_exists(secrets.get(CKey.sme_aws_connection), pyexasol_connection)
26 changes: 13 additions & 13 deletions test/integration/test_transformers_extension_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,28 @@
activate_languages,
assert_connection_exists,
assert_run_empty_udf,
get_script_counts
get_script_counts,
language_definition_context,
)


@pytest.mark.skip(reason="The expected functionality is not yet implemented in the Transformers Extension")
def test_initialize_te_extension(
request: FixtureRequest,
secrets: Secrets,
setup_itde
):
test_name: str = request.node.name
language_alias = f"PYTHON3_TE_{test_name.upper()}"
language_alias = f"PYTHON3_TE_TEST"
secrets.save(CKey.huggingface_token, "abc")

with open_pyexasol_connection(secrets) as pyexasol_connection:

# Run the extension deployment.
initialize_te_extension(secrets, language_alias=language_alias)
with language_definition_context(pyexasol_connection, language_alias):

activate_languages(pyexasol_connection, secrets)
assert_run_empty_udf(language_alias, pyexasol_connection, secrets)
script_counts = get_script_counts(pyexasol_connection, secrets)
assert script_counts["UDF"] > 5
assert_connection_exists(secrets.get(CKey.te_bfs_connection), pyexasol_connection)
assert_connection_exists(secrets.get(CKey.te_hf_connection), pyexasol_connection)
# Run the extension deployment.
initialize_te_extension(secrets, language_alias=language_alias)

activate_languages(pyexasol_connection, secrets)
assert_run_empty_udf(language_alias, pyexasol_connection, secrets)
script_counts = get_script_counts(pyexasol_connection, secrets)
assert script_counts["UDF"] > 5
assert_connection_exists(secrets.get(CKey.te_bfs_connection), pyexasol_connection)
assert_connection_exists(secrets.get(CKey.te_hf_connection), pyexasol_connection)
36 changes: 31 additions & 5 deletions test/unit/test_extension_wrapper_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@
from exasol.nb_connector.ai_lab_config import AILabConfig as CKey
from exasol.nb_connector.extension_wrapper_common import encapsulate_bucketfs_credentials

DB_HOST = '1.2.3.4'


@pytest.fixture
def filled_secrets(secrets) -> Secrets:
secrets.save(CKey.db_host_name, 'localhost')
secrets.save(CKey.db_host_name, DB_HOST)
secrets.save(CKey.db_port, '8888')
secrets.save(CKey.db_user, 'user')
secrets.save(CKey.db_password, 'password')
secrets.save(CKey.db_password, 'my_db_password')
secrets.save(CKey.bfs_port, '6666')
secrets.save(CKey.bfs_encryption, 'True')
secrets.save(CKey.bfs_service, 'bfsdefault')
secrets.save(CKey.bfs_bucket, 'default')
secrets.save(CKey.bfs_user, 'user'),
secrets.save(CKey.bfs_password, 'password')
secrets.save(CKey.bfs_password, 'my_bfs_password')
return secrets


Expand Down Expand Up @@ -61,13 +63,37 @@ def test_bucketfs_credentials_default(mock_connect, filled_secrets):
query_params = mock_connection.execute.call_args_list[0].kwargs['query_params']
validate_params(query_params['BUCKETFS_ADDRESS'], (
['backend', 'url', 'service_name', 'bucket_name', 'path'],
['onprem', 'https://localhost:6666', 'bfsdefault', 'default', path_in_bucket]
['onprem', f'https://{DB_HOST}:6666', 'bfsdefault', 'default', path_in_bucket]
))
validate_params(query_params['BUCKETFS_USER'], (
['username'], ['user']
))
validate_params(query_params['BUCKETFS_PASSWORD'], (
['password'], ['password']
['password'], ['my_bfs_password']
))


@unittest.mock.patch("pyexasol.connect")
def test_bucketfs_credentials_internal(mock_connect, filled_secrets):
    """
    Verifies that, when the internal bucket-fs host name and port are saved
    in the secret store, the URL in the BUCKETFS_ADDRESS query parameter is
    built from them.
    """
    internal_host, internal_port = 'localhost', 3377
    path_in_bucket = 'location'

    # Add the internal bucket-fs parameters on top of the common settings.
    filled_secrets.save(CKey.bfs_internal_host_name, internal_host)
    filled_secrets.save(CKey.bfs_internal_port, str(internal_port))

    # The mocked connection returns itself when entered as a context manager.
    connection = unittest.mock.MagicMock()
    connection.__enter__.return_value = connection
    mock_connect.return_value = connection

    encapsulate_bucketfs_credentials(
        filled_secrets,
        path_in_bucket=path_in_bucket,
        connection_name='whatever',
    )

    # Exactly one statement is expected; inspect its query parameters.
    connection.execute.assert_called_once()
    first_call = connection.execute.call_args_list[0]
    bucketfs_address = first_call.kwargs['query_params']['BUCKETFS_ADDRESS']

    expected_keys = ['backend', 'url', 'service_name', 'bucket_name', 'path']
    expected_values = [
        'onprem',
        f'https://{internal_host}:{internal_port}',
        'bfsdefault',
        'default',
        path_in_bucket,
    ]
    validate_params(bucketfs_address, (expected_keys, expected_values))


Expand Down
37 changes: 35 additions & 2 deletions test/utils/integration_test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
import textwrap
from typing import Dict
from contextlib import contextmanager
import pytest

from pyexasol import ExaConnection
Expand Down Expand Up @@ -71,7 +72,7 @@ def run(ctx):

def get_script_counts(
pyexasol_connection: ExaConnection, secrets: Secrets
) -> Dict[str, int]:
) -> dict[str, int]:
"""
Returns numbers of installed scripts of different types.
"""
Expand Down Expand Up @@ -99,3 +100,35 @@ def assert_connection_exists(
"""
).fetchall()
assert result


@contextmanager
def language_definition_context(pyexasol_connection: ExaConnection,
                                language_alias: str | None = None) -> None:
    """
    A context manager that preserves the current language definitions at both
    SESSION and SYSTEM levels. Optionally creates a definition for the specified
    alias to test the ability to override an existing definition.

    Parameters:
        pyexasol_connection:
            An open connection used to read and alter the SCRIPT_LANGUAGES
            parameter.
        language_alias:
            If provided, a trivial definition mapping this alias to
            builtin_python3 is set at both levels before the managed body runs.

    The definitions read on entry are written back on exit. This also happens
    when the managed body raises, or when setting the trivial definition fails
    part-way through.
    """
    def alter_language_settings(alter_type: str, lang_definition: str) -> None:
        sql = f"ALTER {alter_type} SET SCRIPT_LANGUAGES='{lang_definition}';"
        pyexasol_connection.execute(sql)

    # Remember the current language settings.
    alter_types = ['SYSTEM', 'SESSION']
    columns = ', '.join(f'{alter_type}_VALUE' for alter_type in alter_types)
    sql0 = (f"SELECT {columns} "
            "FROM SYS.EXA_PARAMETERS WHERE PARAMETER_NAME='SCRIPT_LANGUAGES';")
    current_definitions = pyexasol_connection.execute(sql0).fetchall()[0]

    # The setup is done inside the try block, so that a failure after the
    # first ALTER still restores whatever has been changed so far.
    try:
        if language_alias:
            # Creates a trivial language definition for the specified alias.
            lang_def = ('PYTHON=builtin_python R=builtin_r JAVA=builtin_java '
                        f'PYTHON3=builtin_python3 {language_alias}=builtin_python3')
            for alter_type in alter_types:
                alter_language_settings(alter_type, lang_def)
        yield
    finally:
        # Restore language settings.
        for alter_type, lang_def in zip(alter_types, current_definitions):
            alter_language_settings(alter_type, lang_def)

0 comments on commit c66d2e6

Please sign in to comment.