Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sagemaker extension wrapper #57

Merged
2 changes: 2 additions & 0 deletions doc/changes/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Changes

* [0.2.6](changes_0.2.6.md)
* [0.2.5](changes_0.2.5.md)
* [0.2.4](changes_0.2.4.md)
* [0.2.3](changes_0.2.3.md)
Expand All @@ -14,6 +15,7 @@
---
hidden:
---
changes_0.2.6
changes_0.2.5
changes_0.2.4
changes_0.2.3
Expand Down
14 changes: 14 additions & 0 deletions doc/changes/changes_0.2.6.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Exasol Notebook Connector 0.2.6, released T.B.C.

## Summary

This release adds the extension wrappers and makes full use of the configuration enumeration.

## Changes

* #50: [Add iterable features to the secret store](https://github.com/exasol/notebook-connector/issues/50)
* #52: [Make data conversion utility for the secret store commonly accessible](https://github.com/exasol/notebook-connector/issues/52)
* #55: [Unified language activation SQL command](https://github.com/exasol/notebook-connector/pull/55)
* #56: [Transformers extension wrapper](https://github.com/exasol/notebook-connector/pull/56)
* #47: [Create a Sagemaker Extension wrapper](https://github.com/exasol/notebook-connector/issues/47)
* #60: [Start using the AILabConfig internally](https://github.com/exasol/notebook-connector/issues/60)
56 changes: 35 additions & 21 deletions exasol/ai_lab_config.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
from enum import Enum
from enum import Enum, auto


class AILabConfig(Enum):
    """
    Keys of the configuration parameters kept in the secret store.

    Code in this project passes the members of this enumeration to the
    secret store (e.g. ``conf.get(AILabConfig.db_user)``) instead of
    free-form strings. The member values are generated with ``auto()`` and
    carry no meaning of their own; each member name must be defined exactly
    once, because ``Enum`` rejects a reused name with a ``TypeError``.
    """

    # Database connection parameters.
    use_itde = auto()
    db_host_name = auto()
    db_port = auto()
    db_schema = auto()
    db_user = auto()
    db_password = auto()
    db_encryption = auto()
    cert_vld = auto()
    trusted_ca = auto()
    client_cert = auto()
    client_key = auto()

    # BucketFS service parameters.
    bfs_host_name = auto()
    bfs_port = auto()
    bfs_service = auto()
    bfs_bucket = auto()
    bfs_user = auto()
    bfs_password = auto()
    bfs_encryption = auto()

    # Resource sizes.
    mem_size = auto()
    disk_size = auto()

    # Huggingface access token.
    huggingface_token = auto()

    # AWS S3 bucket and access credentials.
    aws_bucket = auto()
    aws_region = auto()
    aws_access_key_id = auto()
    aws_secret_access_key = auto()
    aws_role = auto()

    # NOTE(review): presumably names of the ITDE docker resources - confirm.
    itde_container = auto()
    itde_volume = auto()
    itde_network = auto()

    # NOTE(review): "te_" presumably stands for the Transformers extension
    # and "sme_" for the Sagemaker extension - confirm against the wrappers.
    te_bfs_connection = auto()
    te_models_bfs_dir = auto()
    te_hf_connection = auto()
    te_models_cache_dir = auto()
    sme_aws_connection = auto()
60 changes: 31 additions & 29 deletions exasol/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
import exasol.bucketfs as bfs # type: ignore
from exasol.secret_store import Secrets
from exasol.utils import optional_str_to_bool
from exasol.ai_lab_config import AILabConfig as CKey


def _optional_encryption(conf: Secrets, key: CKey = CKey.db_encryption) -> Optional[bool]:
    """
    Reads an optional encryption flag from the secret store.

    Parameters:
        conf:
            The secret store.
        key:
            Configuration key of the flag to read. Defaults to the database
            encryption flag (db_encryption).

    Returns the stored value converted by `optional_str_to_bool`.
    NOTE(review): presumably None when the flag is not set - confirm
    against `optional_str_to_bool`.
    """
    return optional_str_to_bool(conf.get(key))


Expand All @@ -26,14 +27,14 @@ def _extract_ssl_options(conf: Secrets) -> dict:
sslopt: dict[str, object] = {}

# Is server certificate validation required?
certificate_validation = optional_str_to_bool(conf.get("CERTIFICATE_VALIDATION"))
certificate_validation = optional_str_to_bool(conf.get(CKey.cert_vld))
if certificate_validation is not None:
sslopt["cert_reqs"] = (
ssl.CERT_REQUIRED if certificate_validation else ssl.CERT_NONE
)

# Is a bundle with trusted CAs provided?
trusted_ca = conf.get("TRUSTED_CA")
trusted_ca = conf.get(CKey.trusted_ca)
if trusted_ca:
trusted_ca_path = Path(trusted_ca)
if trusted_ca_path.is_dir():
Expand All @@ -44,12 +45,12 @@ def _extract_ssl_options(conf: Secrets) -> dict:
raise ValueError(f"Trusted CA location {trusted_ca} doesn't exist.")

# Is client's own certificate provided?
client_certificate = conf.get("CLIENT_CERTIFICATE")
client_certificate = conf.get(CKey.client_cert)
if client_certificate:
if not Path(client_certificate).is_file():
raise ValueError(f"Certificate file {client_certificate} doesn't exist.")
sslopt["certfile"] = client_certificate
private_key = conf.get("PRIVATE_KEY")
private_key = conf.get(CKey.client_key)
if private_key:
if not Path(private_key).is_file():
raise ValueError(f"Private key file {private_key} doesn't exist.")
Expand All @@ -60,15 +61,15 @@ def _extract_ssl_options(conf: Secrets) -> dict:

def get_external_host(conf: Secrets) -> str:
    """
    Constructs the host part of a DB URL using provided configuration parameters.

    The result has the form "<db_host_name>:<db_port>", with both values
    read from the secret store through their AILabConfig keys.
    """
    return f"{conf.get(CKey.db_host_name)}:{conf.get(CKey.db_port)}"


def get_udf_bucket_path(conf: Secrets) -> str:
    """
    Builds the path of the BucketFS bucket specified in the configuration,
    as it's seen in the udf's file system.

    The result has the form "/buckets/<bfs_service>/<bfs_bucket>", with both
    values read from the secret store through their AILabConfig keys.
    """
    return f"/buckets/{conf.get(CKey.bfs_service)}/{conf.get(CKey.bfs_bucket)}"


def open_pyexasol_connection(conf: Secrets, **kwargs) -> pyexasol.ExaConnection:
Expand All @@ -80,19 +81,19 @@ def open_pyexasol_connection(conf: Secrets, **kwargs) -> pyexasol.ExaConnection:
Parameters in kwargs override the correspondent values in the configuration.

The configuration should provide the following parameters:
- Server address and port (EXTERNAL_HOST_NAME, DB_PORT),
- Client security credentials (USER, PASSWORD).
- Server address and port (db_host_name, db_port),
- Client security credentials (db_user, db_password).
Optional parameters include:
- Secured comm flag (ENCRYPTION),
- Some of the SSL options (CERTIFICATE_VALIDATION, TRUSTED_CA, CLIENT_CERTIFICATE).
- Secured comm flag (db_encryption),
- Some of the SSL options (cert_vld, trusted_ca, client_cert).
If the schema is not provided then it should be set explicitly in every SQL statement.
For other optional parameters the default settings are as per the pyexasol interface.
"""

conn_params: dict[str, Any] = {
"dsn": get_external_host(conf),
"user": conf.USER,
"password": conf.PASSWORD,
"user": conf.get(CKey.db_user),
"password": conf.get(CKey.db_password),
}

encryption = _optional_encryption(conf)
Expand All @@ -113,19 +114,19 @@ def open_sqlalchemy_connection(conf: Secrets):
Does NOT set the default schema, even if it is defined in the configuration.

The configuration should provide the following parameters:
- Server address and port (EXTERNAL_HOST_NAME, DB_PORT),
- Client security credentials (USER, PASSWORD).
- Server address and port (db_host_name, db_port),
- Client security credentials (db_user, db_password).
Optional parameters include:
- Secured comm flag (ENCRYPTION).
- Validation of the server's TLS/SSL certificate by the client (CERTIFICATE_VALIDATION).
- Secured comm flag (db_encryption).
- Validation of the server's TLS/SSL certificate by the client (cert_vld).
If the schema is not provided then it should be set explicitly in every SQL statement.
For other optional parameters the default settings are as per the Exasol SQLAlchemy interface.
Currently, it's not possible to use a bundle of trusted CAs other than the default. Nor
is it possible to set the client TLS/SSL certificate.
"""

websocket_url = (
f"exa+websocket://{conf.USER}:{conf.PASSWORD}@{get_external_host(conf)}"
f"exa+websocket://{conf.get(CKey.db_user)}:{conf.get(CKey.db_password)}@{get_external_host(conf)}"
)

delimiter = "?"
Expand All @@ -149,29 +150,30 @@ def open_bucketfs_connection(conf: Secrets) -> bfs.Bucket:
Returns the Bucket object for the bucket selected in the configuration.

The configuration should provide the following parameters:
- Host name and port of the BucketFS service (EXTERNAL_HOST_NAME, BUCKETFS_PORT),
- Client security credentials (BUCKETFS_USER, BUCKETFS_PASSWORD).
- Bucket name (BUCKETFS_BUCKET)
- Host name and port of the BucketFS service (bfs_host_name or db_host_name, bfs_port),
- Client security credentials (bfs_user, bfs_password).
- Bucket name (bfs_bucket)
Optional parameters include:
- Secured comm flag (ENCRYPTION), defaults to False.
- Secured comm flag (bfs_encryption), defaults to False.
Currently, it's not possible to set any of the TLS/SSL parameters. If secured comm
is selected it automatically sets the certificate validation on.
"""

# Set up the connection parameters.
# For now, just use the http. Once the exasol.bucketfs is capable of using the
# https without validating the server certificate choose between the http and
# https depending on the ENCRYPTION setting like in the code below:
# buckfs_url_prefix = "https" if _optional_encryption(conf) else "http"
# https depending on the bfs_encryption setting like in the code below:
# buckfs_url_prefix = "https" if _optional_encryption(conf, CKey.bfs_encryption) else "http"
buckfs_url_prefix = "http"
buckfs_url = f"{buckfs_url_prefix}://{conf.EXTERNAL_HOST_NAME}:{conf.BUCKETFS_PORT}"
buckfs_host = conf.get(CKey.bfs_host_name, conf.get(CKey.db_host_name))
buckfs_url = f"{buckfs_url_prefix}://{buckfs_host}:{conf.get(CKey.bfs_port)}"
buckfs_credentials = {
conf.BUCKETFS_BUCKET: {
"username": conf.BUCKETFS_USER,
"password": conf.BUCKETFS_PASSWORD,
conf.get(CKey.bfs_bucket): {
"username": conf.get(CKey.bfs_user),
"password": conf.get(CKey.bfs_password),
}
}

# Connect to the BucketFS service and navigate to the bucket of choice.
bucketfs = bfs.Service(buckfs_url, buckfs_credentials)
return bucketfs[conf.BUCKETFS_BUCKET]
return bucketfs[conf.get(CKey.bfs_bucket)]
55 changes: 42 additions & 13 deletions exasol/extension_wrapper_common.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from exasol.connections import open_pyexasol_connection
from exasol.secret_store import Secrets
from exasol.utils import optional_str_to_bool
from exasol.ai_lab_config import AILabConfig as CKey


def str_to_bool(conf: Secrets, key: str, default_value: bool) -> bool:
def str_to_bool(conf: Secrets, key: CKey, default_value: bool) -> bool:
"""
Tries to read a binary (i.e. yes/no) value from the secret store. If found
returns the correspondent boolean. Otherwise, returns the provided default
Expand Down Expand Up @@ -31,26 +32,26 @@ def encapsulate_bucketfs_credentials(
Parameters:
conf:
The secret store. The store must hold the bucket-fs service
parameters (BUCKETFS_HOST_NAME or EXTERNAL_HOST_NAME, BUCKETFS_PORT,
BUCKETFS_SERVICE), the access credentials (BUCKETFS_USER,
BUCKETFS_PASSWORD), and the bucket name (BUCKETFS_BUCKET), as well
parameters (bfs_host_name or db_host_name, bfs_port,
bfs_service), the access credentials (bfs_user,
bfs_password), and the bucket name (bfs_bucket), as well
as the DB connection parameters.
path_in_bucket:
Path identifying a location in the bucket.
connection_name:
Name for the connection object to be created.
"""

bfs_host = conf.get("BUCKETFS_HOST_NAME", conf.EXTERNAL_HOST_NAME)
bfs_host = conf.get(CKey.bfs_host_name, conf.get(CKey.db_host_name))
# For now, just use the http. Once the exasol.bucketfs is capable of using
# the https without validating the server certificate choose between the
# http and https depending on the BUCKETFS_ENCRYPTION setting, like this:
# bfs_protocol = "https" if str_to_bool(conf, 'BUCKETFS_ENCRYPTION', True)
# http and https depending on the bfs_encryption setting, like this:
# bfs_protocol = "https" if str_to_bool(conf, CKey.bfs_encryption, True)
# else "http"
bfs_protocol = "http"
bfs_dest = (
f"{bfs_protocol}://{bfs_host}:{conf.BUCKETFS_PORT}/"
f"{conf.BUCKETFS_BUCKET}/{path_in_bucket};{conf.BUCKETFS_SERVICE}"
f"{bfs_protocol}://{bfs_host}:{conf.get(CKey.bfs_port)}/"
f"{conf.get(CKey.bfs_bucket)}/{path_in_bucket};{conf.get(CKey.bfs_service)}"
)

sql = f"""
Expand All @@ -60,8 +61,8 @@ def encapsulate_bucketfs_credentials(
IDENTIFIED BY {{BUCKETFS_PASSWORD!s}}
"""
query_params = {
"BUCKETFS_USER": conf.BUCKETFS_USER,
"BUCKETFS_PASSWORD": conf.BUCKETFS_PASSWORD,
"BUCKETFS_USER": conf.get(CKey.bfs_user),
"BUCKETFS_PASSWORD": conf.get(CKey.bfs_password),
}
with open_pyexasol_connection(conf, compression=True) as conn:
conn.execute(query=sql, query_params=query_params)
Expand All @@ -73,7 +74,7 @@ def encapsulate_huggingface_token(conf: Secrets, connection_name: str) -> None:

Parameters:
conf:
The secret store. The store must hold the Huggingface token (HF_TOKEN),
The secret store. The store must hold the Huggingface token (huggingface_token),
as well as the DB connection parameters.
connection_name:
Name for the connection object to be created.
Expand All @@ -84,6 +85,34 @@ def encapsulate_huggingface_token(conf: Secrets, connection_name: str) -> None:
TO ''
IDENTIFIED BY {{TOKEN!s}}
"""
query_params = {"TOKEN": conf.HF_TOKEN}
query_params = {"TOKEN": conf.get(CKey.huggingface_token)}
with open_pyexasol_connection(conf, compression=True) as conn:
conn.execute(query=sql, query_params=query_params)


def encapsulate_aws_credentials(conf: Secrets, connection_name: str) -> None:
    """
    Creates (or replaces) a connection object in the database that holds the
    address of an AWS S3 bucket together with the AWS access credentials.

    Parameters:
        conf:
            The secret store. It must provide the S3 bucket parameters
            (aws_bucket, aws_region), the AWS access credentials
            (aws_access_key_id, aws_secret_access_key), and the DB
            connection parameters.
        connection_name:
            Name for the connection object to be created.
    """

    # The bucket address is embedded into the statement text itself.
    bucket_name = conf.get(CKey.aws_bucket)
    bucket_region = conf.get(CKey.aws_region)
    s3_address = f"https://{bucket_name}.s3.{bucket_region}.amazonaws.com/"

    # The doubled braces survive the f-string as {ACCESS_ID!s} and
    # {SECRET_KEY!s} placeholders, which are filled from the query
    # parameters passed to the connection's execute() call.
    sql = f"""
    CREATE OR REPLACE CONNECTION [{connection_name}]
        TO '{s3_address}'
        USER {{ACCESS_ID!s}}
        IDENTIFIED BY {{SECRET_KEY!s}}
    """
    credentials = {
        "ACCESS_ID": conf.get(CKey.aws_access_key_id),
        "SECRET_KEY": conf.get(CKey.aws_secret_access_key),
    }

    with open_pyexasol_connection(conf, compression=True) as conn:
        conn.execute(query=sql, query_params=credentials)
Loading