-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactoring/7 use new bfs interface #9
Changes from 30 commits
17f44ac
7875564
31cca39
ead6d48
ddcee78
be02924
62c1c38
6dd801e
7cc9680
38561ed
5bfc307
215e697
fc3e33f
6d69ad3
5ab3976
4a37da2
54932d2
f9b431c
ce813fd
be07355
400fb0e
2268e5e
9ce0144
607c51b
2d9c24f
9baacb7
cb35e98
ce9edeb
489a5f3
5b17d7d
2518dc7
8ee8389
203a9be
7a45edb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,27 +7,11 @@ | |
import ssl | ||
import requests # type: ignore | ||
import pyexasol # type: ignore | ||
from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation # type: ignore | ||
from exasol_bucketfs_utils_python.bucket_config import BucketConfig, BucketFSConfig # type: ignore | ||
from exasol_bucketfs_utils_python.bucketfs_connection_config import BucketFSConnectionConfig # type: ignore | ||
import exasol.bucketfs as bfs # type: ignore | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def create_bucketfs_location( | ||
bucketfs_name: str, bucketfs_host: str, bucketfs_port: int, | ||
bucketfs_use_https: bool, bucketfs_user: str, bucketfs_password: str, | ||
bucket: str, path_in_bucket: str) -> BucketFSLocation: | ||
_bucketfs_connection = BucketFSConnectionConfig( | ||
host=bucketfs_host, port=bucketfs_port, user=bucketfs_user, | ||
pwd=bucketfs_password, is_https=bucketfs_use_https) | ||
_bucketfs_config = BucketFSConfig( | ||
bucketfs_name=bucketfs_name, connection_config=_bucketfs_connection) | ||
_bucket_config = BucketConfig( | ||
bucket_name=bucket, bucketfs_config=_bucketfs_config) | ||
return BucketFSLocation( | ||
bucket_config=_bucket_config, | ||
base_path=PurePosixPath(path_in_bucket)) | ||
ARCHIVE_EXTENSIONS = [".tar.gz", ".tgz", ".zip", ".tar"] | ||
|
||
|
||
def get_websocket_sslopt(use_ssl_cert_validation: bool = True, | ||
|
@@ -87,14 +71,32 @@ def get_language_settings(pyexasol_conn: pyexasol.ExaConnection, alter_type: Lan | |
return result[0][0] | ||
|
||
|
||
def get_udf_path(bucket_base_path: bfs.path.PathLike, bucket_file: str) -> PurePosixPath: | ||
""" | ||
Returns the path of the specified file in a bucket, as it's seen from a UDF | ||
For known types of archives removes the archive extension from the file name. | ||
|
||
bucket_base_path - Base directory in the bucket | ||
bucket_file - File path in the bucket, relative to the base directory. | ||
""" | ||
|
||
for extension in ARCHIVE_EXTENSIONS: | ||
if bucket_file.endswith(extension): | ||
bucket_file = bucket_file[: -len(extension)] | ||
break | ||
|
||
file_path = bucket_base_path / bucket_file | ||
return PurePosixPath(file_path.as_udf_path()) | ||
|
||
|
||
class LanguageContainerDeployer: | ||
|
||
def __init__(self, | ||
pyexasol_connection: pyexasol.ExaConnection, | ||
language_alias: str, | ||
bucketfs_location: BucketFSLocation) -> None: | ||
bucketfs_path: bfs.path.PathLike) -> None: | ||
|
||
self._bucketfs_location = bucketfs_location | ||
self._bucketfs_path = bucketfs_path | ||
self._language_alias = language_alias | ||
self._pyexasol_conn = pyexasol_connection | ||
logger.debug("Init %s", LanguageContainerDeployer.__name__) | ||
|
@@ -177,8 +179,8 @@ def upload_container(self, container_file: Path, | |
raise RuntimeError(f"Container file {container_file} " | ||
f"is not a file.") | ||
with open(container_file, "br") as f: | ||
self._bucketfs_location.upload_fileobj_to_bucketfs( | ||
fileobj=f, bucket_file_path=bucket_file_path) | ||
file_path = self._bucketfs_path / bucket_file_path | ||
file_path.write(f) | ||
logging.debug("Container is uploaded to bucketfs") | ||
|
||
def activate_container(self, bucket_file_path: str, | ||
|
@@ -209,7 +211,7 @@ def generate_activation_command(self, bucket_file_path: str, | |
allow_override - If True the activation of a language container with the same alias will be overriden, | ||
otherwise a RuntimeException will be thrown. | ||
""" | ||
path_in_udf = self._bucketfs_location.generate_bucket_udf_path(bucket_file_path) | ||
path_in_udf = get_udf_path(self._bucketfs_path, bucket_file_path) | ||
new_settings = \ | ||
self._update_previous_language_settings(alter_type, allow_override, path_in_udf) | ||
alter_command = \ | ||
|
@@ -233,7 +235,7 @@ def get_language_definition(self, bucket_file_path: str): | |
|
||
bucket_file_path - Path within the designated bucket where the container is uploaded. | ||
""" | ||
path_in_udf = self._bucketfs_location.generate_bucket_udf_path(bucket_file_path) | ||
path_in_udf = get_udf_path(self._bucketfs_path, bucket_file_path) | ||
result = self._generate_new_language_settings(path_in_udf=path_in_udf, prev_lang_aliases=[]) | ||
return result | ||
|
||
|
@@ -265,10 +267,16 @@ def _check_if_requested_language_alias_already_exists( | |
raise RuntimeError(warning_message) | ||
|
||
@classmethod | ||
def create(cls, bucketfs_name: str, bucketfs_host: str, bucketfs_port: int, | ||
bucketfs_use_https: bool, bucketfs_user: str, | ||
bucketfs_password: str, bucket: str, path_in_bucket: str, | ||
def create(cls, | ||
dsn: str, db_user: str, db_password: str, language_alias: str, | ||
bucketfs_host: Optional[str] = None, bucketfs_port: Optional[int] = None, | ||
bucketfs_name: Optional[str] = None, bucket: Optional[str] = None, | ||
bucketfs_user: Optional[str] = None, bucketfs_password: Optional[str] = None, | ||
bucketfs_use_https: bool = True, | ||
saas_url: Optional[str] = None, | ||
saas_account_id: Optional[str] = None, saas_database_id: Optional[str] = None, | ||
saas_token: Optional[str] = None, | ||
path_in_bucket: str = '', | ||
Comment on lines
+275
to
+282
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just an idea: Would it be better to have two different class methods: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure. The decision of which way to go needs to be taken somewhere. By making two different methods we are pushing this decision to the caller. From the caller's perspective, it might be easier to grab all available parameters, e.g. defined in the config, pass them over to the single point of call and let it make sense of these parameters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that we would move the responsibility to the client. I'am not sure about the client's case. However, if the client knows that the connection is on-prem or SaaS, it would be cumbersome to pass all the unnecessary parameters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't have to pass the unnecessary parameters. They are optional. For example, if you know that you work with SaaS the call might look like the following:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well :), except that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or maybe I should merge it first and then do things property in this PR, |
||
use_ssl_cert_validation: bool = True, ssl_trusted_ca: Optional[str] = None, | ||
ssl_client_certificate: Optional[str] = None, | ||
ssl_private_key: Optional[str] = None) -> "LanguageContainerDeployer": | ||
|
@@ -284,8 +292,28 @@ def create(cls, bucketfs_name: str, bucketfs_host: str, bucketfs_port: int, | |
websocket_sslopt=websocket_sslopt | ||
) | ||
|
||
bucketfs_location = create_bucketfs_location( | ||
bucketfs_name, bucketfs_host, bucketfs_port, bucketfs_use_https, | ||
bucketfs_user, bucketfs_password, bucket, path_in_bucket) | ||
# Infer where the BucketFS is - on-prem or SaaS. | ||
if all((bucketfs_host, bucketfs_port, bucketfs_name, bucket, | ||
bucketfs_user, bucketfs_password)): | ||
bfs_url = (f"{'https' if bucketfs_use_https else 'http'}://" | ||
f"{bucketfs_host}:{bucketfs_port}") | ||
verify = ssl_trusted_ca or use_ssl_cert_validation | ||
bucketfs_path = bfs.path.build_path(backend=bfs.path.StorageBackend.onprem, | ||
url=bfs_url, | ||
username=bucketfs_user, | ||
password=bucketfs_password, | ||
service_name=bucketfs_name, | ||
bucket_name=bucket, | ||
verify=verify, | ||
path=path_in_bucket) | ||
elif all((saas_url, saas_account_id, saas_database_id, saas_token)): | ||
bucketfs_path = bfs.path.build_path(backend=bfs.path.StorageBackend.saas, | ||
url=saas_url, | ||
account_id=saas_account_id, | ||
database_id=saas_database_id, | ||
pat=saas_token, | ||
path=path_in_bucket) | ||
else: | ||
raise ValueError('Insufficient parameters to access the BucketFS service') | ||
ahsimb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return cls(pyexasol_conn, language_alias, bucketfs_location) | ||
return cls(pyexasol_conn, language_alias, bucketfs_path) |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, maybe have two different commands, one for OnPrem and one for SaaS There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, I think from the caller's (a human in this case) point of view it's better to have one cli command and different parameters, rather than two. One less name to remember, the rest is the same. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,9 @@ | |
|
||
DB_PASSWORD_ENVIRONMENT_VARIABLE = "DB_PASSWORD" | ||
BUCKETFS_PASSWORD_ENVIRONMENT_VARIABLE = "BUCKETFS_PASSWORD" | ||
SAAS_ACCOUNT_ID_ENVIRONMENT_VARIABLE = "SAAS_ACCOUNT_ID" | ||
SAAS_DATABASE_ID_ENVIRONMENT_VARIABLE = "SAAS_DATABASE_ID" | ||
SAAS_TOKEN_ENVIRONMENT_VARIABLE = "SAAS_TOKEN" | ||
|
||
|
||
class CustomizableParameters(Enum): | ||
|
@@ -80,24 +83,32 @@ def clear_formatters(self): | |
|
||
|
||
@click.command(name="language-container") | ||
@click.option('--bucketfs-name', type=str, required=True) | ||
@click.option('--bucketfs-host', type=str, required=True) | ||
@click.option('--bucketfs-port', type=int, required=True) | ||
@click.option('--bucketfs-name', type=str) | ||
@click.option('--bucketfs-host', type=str) | ||
@click.option('--bucketfs-port', type=int) | ||
@click.option('--bucketfs-use-https', type=bool, default=False) | ||
@click.option('--bucketfs-user', type=str, required=True, default="w") | ||
@click.option('--bucketfs-password', prompt='bucketFS password', hide_input=True, | ||
default=lambda: os.environ.get(BUCKETFS_PASSWORD_ENVIRONMENT_VARIABLE, "")) | ||
@click.option('--bucket', type=str, required=True) | ||
@click.option('--path-in-bucket', type=str, required=True, default=None) | ||
@click.option('--bucketfs-user', type=str) | ||
@click.option('--bucketfs-password', type=str, | ||
default=lambda: os.environ.get(BUCKETFS_PASSWORD_ENVIRONMENT_VARIABLE)) | ||
@click.option('--bucket', type=str) | ||
@click.option('--saas-url', type=str, | ||
default='https://cloud.exasol.com') | ||
@click.option('--saas-account-id', type=str, | ||
default=lambda: os.environ.get(SAAS_ACCOUNT_ID_ENVIRONMENT_VARIABLE)) | ||
@click.option('--saas-database-id', type=str, | ||
default=lambda: os.environ.get(SAAS_DATABASE_ID_ENVIRONMENT_VARIABLE)) | ||
@click.option('--saas-token', type=str, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the token and the passwords, we should maybe add an input prompt in another PR, when they are not provided as cli argument or environment variable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These values are quite long, entering them manually would be torture. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can copy them into the input prompt. The input prompt is usually used, when you don't want to leak your credentials in your shell history. However, as I said, this can be done later. I think, it simply enough to create a ticket for it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure I understood. According to the click doc, you can offer the user a choice, either to accept the default value stored in a config or env. variable, or enter it manually from a terminal. My point is that entering the saas parameters manually is not practical because they are too long. Hence we don't need a prompt here. It may be useful for the db password though. |
||
default=lambda: os.environ.get(SAAS_TOKEN_ENVIRONMENT_VARIABLE)) | ||
@click.option('--path-in-bucket', type=str) | ||
tkilias marked this conversation as resolved.
Show resolved
Hide resolved
|
||
@click.option('--container-file', | ||
type=click.Path(exists=True, file_okay=True), default=None) | ||
@click.option('--version', type=str, default=None, expose_value=False, | ||
type=click.Path(exists=True, file_okay=True)) | ||
@click.option('--version', type=str, expose_value=False, | ||
callback=slc_parameter_formatters) | ||
@click.option('--dsn', type=str, required=True) | ||
@click.option('--db-user', type=str, required=True) | ||
@click.option('--db-pass', prompt='db password', hide_input=True, | ||
default=lambda: os.environ.get(DB_PASSWORD_ENVIRONMENT_VARIABLE, "")) | ||
@click.option('--language-alias', type=str, default="PYTHON3_TE") | ||
default=lambda: os.environ.get(DB_PASSWORD_ENVIRONMENT_VARIABLE)) | ||
@click.option('--language-alias', type=str, default="PYTHON3_EXT") | ||
@click.option('--ssl-cert-path', type=str, default="") | ||
@click.option('--ssl-client-cert-path', type=str, default="") | ||
@click.option('--ssl-client-private-key', type=str, default="") | ||
|
@@ -113,6 +124,10 @@ def language_container_deployer_main( | |
bucketfs_user: str, | ||
bucketfs_password: str, | ||
bucket: str, | ||
saas_url: str, | ||
saas_account_id: str, | ||
saas_database_id: str, | ||
saas_token: str, | ||
path_in_bucket: str, | ||
container_file: str, | ||
dsn: str, | ||
|
@@ -137,6 +152,10 @@ def language_container_deployer_main( | |
bucketfs_user=bucketfs_user, | ||
bucketfs_password=bucketfs_password, | ||
bucket=bucket, | ||
saas_url=saas_url, | ||
saas_account_id=saas_account_id, | ||
saas_database_id=saas_database_id, | ||
saas_token=saas_token, | ||
path_in_bucket=path_in_bucket, | ||
dsn=dsn, | ||
db_user=db_user, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hm, this might be actually logic that needs to go into PathLike.as_udf_path()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure about that. The PathLike functions return either a string or another PathLike. The PathLike != PurePosixPath.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah, I didn't mean the PurePosixPath part, I meant the extension part for archives
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am a bit confused about what happens with an archive. It gets unzipped into the
.dist
directory. What do we actually see in the udf, the zipped or unzipped file? If it's the latter, then yes we should move that code to the backetfs. Does thedist
get hidden in this case, somehow? I think it's better to do this fix in a separate PR because it requires a change in another repo.