From fd9a70f1c03d5cd9464f83fcdf48d0b7700f25de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20G=C3=A9r=C3=B4me?= Date: Mon, 19 Aug 2024 12:00:14 +0200 Subject: [PATCH] wip: up & dl works --- config/settings/base.py | 7 ++++++- docker-compose.yaml | 1 + hexa/files/backends/base.py | 4 ++++ hexa/files/backends/fs.py | 17 +++++++---------- hexa/files/backends/gcp.py | 8 +++++--- hexa/files/backends/s3.py | 5 ++--- hexa/files/views.py | 16 ++++++++++------ hexa/workspaces/views.py | 19 ++++++------------- 8 files changed, 41 insertions(+), 36 deletions(-) diff --git a/config/settings/base.py b/config/settings/base.py index e1c5b3c4d..b269031ff 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -349,11 +349,16 @@ # Datasets config WORKSPACE_DATASETS_BUCKET = os.environ.get("WORKSPACE_DATASETS_BUCKET") +# Disabling the check on the size of the request body when using the file system storage backend +# This is needed to allow the upload of large files when they are not stored by an external storage backend +if os.environ.get("DISABLE_UPLOAD_MAX_SIZE_CHECK", "false") == "true": + DATA_UPLOAD_MAX_MEMORY_SIZE = None + + # Filesystem configuration WORKSPACE_STORAGE_BACKEND = { "engine": "hexa.files.backends.fs.FileSystemStorage", "options": { - "prefix": os.environ.get("WORKSPACE_BUCKET_PREFIX", "hexa-"), "folder": os.environ.get("WORKSPACE_BUCKET_FOLDER", "/data"), }, } diff --git a/docker-compose.yaml b/docker-compose.yaml index 76befb59f..f88e67cc2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -11,6 +11,7 @@ x-app: &common environment: - DEBUG=true - DJANGO_SETTINGS_MODULE=config.settings.dev + - DISABLE_UPLOAD_MAX_SIZE_CHECK=true - DATABASE_HOST=db - DATABASE_PORT=5432 - DATABASE_NAME=hexa-app diff --git a/hexa/files/backends/base.py b/hexa/files/backends/base.py index bb1667d7a..dbac9078e 100644 --- a/hexa/files/backends/base.py +++ b/hexa/files/backends/base.py @@ -120,3 +120,7 @@ def generate_upload_url( @abstractmethod def get_token_as_env_variables(self, token): pass + + @abstractmethod + def get_bucket_mount_config(self, bucket_name): + pass diff --git a/hexa/files/backends/fs.py b/hexa/files/backends/fs.py index 039828b3a..3e9affb06 100644 --- a/hexa/files/backends/fs.py +++ b/hexa/files/backends/fs.py @@ -18,9 +18,8 @@ class FileSystemStorage(Storage): - def __init__(self, folder, prefix=""): + def __init__(self, folder): self.location = Path(folder) - self.prefix = prefix # TODO: Use the prefix here to create the bucket path and not in workspace model self._token_max_age = 60 * 60 # 1 hour def load_bucket_sample_data(self, bucket_name: str): @@ -58,7 +57,6 @@ def _get_payload_from_token(self, token): def _create_token_for_payload(self, payload: dict): signer = TimestampSigner() signed_payload = signer.sign_object(payload, compress=True) - print(signed_payload, len(signed_payload), flush=True) return urlsafe_base64_encode(force_bytes(signed_payload)) def to_storage_object(self, bucket_name: str, object_key: Path): @@ -212,14 +210,11 @@ def delete_object(self, bucket_name: str, object_key: str): else: os.remove(full_path) - def get_short_lived_downscoped_access_token(self, bucket_name): - return super().get_short_lived_downscoped_access_token(bucket_name) - def generate_upload_url( self, bucket_name: str, target_key: str, - content_type: str, + content_type: str | None = None, request: HttpRequest | None = None, raise_if_exists=False, ): @@ -253,11 +248,13 @@ def generate_download_url( token = self._create_token_for_payload( {"bucket_name": bucket_name, "target_key": target_key} ) - internal_url = reverse("files:download_file", args=(token,)) + url = reverse("files:download_file", args=(token,)) if request is not None: - return request.build_absolute_uri(internal_url) - return internal_url + url = request.build_absolute_uri(url) + if force_attachment: + url += "?attachment=true" + return url def get_token_as_env_variables(self, token): raise NotImplementedError( diff --git a/hexa/files/backends/gcp.py b/hexa/files/backends/gcp.py index a38793280..34cedc511 100644 --- a/hexa/files/backends/gcp.py +++ b/hexa/files/backends/gcp.py @@ -338,7 +338,7 @@ def get_short_lived_downscoped_access_token(self, bucket_name): }, ) payload = response.json() - return [payload["access_token"], payload["expires_in"], "gcp"] + return payload["access_token"], payload["expires_in"] def delete_bucket(self, bucket_name: str, fully: bool = False): return get_storage_client().delete_bucket(bucket_name) @@ -357,8 +357,10 @@ def delete_object(self, bucket_name: str, file_name: str): def load_bucket_sample_data(self, bucket_name: str): return load_bucket_sample_data_with(bucket_name, self) - def get_token_as_env_variables(self, token): + def get_bucket_mount_config(self, bucket_name): + token, _ = self.get_short_lived_downscoped_access_token(bucket_name) return { - "GCS_TOKEN": token, # FIXME: Once we have deployed the new openhexa-bslq-environment image and upgraded the openhexa-app, we can remove this line + "WORKSPACE_STORAGE_ENGINE": "gcp", + "WORKSPACE_STORAGE_ENGINE_GCP_BUCKET_NAME": bucket_name, "WORKSPACE_STORAGE_ENGINE_GCP_ACCESS_TOKEN": token, } diff --git a/hexa/files/backends/s3.py b/hexa/files/backends/s3.py index 6c560bf93..affb69af0 100644 --- a/hexa/files/backends/s3.py +++ b/hexa/files/backends/s3.py @@ -377,7 +377,7 @@ def get_short_lived_downscoped_access_token(self, bucket_name): DurationSeconds=token_lifetime, ) - return [ + return ( { "endpoint_url": sts_service.__dict__["meta"].__dict__["_endpoint_url"], "aws_access_key_id": response["Credentials"]["AccessKeyId"], @@ -385,8 +385,7 @@ def get_short_lived_downscoped_access_token(self, bucket_name): "aws_session_token": response["Credentials"]["SessionToken"], }, response["Credentials"]["Expiration"], - "s3", - ] + ) def delete_bucket(self, bucket_name: str, fully: bool = False): s3 = get_storage_client() diff --git a/hexa/files/views.py b/hexa/files/views.py index 6f6c659b3..113b065c9 100644 --- a/hexa/files/views.py +++ b/hexa/files/views.py @@ -1,23 +1,27 @@ from django.http import FileResponse, HttpRequest, HttpResponse, HttpResponseBadRequest +from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_http_methods from hexa.files import storage def download_file(request: HttpRequest, token: str) -> HttpResponse: - print("download_file", "coucou", flush=True) + if hasattr(storage, "get_bucket_object_by_token") is False: + return HttpResponseBadRequest("Storage does not support token-based access") object = storage.get_bucket_object_by_token(token) full_path = storage.path(object.path) - return FileResponse(open(full_path, "rb"), as_attachment=True) + as_attachment = request.GET.get("attachment", "false") + return FileResponse(open(full_path, "rb"), as_attachment=as_attachment == "true") @require_http_methods(["POST", "PUT"]) +@csrf_exempt def upload_file(request: HttpRequest, token: str) -> HttpResponse: - file = request.FILES.get("file", None) - if file is None: - return HttpResponseBadRequest("Missing 'file' parameter") + if hasattr(storage, "save_object_by_token") is False: + return HttpResponseBadRequest("Storage does not support token-based access") + try: - storage.save_object_by_token(token, file) + storage.save_object_by_token(token, request.body) return HttpResponse(status=201) except storage.exceptions.AlreadyExists: return HttpResponseBadRequest("Object already exists") diff --git a/hexa/workspaces/views.py b/hexa/workspaces/views.py index 3a41973b6..739229014 100644 --- a/hexa/workspaces/views.py +++ b/hexa/workspaces/views.py @@ -12,8 +12,8 @@ # ease patching -def get_short_lived_downscoped_access_token(bucket_name): - return storage.get_short_lived_downscoped_access_token(bucket_name=bucket_name) +def get_bucket_mount_config(bucket_name): + return storage.get_bucket_mount_config(bucket_name=bucket_name) def get_token_as_env_variables(token): @@ -88,7 +88,9 @@ def credentials(request: HttpRequest, workspace_slug: str = None) -> HttpRespons ) # Populate the environment variables with the connections of the workspace - env = {} + env = { + "WORKSPACE_BUCKET_NAME": workspace.bucket_name, + } # Database credentials db_credentials = get_db_server_credentials() @@ -104,16 +106,7 @@ def credentials(request: HttpRequest, workspace_slug: str = None) -> HttpRespons ) # Bucket credentials - token, _expires_in, engine_key = get_short_lived_downscoped_access_token( - workspace.bucket_name - ) - env.update(get_token_as_env_variables(token)) - env.update( - { - "WORKSPACE_STORAGE_ENGINE": engine_key, - "WORKSPACE_BUCKET_NAME": workspace.bucket_name, - } - ) + env.update(storage.get_bucket_mount_config(workspace.bucket_name)) # Custom Docker image for the workspace if appropriate image = (