Skip to content

Commit

Permalink
wip: up & dl works
Browse files Browse the repository at this point in the history
  • Loading branch information
qgerome committed Aug 19, 2024
1 parent d0db040 commit fd9a70f
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 36 deletions.
7 changes: 6 additions & 1 deletion config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,11 +349,16 @@
# Datasets config
WORKSPACE_DATASETS_BUCKET = os.environ.get("WORKSPACE_DATASETS_BUCKET")

# Disabling the check on the size of the request body when using the file system storage backend
# This is needed to allow the upload of large files when they are not stored by an external storage backend
if os.environ.get("DISABLE_UPLOAD_MAX_SIZE_CHECK", "false") == "true":
DATA_UPLOAD_MAX_MEMORY_SIZE = None


# Filesystem configuration
WORKSPACE_STORAGE_BACKEND = {
"engine": "hexa.files.backends.fs.FileSystemStorage",
"options": {
"prefix": os.environ.get("WORKSPACE_BUCKET_PREFIX", "hexa-"),
"folder": os.environ.get("WORKSPACE_BUCKET_FOLDER", "/data"),
},
}
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ x-app: &common
environment:
- DEBUG=true
- DJANGO_SETTINGS_MODULE=config.settings.dev
- DISABLE_UPLOAD_MAX_SIZE_CHECK=true
- DATABASE_HOST=db
- DATABASE_PORT=5432
- DATABASE_NAME=hexa-app
Expand Down
4 changes: 4 additions & 0 deletions hexa/files/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,7 @@ def generate_upload_url(
@abstractmethod
def get_token_as_env_variables(self, token):
    """Translate a storage access *token* into environment variables.

    NOTE(review): abstract hook — the exact variable names are defined by
    each concrete backend; the filesystem backend raises
    NotImplementedError for token-based env variables.
    """
    pass

@abstractmethod
def get_bucket_mount_config(self, bucket_name):
    """Return the env-variable mapping needed to mount *bucket_name*.

    Concrete backends return a dict of environment variables (e.g. the
    GCP backend returns WORKSPACE_STORAGE_ENGINE_* entries plus a
    short-lived access token) that the workspace runtime uses to mount
    the bucket.
    """
    pass
17 changes: 7 additions & 10 deletions hexa/files/backends/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@


class FileSystemStorage(Storage):
def __init__(self, folder, prefix=""):
def __init__(self, folder):
self.location = Path(folder)
self.prefix = prefix # TODO: Use the prefix here to create the bucket path and not in workspace model
self._token_max_age = 60 * 60 # 1 hour

def load_bucket_sample_data(self, bucket_name: str):
Expand Down Expand Up @@ -58,7 +57,6 @@ def _get_payload_from_token(self, token):
def _create_token_for_payload(self, payload: dict):
signer = TimestampSigner()
signed_payload = signer.sign_object(payload, compress=True)
print(signed_payload, len(signed_payload), flush=True)
return urlsafe_base64_encode(force_bytes(signed_payload))

def to_storage_object(self, bucket_name: str, object_key: Path):
Expand Down Expand Up @@ -212,14 +210,11 @@ def delete_object(self, bucket_name: str, object_key: str):
else:
os.remove(full_path)

def get_short_lived_downscoped_access_token(self, bucket_name):
return super().get_short_lived_downscoped_access_token(bucket_name)

def generate_upload_url(
self,
bucket_name: str,
target_key: str,
content_type: str,
content_type: str | None = None,
request: HttpRequest | None = None,
raise_if_exists=False,
):
Expand Down Expand Up @@ -253,11 +248,13 @@ def generate_download_url(
token = self._create_token_for_payload(
{"bucket_name": bucket_name, "target_key": target_key}
)
internal_url = reverse("files:download_file", args=(token,))
url = reverse("files:download_file", args=(token,))

if request is not None:
return request.build_absolute_uri(internal_url)
return internal_url
url = request.build_absolute_uri(url)
if force_attachment:
url += "?attachment=true"
return url

def get_token_as_env_variables(self, token):
raise NotImplementedError(
Expand Down
8 changes: 5 additions & 3 deletions hexa/files/backends/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def get_short_lived_downscoped_access_token(self, bucket_name):
},
)
payload = response.json()
return [payload["access_token"], payload["expires_in"], "gcp"]
return payload["access_token"], payload["expires_in"]

def delete_bucket(self, bucket_name: str, fully: bool = False):
return get_storage_client().delete_bucket(bucket_name)
Expand All @@ -357,8 +357,10 @@ def delete_object(self, bucket_name: str, file_name: str):
def load_bucket_sample_data(self, bucket_name: str):
return load_bucket_sample_data_with(bucket_name, self)

def get_token_as_env_variables(self, token):
def get_bucket_mount_config(self, bucket_name):
    """Return the env variables used to mount *bucket_name* from GCS.

    Fetches a short-lived, bucket-scoped (downscoped) access token and
    exposes it together with the engine and bucket identifiers under the
    WORKSPACE_STORAGE_ENGINE_* names.
    """
    # The token's expiry is discarded here: a fresh token is issued on
    # every call, so only the token value itself is needed.
    token, _ = self.get_short_lived_downscoped_access_token(bucket_name)
    return {
        "GCS_TOKEN": token,  # FIXME: Once we have deployed the new openhexa-bslq-environment image and upgraded the openhexa-app, we can remove this line
        "WORKSPACE_STORAGE_ENGINE": "gcp",
        "WORKSPACE_STORAGE_ENGINE_GCP_BUCKET_NAME": bucket_name,
        "WORKSPACE_STORAGE_ENGINE_GCP_ACCESS_TOKEN": token,
    }
5 changes: 2 additions & 3 deletions hexa/files/backends/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,16 +377,15 @@ def get_short_lived_downscoped_access_token(self, bucket_name):
DurationSeconds=token_lifetime,
)

return [
return (
{
"endpoint_url": sts_service.__dict__["meta"].__dict__["_endpoint_url"],
"aws_access_key_id": response["Credentials"]["AccessKeyId"],
"aws_secret_access_key": response["Credentials"]["SecretAccessKey"],
"aws_session_token": response["Credentials"]["SessionToken"],
},
response["Credentials"]["Expiration"],
"s3",
]
)

def delete_bucket(self, bucket_name: str, fully: bool = False):
s3 = get_storage_client()
Expand Down
16 changes: 10 additions & 6 deletions hexa/files/views.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
from django.http import FileResponse, HttpRequest, HttpResponse, HttpResponseBadRequest
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_http_methods

from hexa.files import storage


def download_file(request: HttpRequest, token: str) -> HttpResponse:
print("download_file", "coucou", flush=True)
if hasattr(storage, "get_bucket_object_by_token") is False:
return HttpResponseBadRequest("Storage does not support token-based access")
object = storage.get_bucket_object_by_token(token)
full_path = storage.path(object.path)
return FileResponse(open(full_path, "rb"), as_attachment=True)
as_attachment = request.GET.get("attachment", "false")
return FileResponse(open(full_path, "rb"), as_attachment=as_attachment == "true")


@require_http_methods(["POST", "PUT"])
@csrf_exempt
def upload_file(request: HttpRequest, token: str) -> HttpResponse:
file = request.FILES.get("file", None)
if file is None:
return HttpResponseBadRequest("Missing 'file' parameter")
if hasattr(storage, "save_object_by_token") is False:
return HttpResponseBadRequest("Storage does not support token-based access")

try:
storage.save_object_by_token(token, file)
storage.save_object_by_token(token, request.body)
return HttpResponse(status=201)
except storage.exceptions.AlreadyExists:
return HttpResponseBadRequest("Object already exists")
Expand Down
19 changes: 6 additions & 13 deletions hexa/workspaces/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# ease patching


def get_short_lived_downscoped_access_token(bucket_name):
return storage.get_short_lived_downscoped_access_token(bucket_name=bucket_name)
def get_bucket_mount_config(bucket_name):
    """Return the storage backend's mount config for *bucket_name*.

    Thin module-level indirection over ``storage.get_bucket_mount_config``,
    kept to ease patching in tests (see the "# ease patching" note above).
    """
    return storage.get_bucket_mount_config(bucket_name=bucket_name)


def get_token_as_env_variables(token):
Expand Down Expand Up @@ -88,7 +88,9 @@ def credentials(request: HttpRequest, workspace_slug: str = None) -> HttpRespons
)

# Populate the environment variables with the connections of the workspace
env = {}
env = {
"WORKSPACE_BUCKET_NAME": workspace.bucket_name,
}

# Database credentials
db_credentials = get_db_server_credentials()
Expand All @@ -104,16 +106,7 @@ def credentials(request: HttpRequest, workspace_slug: str = None) -> HttpRespons
)

# Bucket credentials
token, _expires_in, engine_key = get_short_lived_downscoped_access_token(
workspace.bucket_name
)
env.update(get_token_as_env_variables(token))
env.update(
{
"WORKSPACE_STORAGE_ENGINE": engine_key,
"WORKSPACE_BUCKET_NAME": workspace.bucket_name,
}
)
env.update(storage.get_bucket_mount_config(workspace.bucket_name))

# Custom Docker image for the workspace if appropriate
image = (
Expand Down

0 comments on commit fd9a70f

Please sign in to comment.