From 550ff6d39f2fa37c83706c535012f66b7e227f8a Mon Sep 17 00:00:00 2001 From: Karl Czajkowski Date: Thu, 21 Mar 2024 15:42:55 -0700 Subject: [PATCH] introduce aux keys 'hname' and 'hversion' and copy_from test cases To be more future-proof, distinguish 'hname' and 'hversion' aux values that represent overrides of URL content from the existing 'version' aux value that overrides backend S3 version addressing. This way, we do not depend on an implementation characteristic where the version string embedded in URLs is passed through to the backend version parameter. This allows for the backend strategy to change without breaking the rename logic that happens higher in the stack. --- bin/hatrac-migrate | 11 +++-- docs/INTERNALS.md | 32 ++++++++----- hatrac/core.py | 14 ++++-- hatrac/model/directory/pgsql.py | 85 ++++++++++++++++++++++----------- hatrac/rest/metadata.py | 5 +- test/rest-smoketest.sh | 45 +++++++++++++++++ 6 files changed, 144 insertions(+), 48 deletions(-) diff --git a/bin/hatrac-migrate b/bin/hatrac-migrate index 0ce75b5..f3bab5c 100644 --- a/bin/hatrac-migrate +++ b/bin/hatrac-migrate @@ -361,12 +361,15 @@ class HatracMigrateCLI (BaseCLI): # 3.3 Post-transfer database updates # We only care about storing the returned aux version in the non-filesystem backend case - version = version if self.backend != "filesystem" else None - self.directory.version_aux_update(obj, {"version": version}) + if self.backend != "filesystem": + self.directory.version_aux_update(obj, {"version": version}) + else: + self.directory.version_aux_pop(obj, "version", None) # On a successful transfer, we will automatically delete the aux redirect url # We will also forget about any legacy storage name inherited from the original system - self.directory.version_aux_pop(obj, "name") - self.directory.version_aux_pop(obj, "url") + self.directory.version_aux_pop(obj, "hname", None) + self.directory.version_aux_pop(obj, "hversion", None) + self.directory.version_aux_pop(obj, "url", None) success = True except Exception as e: logger.error("Error during transfer of [%s] to backend storage: %s" % diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md index a4586c1..bf233b9 100644 --- a/docs/INTERNALS.md +++ b/docs/INTERNALS.md @@ -16,13 +16,14 @@ fields can be processed: 1. `rename_to`: preferred name and version key to service content. 2. `url`: a URL to the version content at a remote hatrac service. -3. `name` and `version`: name and version key to use with the storage backend. +3. `hname` and `hversion`: name and version to override URL parsed values. +4. `version`: version to override backend storage version keying. -The `rename_to` field stores a pair `[` _name_ `,` _version_ `]` which +The `rename_to` field stores a pair `[` _hname_ `,` _hversion_ `]` which is used to lookup a preferred object version that obsoletes the -annotated object version. The service follows this reference (similar +annotated object version. The service resolves this reference (similar to a symbolic link in a filesystem) and performs the actual content -retrieval via the version found with that _name_ and _version_. Access +retrieval via the record found with that _hname_ and _hversion_. Access control is processed using the preferred version and the HTTP `Location` response header is also set to identify the preferred name. @@ -31,10 +32,20 @@ version that should have the same content. This is primarily used during an online migration from an old to new server with the `hatrac-migrate` utility script. -The `name` and `version` fields override the default behavior when +The `hname` and `hversion` fields override the default behavior when retrieving content from the storage backend. The default behavior is to use the actual `name` and `version` columns of the respective -Hatrac database records when addressing the storage backend. +Hatrac database records as input to the addressing function of the +storage backend. The `h` prefix means the "Hatrac" value as parsed +from URLs. + +The `version` field overrides the backend storage version ID, +currently only meaningful in the S3 backend. This is relevant when +accessing a versioned bucket, where the addressing function maps the +Hatrac name and version values (e.g. from the URL) to an object key +but there might be a different version ID to access the correct +version of the backend object. + ## Object Renaming @@ -44,9 +55,9 @@ implemented by making coordinated changes to the `aux` column fields described above: 1. A new version record is created under the new/preferred name with -its `name` and `version` aux fields set to refer to the existing -backend storage content addressed by the old/legacy name in use when -it was actually stored. +its `hname`, `hversion`, and `version` aux fields set to refer to the +existing backend storage content addressed by the old/legacy name in +use when it was actually stored. 2. The old version record has its `rename_to` aux field set to point to the new/preferred version record. @@ -57,5 +68,4 @@ During migration, existing object renaming is slightly normalized: rather than recreating the content under the old/legacy storage address. 2. The old/legacy records are kept with `rename_to` so that they -continue to allow HTTP access via the same logic as the pre-migration -system. +continue to allow HTTP access via legacy URLs. diff --git a/hatrac/core.py b/hatrac/core.py index b74f8e8..43a03bc 100644 --- a/hatrac/core.py +++ b/hatrac/core.py @@ -264,6 +264,11 @@ def get_content(self, client_context, get_data=True): def encode(self): return self + +class _Sentinel (object): pass +_sentinel = _Sentinel() + + class Metadata (dict): _all_keys = { @@ -400,10 +405,13 @@ def update(self, updates): k = k.lower() if k not in self or self[k] != v: self[k.lower()] = v - - def pop(self, k): + + def pop(self, k, default=_sentinel): k = k.lower() - return dict.pop(self, k) + if not isinstance(default, _Sentinel): + return dict.pop(self, k, default) + else: + return dict.pop(self, k) class Redirect(object): diff --git a/hatrac/model/directory/pgsql.py b/hatrac/model/directory/pgsql.py index 5e86d62..728ff43 100644 --- a/hatrac/model/directory/pgsql.py +++ b/hatrac/model/directory/pgsql.py @@ -58,6 +58,9 @@ from ...core import coalesce, Metadata, sql_literal, sql_identifier, Redirect, hatrac_debug, web_storage, negotiated_content_type from ... import core +class _Sentinel (object): pass +_sentinel = _Sentinel() + def regexp_escape(s): safe = set( [ chr(i) for i in range(ord('a'), ord('z')+1) ] @@ -221,11 +224,11 @@ def delete(self, client_context): """Delete resource and its children.""" return self.directory.delete_name(self, client_context) - def update_metadata(self, updates, client_context): - self.directory.update_resource_metadata(self, updates, client_context) + def update_metadata(self, client_context, updates): + self.directory.update_resource_metadata(self, client_context, updates) - def pop_metadata(self, fieldname, client_context): - self.directory.pop_resource_metadata(self, fieldname, client_context) + def pop_metadata(self, client_context, fieldname, default=_sentinel): + self.directory.pop_resource_metadata(self, client_context, fieldname, default) def set_acl(self, access, acl, client_context): self.directory.set_resource_acl(self, access, acl, client_context) @@ -1074,20 +1077,38 @@ def object_rename_from(self, dst_object, src_object, src_version_ids, copy_acls, dst_versions = [] for src_version in src_versions: - # flatten dst aux to refer to src storage config + # sanity check while allowing idempotent repetition of same rename request + if 'rename_to' in src_version.aux: + hname, hversion = src_version.aux['rename_to'] + if hname != dst_object.name or hversion != src_version.version: + raise core.Conflict("Source %s has conflicting rename target %s:%s" % ( + src_version.asurl(), + hname, + hversion, + )) + + # fabricate dst aux to refer to src storage aux = dict(src_version.aux) + aux.setdefault('hname', src_object.name) + if aux.get('hversion', src_version.version) == src_version.version: + # avoid copying hversion when it will match synchronized dst_version.version + aux.pop('hversion', None) + else: + # for safety, if an alien hversion has been configured by some other means... + aux['hversion'] = src_version.version + # never copy over the rename_to... aux.pop('rename_to', None) - aux.setdefault('name', src_object.name) - aux.setdefault('version', src_version.version) - if aux['version'] == src_version.version: - aux.pop('version') - # idempotent if the stored-as config is consistent... try: + # sanity check existing dst_version for idempotent creation dst_version = self.version_resolve(dst_object, src_version.version, False, conn=conn, cur=cur) - if (dst_version.aux.get('name'), dst_version.aux.get('version', dst_version.version)) != (aux['name'], aux.get('version', dst_version.version)): + if ( (dst_version.aux.get('hname', dst_version.name), + dst_version.aux.get('hversion', dst_version.version)) + != (src_object.name, + src_version.version) ): raise core.Conflict('Existing destination version %s conflicts with rename request' % (dst_version.asurl(),)) except core.NotFound as ev: + # create dst_version dst_version = HatracObjectVersion( self, dst_object, **self._create_version(conn, cur, dst_object, src_version.nbytes, src_version.metadata, aux) @@ -1112,8 +1133,8 @@ def version_aux_update(self, resource, updates, client_context=None, conn=None, self._update_reource_aux(conn, cur, resource, updates) @db_wrap() - def version_aux_pop(self, resource, fieldname, client_context=None, conn=None, cur=None): - self._pop_resource_aux(conn, cur, resource, fieldname) + def version_aux_pop(self, resource, fieldname, default=_sentinel, client_context=None, conn=None, cur=None): + self._pop_resource_aux(conn, cur, resource, fieldname, default) @db_wrap(enforce_acl=(1, 3, ['owner', 'update', 'ancestor_owner', 'ancestor_update'])) def create_version_upload_job(self, object, chunksize, client_context, nbytes=None, metadata={}, conn=None, cur=None): @@ -1179,12 +1200,12 @@ def get_version_content_range(self, object, objversion, get_slice, client_contex """Return (nbytes, data_generator) pair for specific version.""" if objversion.is_deleted: raise core.NotFound('Resource %s is not available.' % objversion) - name = objversion.aux.get('name', object.name) - version = objversion.aux.get('version', objversion.version) + hname = objversion.aux.get('hname', object.name) + hversion = objversion.aux.get('hversion', objversion.version) if get_data: nbytes, metadata, data = self.storage.get_content_range( - name, - version, + hname, + hversion, objversion.metadata, get_slice, objversion.aux, @@ -1237,13 +1258,13 @@ def get_current_version(self, object, conn=None, cur=None): else: raise core.Conflict('Object %s currently has no content.' % object) - @db_wrap(enforce_acl=(1, 3, ['owner', 'ancestor_owner'])) - def update_resource_metadata(self, resource, updates, client_context, conn=None, cur=None): + @db_wrap(enforce_acl=(1, 2, ['owner', 'ancestor_owner'])) + def update_resource_metadata(self, resource, client_context, updates, conn=None, cur=None): self._update_resource_metadata(conn, cur, resource, updates) - @db_wrap(enforce_acl=(1, 3, ['owner', 'ancestor_owner'])) - def pop_resource_metadata(self, resource, fieldname, client_context, conn=None, cur=None): - self._pop_resource_metadata(conn, cur, resource, fieldname) + @db_wrap(enforce_acl=(1, 2, ['owner', 'ancestor_owner'])) + def pop_resource_metadata(self, resource, client_context, fieldname, default=_sentinel, conn=None, cur=None): + self._pop_resource_metadata(conn, cur, resource, fieldname, default) @db_wrap(enforce_acl=(1, 4, ['owner', 'ancestor_owner'])) def set_resource_acl_role(self, resource, access, role, client_context, conn=None, cur=None): @@ -1314,10 +1335,13 @@ def _update_resource_aux(self, conn, cur, resource, updates): ) ) - def _pop_resource_metadata(self, conn, cur, resource, fieldname): - if resource.aux is None: - resource.aux = dict() - resource.metadata.pop(fieldname) + def _pop_resource_metadata(self, conn, cur, resource, fieldname, default=_sentinel): + if resource.metadata is None: + resource.metadata = dict() + if not isinstance(default, _Sentinel): + resource.metadata.pop(fieldname, default) + else: + resource.metadata.pop(fieldname) cur.execute(""" UPDATE hatrac.%(table)s n SET metadata = %(metadata)s @@ -1329,8 +1353,13 @@ def _pop_resource_metadata(self, conn, cur, resource, fieldname): ) ) - def _pop_resource_aux(self, conn, cur, resource, fieldname): - resource.aux.pop(fieldname) + def _pop_resource_aux(self, conn, cur, resource, fieldname, default=_sentinel): + if resource.aux is None: + resource.aux = dict() + if not isinstance(default, _Sentinel): + resource.aux.pop(fieldname, default) + else: + resource.aux.pop(fieldname) cur.execute(""" UPDATE hatrac.%(table)s n SET aux = %(metadata)s diff --git a/hatrac/rest/metadata.py b/hatrac/rest/metadata.py index 6954e42..ef93499 100644 --- a/hatrac/rest/metadata.py +++ b/hatrac/rest/metadata.py @@ -37,8 +37,8 @@ def put(self, fieldname, path="/", name="", version=""): self.http_check_preconditions('PUT') resource.update_metadata( + hatrac_ctx.webauthn2_context, hatrac_ctx.hatrac_directory.metadata_from_http({ fieldname: value }), - hatrac_ctx.webauthn2_context ) return self.update_response() @@ -57,8 +57,9 @@ def delete(self, fieldname, path="/", name="", version=""): self.http_check_preconditions('DELETE') resource.pop_metadata( + hatrac_ctx.webauthn2_context, fieldname, - hatrac_ctx.webauthn2_context + None, ) return self.update_response() diff --git a/test/rest-smoketest.sh b/test/rest-smoketest.sh index 5712370..0429da4 100644 --- a/test/rest-smoketest.sh +++ b/test/rest-smoketest.sh @@ -377,6 +377,7 @@ md5=$(mymd5sum < $0) sha=$(mysha256sum < $0) script_size=$(stat -c "%s" $0) +dotest "201::text/uri-list::*" /ns-${RUNKEY}/foo/obj1 -X PUT -T $0 -H "Content-Type: application/x-bash" dotest "201::text/uri-list::*" /ns-${RUNKEY}/foo/obj1 -X PUT -T $0 -H "Content-Type: application/x-bash" obj1_vers0="$(cat ${RESPONSE_CONTENT})" obj1_vers0="${obj1_vers0#/hatrac}" @@ -426,6 +427,50 @@ dotest_anon "401::*::*" "/ns-${RUNKEY}/foo/obj1;versions" dotest_anon "200::*::*" "/ns-${RUNKEY}/foo/obj1;metadata/" dotest_anon "200::*::*" "${obj1_vers0};metadata/" +# test object rename w/ default ACLs +cat > ${TEST_DATA} < ${TEST_DATA} <