diff --git a/bin/hatrac-migrate b/bin/hatrac-migrate index 0ce75b5..f3bab5c 100644 --- a/bin/hatrac-migrate +++ b/bin/hatrac-migrate @@ -361,12 +361,15 @@ class HatracMigrateCLI (BaseCLI): # 3.3 Post-transfer database updates # We only care about storing the returned aux version in the non-filesystem backend case - version = version if self.backend != "filesystem" else None - self.directory.version_aux_update(obj, {"version": version}) + if self.backend != "filesystem": + self.directory.version_aux_update(obj, {"version": version}) + else: + self.directory.version_aux_pop(obj, "version", None) # On a successful transfer, we will automatically delete the aux redirect url # We will also forget about any legacy storage name inherited from the original system - self.directory.version_aux_pop(obj, "name") - self.directory.version_aux_pop(obj, "url") + self.directory.version_aux_pop(obj, "hname", None) + self.directory.version_aux_pop(obj, "hversion", None) + self.directory.version_aux_pop(obj, "url", None) success = True except Exception as e: logger.error("Error during transfer of [%s] to backend storage: %s" % diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md index a4586c1..bf233b9 100644 --- a/docs/INTERNALS.md +++ b/docs/INTERNALS.md @@ -16,13 +16,14 @@ fields can be processed: 1. `rename_to`: preferred name and version key to service content. 2. `url`: a URL to the version content at a remote hatrac service. -3. `name` and `version`: name and version key to use with the storage backend. +3. `hname` and `hversion`: name and version to override URL parsed values. +4. `version`: version to override backend storage version keying. -The `rename_to` field stores a pair `[` _name_ `,` _version_ `]` which +The `rename_to` field stores a pair `[` _hname_ `,` _hversion_ `]` which is used to lookup a preferred object version that obsoletes the -annotated object version. The service follows this reference (similar +annotated object version. The service resolves this reference (similar to a symbolic link in a filesystem) and performs the actual content -retrieval via the version found with that _name_ and _version_. Access +retrieval via the record found with that _hname_ and _hversion_. Access control is processed using the preferred version and the HTTP `Location` response header is also set to identify the preferred name. @@ -31,10 +32,20 @@ version that should have the same content. This is primarily used during an online migration from an old to new server with the `hatrac-migrate` utility script. -The `name` and `version` fields override the default behavior when +The `hname` and `hversion` fields override the default behavior when retrieving content from the storage backend. The default behavior is to use the actual `name` and `version` columns of the respective -Hatrac database records when addressing the storage backend. +Hatrac database records as input to the addressing function of the +storage backend. The `h` prefix means the "Hatrac" value as parsed +from URLs. + +The `version` field overrides the backend storage version ID, +currently only meaningful in the S3 backend. This is relevant when +accessing a versioned bucket, where the addressing function maps the +Hatrac name and version values (e.g. from the URL) to an object key +but there might be a different version ID to access the correct +version of the backend object. + ## Object Renaming @@ -44,9 +55,9 @@ implemented by making coordinated changes to the `aux` column fields described above: 1. A new version record is created under the new/preferred name with -its `name` and `version` aux fields set to refer to the existing -backend storage content addressed by the old/legacy name in use when -it was actually stored. +its `hname`, `hversion`, and `version` aux fields set to refer to the +existing backend storage content addressed by the old/legacy name in +use when it was actually stored. 2. The old version record has its `rename_to` aux field set to point to the new/preferred version record. @@ -57,5 +68,4 @@ During migration, existing object renaming is slightly normalized: rather than recreating the content under the old/legacy storage address. 2. The old/legacy records are kept with `rename_to` so that they -continue to allow HTTP access via the same logic as the pre-migration -system. +continue to allow HTTP access via legacy URLs. diff --git a/hatrac/core.py b/hatrac/core.py index b74f8e8..43a03bc 100644 --- a/hatrac/core.py +++ b/hatrac/core.py @@ -264,6 +264,11 @@ def get_content(self, client_context, get_data=True): def encode(self): return self + +class _Sentinel (object): pass +_sentinel = _Sentinel() + + class Metadata (dict): _all_keys = { @@ -400,10 +405,13 @@ def update(self, updates): k = k.lower() if k not in self or self[k] != v: self[k.lower()] = v - - def pop(self, k): + + def pop(self, k, default=_sentinel): k = k.lower() - return dict.pop(self, k) + if not isinstance(default, _Sentinel): + return dict.pop(self, k, default) + else: + return dict.pop(self, k) class Redirect(object): diff --git a/hatrac/model/directory/pgsql.py b/hatrac/model/directory/pgsql.py index 5e86d62..728ff43 100644 --- a/hatrac/model/directory/pgsql.py +++ b/hatrac/model/directory/pgsql.py @@ -58,6 +58,9 @@ from ...core import coalesce, Metadata, sql_literal, sql_identifier, Redirect, hatrac_debug, web_storage, negotiated_content_type from ... import core +class _Sentinel (object): pass +_sentinel = _Sentinel() + def regexp_escape(s): safe = set( [ chr(i) for i in range(ord('a'), ord('z')+1) ] @@ -221,11 +224,11 @@ def delete(self, client_context): """Delete resource and its children.""" return self.directory.delete_name(self, client_context) - def update_metadata(self, updates, client_context): - self.directory.update_resource_metadata(self, updates, client_context) + def update_metadata(self, client_context, updates): + self.directory.update_resource_metadata(self, client_context, updates) - def pop_metadata(self, fieldname, client_context): - self.directory.pop_resource_metadata(self, fieldname, client_context) + def pop_metadata(self, client_context, fieldname, default=_sentinel): + self.directory.pop_resource_metadata(self, client_context, fieldname, default) def set_acl(self, access, acl, client_context): self.directory.set_resource_acl(self, access, acl, client_context) @@ -1074,20 +1077,38 @@ def object_rename_from(self, dst_object, src_object, src_version_ids, copy_acls, dst_versions = [] for src_version in src_versions: - # flatten dst aux to refer to src storage config + # sanity check while allowing idempotent repetition of same rename request + if 'rename_to' in src_version.aux: + hname, hversion = src_version.aux['rename_to'] + if hname != dst_object.name or hversion != src_version.version: + raise core.Conflict("Source %s has conflicting rename target %s:%s" % ( + src_version.asurl(), + hname, + hversion, + )) + + # fabricate dst aux to refer to src storage aux = dict(src_version.aux) + aux.setdefault('hname', src_object.name) + if aux.get('hversion', src_version.version) == src_version.version: + # avoid copying hversion when it will match synchronized dst_version.version + aux.pop('hversion', None) + else: + # for safety, if an alien hversion has been configured by some other means... + aux['hversion'] = src_version.version + # never copy over the rename_to... aux.pop('rename_to', None) - aux.setdefault('name', src_object.name) - aux.setdefault('version', src_version.version) - if aux['version'] == src_version.version: - aux.pop('version') - # idempotent if the stored-as config is consistent... try: + # sanity check existing dst_version for idempotent creation dst_version = self.version_resolve(dst_object, src_version.version, False, conn=conn, cur=cur) - if (dst_version.aux.get('name'), dst_version.aux.get('version', dst_version.version)) != (aux['name'], aux.get('version', dst_version.version)): + if ( (dst_version.aux.get('hname', dst_version.name), + dst_version.aux.get('hversion', dst_version.version)) + != (src_object.name, + src_version.version) ): raise core.Conflict('Existing destination version %s conflicts with rename request' % (dst_version.asurl(),)) except core.NotFound as ev: + # create dst_version dst_version = HatracObjectVersion( self, dst_object, **self._create_version(conn, cur, dst_object, src_version.nbytes, src_version.metadata, aux) @@ -1112,8 +1133,8 @@ def version_aux_update(self, resource, updates, client_context=None, conn=None, self._update_reource_aux(conn, cur, resource, updates) @db_wrap() - def version_aux_pop(self, resource, fieldname, client_context=None, conn=None, cur=None): - self._pop_resource_aux(conn, cur, resource, fieldname) + def version_aux_pop(self, resource, fieldname, default=_sentinel, client_context=None, conn=None, cur=None): + self._pop_resource_aux(conn, cur, resource, fieldname, default) @db_wrap(enforce_acl=(1, 3, ['owner', 'update', 'ancestor_owner', 'ancestor_update'])) def create_version_upload_job(self, object, chunksize, client_context, nbytes=None, metadata={}, conn=None, cur=None): @@ -1179,12 +1200,12 @@ def get_version_content_range(self, object, objversion, get_slice, client_contex """Return (nbytes, data_generator) pair for specific version.""" if objversion.is_deleted: raise core.NotFound('Resource %s is not available.' % objversion) - name = objversion.aux.get('name', object.name) - version = objversion.aux.get('version', objversion.version) + hname = objversion.aux.get('hname', object.name) + hversion = objversion.aux.get('hversion', objversion.version) if get_data: nbytes, metadata, data = self.storage.get_content_range( - name, - version, + hname, + hversion, objversion.metadata, get_slice, objversion.aux, @@ -1237,13 +1258,13 @@ def get_current_version(self, object, conn=None, cur=None): else: raise core.Conflict('Object %s currently has no content.' % object) - @db_wrap(enforce_acl=(1, 3, ['owner', 'ancestor_owner'])) - def update_resource_metadata(self, resource, updates, client_context, conn=None, cur=None): + @db_wrap(enforce_acl=(1, 2, ['owner', 'ancestor_owner'])) + def update_resource_metadata(self, resource, client_context, updates, conn=None, cur=None): self._update_resource_metadata(conn, cur, resource, updates) - @db_wrap(enforce_acl=(1, 3, ['owner', 'ancestor_owner'])) - def pop_resource_metadata(self, resource, fieldname, client_context, conn=None, cur=None): - self._pop_resource_metadata(conn, cur, resource, fieldname) + @db_wrap(enforce_acl=(1, 2, ['owner', 'ancestor_owner'])) + def pop_resource_metadata(self, resource, client_context, fieldname, default=_sentinel, conn=None, cur=None): + self._pop_resource_metadata(conn, cur, resource, fieldname, default) @db_wrap(enforce_acl=(1, 4, ['owner', 'ancestor_owner'])) def set_resource_acl_role(self, resource, access, role, client_context, conn=None, cur=None): @@ -1314,10 +1335,13 @@ def _update_resource_aux(self, conn, cur, resource, updates): ) ) - def _pop_resource_metadata(self, conn, cur, resource, fieldname): - if resource.aux is None: - resource.aux = dict() - resource.metadata.pop(fieldname) + def _pop_resource_metadata(self, conn, cur, resource, fieldname, default=_sentinel): + if resource.metadata is None: + resource.metadata = dict() + if not isinstance(default, _Sentinel): + resource.metadata.pop(fieldname, default) + else: + resource.metadata.pop(fieldname) cur.execute(""" UPDATE hatrac.%(table)s n SET metadata = %(metadata)s @@ -1329,8 +1353,13 @@ def _pop_resource_metadata(self, conn, cur, resource, fieldname): ) ) - def _pop_resource_aux(self, conn, cur, resource, fieldname): - resource.aux.pop(fieldname) + def _pop_resource_aux(self, conn, cur, resource, fieldname, default=_sentinel): + if resource.aux is None: + resource.aux = dict() + if not isinstance(default, _Sentinel): + resource.aux.pop(fieldname, default) + else: + resource.aux.pop(fieldname) cur.execute(""" UPDATE hatrac.%(table)s n SET aux = %(metadata)s diff --git a/hatrac/rest/metadata.py b/hatrac/rest/metadata.py index 6954e42..ef93499 100644 --- a/hatrac/rest/metadata.py +++ b/hatrac/rest/metadata.py @@ -37,8 +37,8 @@ def put(self, fieldname, path="/", name="", version=""): self.http_check_preconditions('PUT') resource.update_metadata( + hatrac_ctx.webauthn2_context, hatrac_ctx.hatrac_directory.metadata_from_http({ fieldname: value }), - hatrac_ctx.webauthn2_context ) return self.update_response() @@ -57,8 +57,9 @@ def delete(self, fieldname, path="/", name="", version=""): self.http_check_preconditions('DELETE') resource.pop_metadata( + hatrac_ctx.webauthn2_context, fieldname, - hatrac_ctx.webauthn2_context + None, ) return self.update_response() diff --git a/test/rest-smoketest.sh b/test/rest-smoketest.sh index 5712370..0429da4 100644 --- a/test/rest-smoketest.sh +++ b/test/rest-smoketest.sh @@ -377,6 +377,7 @@ md5=$(mymd5sum < $0) sha=$(mysha256sum < $0) script_size=$(stat -c "%s" $0) +dotest "201::text/uri-list::*" /ns-${RUNKEY}/foo/obj1 -X PUT -T $0 -H "Content-Type: application/x-bash" dotest "201::text/uri-list::*" /ns-${RUNKEY}/foo/obj1 -X PUT -T $0 -H "Content-Type: application/x-bash" obj1_vers0="$(cat ${RESPONSE_CONTENT})" obj1_vers0="${obj1_vers0#/hatrac}" @@ -426,6 +427,50 @@ dotest_anon "401::*::*" "/ns-${RUNKEY}/foo/obj1;versions" dotest_anon "200::*::*" "/ns-${RUNKEY}/foo/obj1;metadata/" dotest_anon "200::*::*" "${obj1_vers0};metadata/" +# test object rename w/ default ACLs +cat > ${TEST_DATA} < ${TEST_DATA} <