Skip to content

Commit

Permalink
Merge pull request #232 from metno/issue_228
Browse files Browse the repository at this point in the history
reject if namespace contains string mismatched with env
  • Loading branch information
mortenwh authored Nov 13, 2024
2 parents 8b47573 + 90301d7 commit 012da77
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 41 deletions.
77 changes: 42 additions & 35 deletions dmci/api/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,7 @@

class Worker:

CALL_MAP = {
"file": FileDist,
"pycsw": PyCSWDist,
"solr": SolRDist
}
CALL_MAP = {"file": FileDist, "pycsw": PyCSWDist, "solr": SolRDist}

def __init__(self, cmd, xml_file, xsd_validator, **kwargs):

Expand Down Expand Up @@ -87,7 +83,7 @@ def validate(self, data):
# Takes in data as a bytes object
# Gives a message both when the data validates and when it does not
valid = False
msg = ''
msg = ""
if not isinstance(data, bytes):
return False, "Input must be bytes type", data

Expand All @@ -104,48 +100,63 @@ def validate(self, data):
if not valid:
return valid, msg, data

# Make sure that datasets with a dev namespace (e.g., no.met.dev) or a
# staging namespace (e.g., no.met.staging) cannot be added to the wrong
# environment
if (".dev" in self._namespace and self._conf.env_string != "dev") or (
".staging" in self._namespace and self._conf.env_string != "staging"
):
msg = (
f"Namespace {self._namespace} does not match "
f"the env {self._conf.env_string}"
)
return False, msg, data

if self._conf.env_string:

# Append env string to namespace in metadata_identifier
logger.debug("Identifier namespace: %s" % self._namespace)
logger.debug("Environment customization: %s" %
self._conf.env_string)
logger.debug("Environment customization: %s" % self._conf.env_string)
ns_re_pattern = re.compile(r"\w.\w." + self._conf.env_string)

if re.search(ns_re_pattern, self._namespace) is None:
full_namespace = f"{self._namespace}.{self._conf.env_string}"
data = re.sub(
str.encode(
f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(
f"<mmd:metadata_identifier>{full_namespace}"),
str.encode(f"<mmd:metadata_identifier>{self._namespace}"),
str.encode(f"<mmd:metadata_identifier>{full_namespace}"),
data,
)
self._namespace = full_namespace

# Append env string to the namespace in the parent block, if present
if bool(re.search(b'<mmd:related_dataset relation_type="parent">', data)):
if bool(
re.search(b'<mmd:related_dataset relation_type="parent">', data)
):
match_parent_block = re.search(
b'<mmd:related_dataset relation_type="parent">(.+?)</mmd:related_dataset>',
data
data,
)
found_parent_block_content = match_parent_block.group(1)
found_parent_block_content = found_parent_block_content.split(
b":")
found_parent_block_content = found_parent_block_content.split(b":")
if len(found_parent_block_content) != 2:
err = f"Malformed parent dataset identifier {found_parent_block_content}"
logger.error(err)
return False, err, data
old_parent_namespace = found_parent_block_content[0].decode()
logger.debug("Parent dataset namespace: %s" %
old_parent_namespace)
logger.debug("Parent dataset namespace: %s" % old_parent_namespace)
if re.search(ns_re_pattern, old_parent_namespace) is None:
new_parent_namespace = f"{old_parent_namespace}.{self._conf.env_string}"
new_parent_namespace = (
f"{old_parent_namespace}.{self._conf.env_string}"
)
data = re.sub(
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{old_parent_namespace}'),
str.encode(f'<mmd:related_dataset '
f'relation_type="parent">{new_parent_namespace}'),
str.encode(
f"<mmd:related_dataset "
f'relation_type="parent">{old_parent_namespace}'
),
str.encode(
f"<mmd:related_dataset "
f'relation_type="parent">{new_parent_namespace}'
),
data,
)

Expand Down Expand Up @@ -191,8 +202,7 @@ def distribute(self):
xml_file=self._dist_xml_file,
metadata_UUID=self._dist_metadata_id_uuid,
worker=self,
path_to_parent_list=self._kwargs.get(
"path_to_parent_list", None),
path_to_parent_list=self._kwargs.get("path_to_parent_list", None),
)
valid &= obj.is_valid()
if obj.is_valid():
Expand Down Expand Up @@ -278,8 +288,7 @@ def _add_landing_page(self, data, catalog_url, uuid):
f"\n <mmd:description/>\n "
f"<mmd:resource>{catalog_url}/dataset/{uuid}</mmd:resource>\n "
)
data_mod = re.sub(found_datasetlandingpage,
datasetlandingpage_mod, data)
data_mod = re.sub(found_datasetlandingpage, datasetlandingpage_mod, data)

return data_mod

Expand All @@ -304,17 +313,15 @@ def _extract_metadata_id(self, xml_doc):
# only accept if format is uri:UUID, both need to be present
words = xml_entry.text.split(":")
if len(words) != 2:
logger.error(
"metadata_identifier not formed as namespace:UUID")
logger.error("metadata_identifier not formed as namespace:UUID")
return False
namespace, file_uuid = words

logger.info("XML file metadata_identifier: %s:%s" %
(namespace, file_uuid))
logger.debug(
"XML file metadata_identifier namespace: %s", namespace)
logger.debug(
"XML file metadata_identifier UUID: %s", file_uuid)
logger.info(
"XML file metadata_identifier: %s:%s" % (namespace, file_uuid)
)
logger.debug("XML file metadata_identifier namespace: %s", namespace)
logger.debug("XML file metadata_identifier UUID: %s", file_uuid)
break

if file_uuid == "":
Expand Down
156 changes: 156 additions & 0 deletions tests/files/api/staging.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<mmd:mmd xmlns:mmd="http://www.met.no/schema/mmd" xmlns:gml="http://www.opengis.net/gml">
<mmd:metadata_identifier>test.no.staging:a1ddaf0f-cae0-4a15-9b37-3468e9cb1a2b</mmd:metadata_identifier>
<mmd:title xml:lang="en">Direct Broadcast data processed in satellite swath to L1C</mmd:title>
<mmd:title xml:lang="nor">Direktesendte satellittdata prosessert i satellittsveip til L1C.</mmd:title>
<mmd:abstract xml:lang="en">Direct Broadcast data received at MET NORWAY Oslo. Processed by standard processing software to geolocated and calibrated values in satellite swath in received instrument resolution.</mmd:abstract>
<mmd:abstract xml:lang="nor">Direktesendte satellittdata mottatt ved Meteorologisk Institutt Oslo. Prosessert med standard prosesseringssoftware til geolokaliserte og kalibrerte verdier i satellitsveip i mottatt instrument oppløsning.</mmd:abstract>
<mmd:metadata_status>Active</mmd:metadata_status>
<mmd:dataset_production_status>Complete</mmd:dataset_production_status>
<mmd:collection>METNCS</mmd:collection>
<mmd:last_metadata_update>
<mmd:update>
<mmd:datetime>2021-04-29T00:46:05Z</mmd:datetime>
<mmd:type>Created</mmd:type>
</mmd:update>
</mmd:last_metadata_update>
<mmd:temporal_extent>
<mmd:start_date>2021-04-29T00:28:44.977627Z</mmd:start_date>
<mmd:end_date>2021-04-29T00:39:55.000000Z</mmd:end_date>
</mmd:temporal_extent>
<mmd:iso_topic_category>climatologyMeteorologyAtmosphere</mmd:iso_topic_category>
<mmd:iso_topic_category>environment</mmd:iso_topic_category>
<mmd:iso_topic_category>oceans</mmd:iso_topic_category>
<mmd:keywords vocabulary="GCMD">
<mmd:keyword>Earth Science &gt; Atmosphere &gt; Atmospheric radiation</mmd:keyword>
<mmd:resource>https://gcmdservices.gsfc.nasa.gov/static/kms/</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="GEMET">
<mmd:keyword>Meteorological geographical features</mmd:keyword>
<mmd:keyword>Atmospheric conditions</mmd:keyword>
<mmd:keyword>Oceanographic geographical features</mmd:keyword>
<mmd:resource>http://inspire.ec.europa.eu/theme</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:keywords vocabulary="Norwegian thematic categories">
<mmd:keyword>Weather and climate</mmd:keyword>
<mmd:resource>https://register.geonorge.no/subregister/metadata-kodelister/kartverket/nasjonal-temainndeling</mmd:resource>
<mmd:separator></mmd:separator>
</mmd:keywords>
<mmd:geographic_extent>
<mmd:rectangle srsName="EPSG:4326">
<mmd:north>80.49233</mmd:north>
<mmd:south>36.540688</mmd:south>
<mmd:east>79.40124</mmd:east>
<mmd:west>1.5549301</mmd:west>
</mmd:rectangle>
</mmd:geographic_extent>
<mmd:dataset_language>en</mmd:dataset_language>
<mmd:operational_status>Operational</mmd:operational_status>
<mmd:use_constraint>
<mmd:identifier>CC-BY-4.0</mmd:identifier>
<mmd:resource>http://spdx.org/licenses/CC-BY-4.0</mmd:resource>
</mmd:use_constraint>
<mmd:personnel>
<mmd:role>Technical contact</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>[email protected]</mmd:email>
<mmd:organisation>MET NORWAY</mmd:organisation>
</mmd:personnel>
<mmd:personnel>
<mmd:role>Metadata author</mmd:role>
<mmd:name>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:name>
<mmd:email>[email protected]</mmd:email>
<mmd:organisation>unknown</mmd:organisation>
</mmd:personnel>
<mmd:data_center>
<mmd:data_center_name>
<mmd:short_name>MET NORWAY</mmd:short_name>
<mmd:long_name>MET NORWAY</mmd:long_name>
</mmd:data_center_name>
<mmd:data_center_url>met.no</mmd:data_center_url>
</mmd:data_center>
<mmd:data_access>
<mmd:type>OPeNDAP</mmd:type>
<mmd:description>Open-source Project for a Network Data Access Protocol</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:data_access>
<mmd:type>OGC WMS</mmd:type>
<mmd:description>OGC Web Mapping Service, URI to GetCapabilities Document.</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&amp;version=1.3.0&amp;request=GetCapabilities</mmd:resource>
<mmd:wms_layers>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_brightness_temperature</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
<mmd:wms_layer>toa_bidirectional_reflectance</mmd:wms_layer>
</mmd:wms_layers>
</mmd:data_access>
<mmd:data_access>
<mmd:type>HTTP</mmd:type>
<mmd:description>Direct download of file</mmd:description>
<mmd:resource>https://thredds.met.no/thredds/fileServer/remotesensingsatellite/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:resource>
</mmd:data_access>
<mmd:related_dataset relation_type="parent">test.no.staging:64db6102-14ce-41e9-b93b-61dbb2cb8b4e</mmd:related_dataset>
<mmd:storage_information>
<mmd:file_name>aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_name>
<mmd:file_location>/lustre/storeB/immutable/archive/projects/remotesensing/satellite-thredds/polar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc</mmd:file_location>
<mmd:file_format>NetCDF-CF</mmd:file_format>
<mmd:file_size unit="MB">1862.00</mmd:file_size>
<mmd:checksum type="md5sum">4e1833610272ee63228f575d1c875fbe</mmd:checksum>
</mmd:storage_information>
<mmd:project>
<mmd:short_name>Govermental core service</mmd:short_name>
<mmd:long_name>Govermental core service</mmd:long_name>
</mmd:project>
<mmd:platform>
<mmd:short_name>Aqua</mmd:short_name>
<mmd:long_name>Aqua</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/satellites/view/aqua</mmd:resource>
<mmd:instrument>
<mmd:short_name>MODIS</mmd:short_name>
<mmd:long_name>MODIS</mmd:long_name>
<mmd:resource>https://www.wmo-sat.info/oscar/instruments/view/modis</mmd:resource>
</mmd:instrument>
</mmd:platform>
<mmd:activity_type>Space Borne Instrument</mmd:activity_type>
<mmd:dataset_citation>
<mmd:author>DIVISION FOR OBSERVATION QUALITY AND DATA PROCESSING</mmd:author>
<mmd:publication_date>2021-04-29</mmd:publication_date>
<mmd:title>Direct Broadcast data processed in satellite swath to L1C</mmd:title>
</mmd:dataset_citation>
</mmd:mmd>
Loading

0 comments on commit 012da77

Please sign in to comment.