Skip to content

Commit

Permalink
feat: map codelists value from codelists mapping template file
Browse files Browse the repository at this point in the history
  • Loading branch information
yshalenyk committed Aug 8, 2024
1 parent 367bb94 commit fdc6892
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 33 deletions.
58 changes: 31 additions & 27 deletions nightingale/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def load_config(config_file):
)
@click.option("--datasource", type=str, help="Datasource connection string")
@click.option("--mapping-file", type=click_pathlib.Path(exists=True), help="Mapping file path")
@click.option("--codelists-file", type=click_pathlib.Path(exists=True), help="Codelists mapping file path")
@click.option("--ocid-prefix", type=str, help="OCID prefix")
@click.option("--selector", type=click_pathlib.Path(exists=True), help="Path to selector SQL script")
@click.option("--force-publish", is_flag=True, help="Force publish")
Expand All @@ -76,6 +77,7 @@ def run(
loglevel,
datasource,
mapping_file,
codelists_file,
ocid_prefix,
selector,
force_publish,
Expand All @@ -101,37 +103,39 @@ def run(

try:
logger.debug(f"Loading configuration from {config_file}")
config_data = load_config(config_file)
config_data = {}
if config_file:
config_data = load_config(config_file)

# Apply CLI overrides
if datasource:
config_data["datasource"] = {"connection": datasource}
if mapping_file or ocid_prefix or selector or force_publish:
selector_content = config_data["mapping"]["selector"]
if selector:
try:
with open(selector, "r") as f:
selector_content = f.read()
except (OSError, IOError) as e:
raise click.ClickException(f"Error reading selector file {selector}: {e}")
config_data["mapping"] = {
"file": mapping_file or config_data["mapping"]["file"],
"ocid_prefix": ocid_prefix or config_data["mapping"]["ocid_prefix"],
"selector": selector_content,
"force_publish": force_publish
if force_publish is not None
else config_data["mapping"].get("force_publish", False),
}
if publisher or base_uri or version or publisher_uid or publisher_scheme or publisher_uri or extensions:
config_data["publishing"] = {
"publisher": publisher or config_data["publishing"]["publisher"],
"base_uri": base_uri or config_data["publishing"]["base_uri"],
"version": version or config_data["publishing"].get("version", ""),
"publisher_uid": publisher_uid or config_data["publishing"].get("publisher_uid", ""),
"publisher_scheme": publisher_scheme or config_data["publishing"].get("publisher_scheme", ""),
"publisher_uri": publisher_uri or config_data["publishing"].get("publisher_uri", ""),
"extensions": list(extensions) if extensions else config_data["publishing"].get("extensions", []),
}
selector_content = config_data["mapping"]["selector"]
if selector:
try:
with open(selector, "r") as f:
selector_content = f.read()
except (OSError, IOError) as e:
raise click.ClickException(f"Error reading selector file {selector}: {e}")
# TODO: simplify this
config_data["mapping"] = {
"file": mapping_file or config_data["mapping"]["file"],
"codelists": codelists_file or config_data["mapping"]["codelists"],
"ocid_prefix": ocid_prefix or config_data["mapping"]["ocid_prefix"],
"selector": selector_content,
"force_publish": force_publish
if force_publish is not None
else config_data["mapping"].get("force_publish", False),
}
config_data["publishing"] = {
"publisher": publisher or config_data["publishing"]["publisher"],
"base_uri": base_uri or config_data["publishing"]["base_uri"],
"version": version or config_data["publishing"].get("version", ""),
"publisher_uid": publisher_uid or config_data["publishing"].get("publisher_uid", ""),
"publisher_scheme": publisher_scheme or config_data["publishing"].get("publisher_scheme", ""),
"publisher_uri": publisher_uri or config_data["publishing"].get("publisher_uri", ""),
"extensions": list(extensions) if extensions else config_data["publishing"].get("extensions", []),
}
if output_directory:
config_data["output"] = {"directory": output_directory}

Expand Down
1 change: 1 addition & 0 deletions nightingale/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Mapping:
ocid_prefix: str
selector: str
force_publish: Optional[bool] = False
codelists: Optional[Path] = None


@dataclass(frozen=True)
Expand Down
27 changes: 22 additions & 5 deletions nightingale/mapper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import logging
from typing import Any
from typing import Any, Optional

import dict_hash

from .codelists import CodelistsMapping
from .config import Config
from .mapping_template.v09 import MappingTemplate, MappingTemplateValidator
from .utils import get_iso_now, is_new_array, remove_dicts_without_id
Expand Down Expand Up @@ -50,6 +51,9 @@ def map(self, loader: Any, validate_mapping: bool = False) -> list[dict[str, Any
"""
config = self.config.mapping
mapping = MappingTemplate(config)
codelists = None
if config.codelists:
codelists = CodelistsMapping(config)
logger.info("MappingTemplate data loaded")
data = loader.load(config.selector)
logger.info("Source data is loaded...")
Expand All @@ -59,9 +63,11 @@ def map(self, loader: Any, validate_mapping: bool = False) -> list[dict[str, Any
validator.validate_data_elements()
validator.validate_selector(data[0])
logger.info("Start mapping data")
return self.transform_data(data, mapping)
return self.transform_data(data, mapping, codelists=codelists)

def transform_data(self, data: list[dict[Any, Any]], mapping: MappingTemplate) -> list[dict[str, Any]]:
def transform_data(
self, data: list[dict[Any, Any]], mapping: MappingTemplate, codelists: Optional[CodelistsMapping] = None
) -> list[dict[str, Any]]:
"""
Transform the input data to the OCDS format.
Expand Down Expand Up @@ -89,7 +95,7 @@ def transform_data(self, data: list[dict[Any, Any]], mapping: MappingTemplate) -
curr_ocid = ocid
curr_release = {}

curr_release = self.transform_row(row, mapping, mapping.get_schema(), curr_release)
curr_release = self.transform_row(row, mapping, mapping.get_schema(), curr_release, codelists=codelists)

if curr_release:
self.finish_release(curr_ocid, curr_release, mapped)
Expand All @@ -111,6 +117,7 @@ def transform_row(
mapping_config: MappingTemplate,
flattened_schema: dict[str, Any],
result: dict = None,
codelists: Optional[CodelistsMapping] = None,
) -> dict:
"""
Transform a single row of input data to the OCDS format.
Expand All @@ -128,6 +135,7 @@ def transform_row(
"""

def set_nested_value(nested_dict, keys, value, schema, add_new=False):
value = self.map_codelist_value(keys, schema, codelists, value)
for i, key in enumerate(keys[:-1]):
if isinstance(nested_dict, list):
if not nested_dict:
Expand Down Expand Up @@ -200,7 +208,7 @@ def set_nested_value(nested_dict, keys, value, schema, add_new=False):
current[key] = [] if flattened_schema.get(key_path, {}).get("type") == "array" else {}

current = current[key]

value = self.map_codelist_value(keys, flattened_schema, codelists, value)
if isinstance(current, list):
if not current:
current.append({})
Expand Down Expand Up @@ -270,3 +278,12 @@ def remove_empty_id_arrays(self, data: Any) -> Any:
"""

return remove_dicts_without_id(data)

def map_codelist_value(self, keys, schema, codelists, value):
    """Translate ``value`` through the codelist mapping declared for a path.

    The schema path is built as ``"/" + "/".join(keys)``. If the schema
    entry for that path names a codelist and ``codelists`` provides a
    mapping for it, the mapped value is returned; in every other case the
    original ``value`` is returned untouched.

    Args:
        keys: Sequence of string path segments identifying the target field.
        schema: Dict keyed by "/"-prefixed path whose entries may carry a
            ``"codelist"`` name.
        codelists: Object exposing ``get_mapping_for_codelist(name)``, or
            ``None`` when no codelists mapping file is configured.
        value: The source value to translate.

    Returns:
        The mapped value when a truthy mapping exists for ``value``,
        otherwise ``value`` itself.
    """
    # Callers pass codelists=None when no codelists file is configured;
    # without this guard a schema row that names a codelist would raise
    # AttributeError on None.
    if codelists is None:
        return value

    path = "/" + "/".join(keys)
    # `or {}` tolerates a schema entry that is present but None.
    codelist_name = (schema.get(path) or {}).get("codelist")
    if not codelist_name:
        return value

    mapping = codelists.get_mapping_for_codelist(codelist_name)
    if not mapping:
        return value

    # A missing or falsy mapped value leaves the source value as-is.
    new_value = mapping.get(value)
    return new_value if new_value else value
3 changes: 2 additions & 1 deletion nightingale/mapping_template/v09/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def read_schema_sheet(self):
schema = {}
for sheet in sheets:
for row in sheet.iter_rows(min_row=2, values_only=True):
_, path, title, description, type, range, values, links, *_ = row
_, path, title, description, type, range, values, links, codelist, *_ = row
if not path:
continue
path = "/" + path
Expand All @@ -133,6 +133,7 @@ def read_schema_sheet(self):
"range": range,
"values": values,
"links": links,
"codelist": codelist,
}
return schema

Expand Down

0 comments on commit fdc6892

Please sign in to comment.