diff --git a/simple/sample/output/observations_countries.csv b/simple/sample/output/observations_countries.csv index cbf51675..94fd2d0c 100644 --- a/simple/sample/output/observations_countries.csv +++ b/simple/sample/output/observations_countries.csv @@ -1,4 +1,4 @@ -dcid,date,var1,var2 +dcid,date,custom/statvar_1,custom/statvar_2 country/AFG,2023,0.19,6 country/YEM,2023,0.21,56 country/AGO,2023,0.29,6 diff --git a/simple/sample/output/observations_geoids.csv b/simple/sample/output/observations_geoids.csv index 57fdec35..c968a271 100644 --- a/simple/sample/output/observations_geoids.csv +++ b/simple/sample/output/observations_geoids.csv @@ -1,3 +1,3 @@ -dcid,date,Variable 1,Variable 2 +dcid,date,custom/statvar_3,custom/statvar_4 geoId/01,2021,555,666 geoId/122,2022,321,123456 diff --git a/simple/sample/output/observations_latlng.csv b/simple/sample/output/observations_latlng.csv index abbe5b65..cdeb76db 100644 --- a/simple/sample/output/observations_latlng.csv +++ b/simple/sample/output/observations_latlng.csv @@ -1,3 +1,3 @@ -dcid,date,var1,var2 +dcid,date,custom/statvar_1,custom/statvar_2 country/USA,2021,555,666 country/IND,2022,321,123 diff --git a/simple/sample/output/observations_powerplants.csv b/simple/sample/output/observations_powerplants.csv index 35e759a9..ed5415a6 100644 --- a/simple/sample/output/observations_powerplants.csv +++ b/simple/sample/output/observations_powerplants.csv @@ -1,4 +1,4 @@ -dcid,date,var1,var2 +dcid,date,custom/statvar_1,custom/statvar_2 dc/000qxlm93vn93,2023,0.19,6 dc/5c7tz3lbln3p,2023,0.21,56 dc/8zmh7ctlkbsc4,2023,0.29,6 diff --git a/simple/sample/output/observations_s2cells.csv b/simple/sample/output/observations_s2cells.csv index c425a2f6..5266d8c1 100644 --- a/simple/sample/output/observations_s2cells.csv +++ b/simple/sample/output/observations_s2cells.csv @@ -1,3 +1,3 @@ -dcid,date,var1,var2 +dcid,date,custom/statvar_1,custom/statvar_2 s2CellId/0x80982b0000000000,2021,555,666 s2CellId/0x3be7c90000000000,2022,321,123 diff --git a/simple/sample/output/observations_wikidataids.csv b/simple/sample/output/observations_wikidataids.csv index abbe5b65..cdeb76db 100644 --- a/simple/sample/output/observations_wikidataids.csv +++ b/simple/sample/output/observations_wikidataids.csv @@ -1,3 +1,3 @@ -dcid,date,var1,var2 +dcid,date,custom/statvar_1,custom/statvar_2 country/USA,2021,555,666 country/IND,2022,321,123 diff --git a/simple/sample/output/process/debug_resolve_powerplants.csv b/simple/sample/output/process/debug_resolve_powerplants.csv index 98b6d0b1..a5b03a0e 100644 --- a/simple/sample/output/process/debug_resolve_powerplants.csv +++ b/simple/sample/output/process/debug_resolve_powerplants.csv @@ -1,6 +1,6 @@ input,dcid,link -FOO BAR,*UNRESOLVED*, BAZ BAR,*UNRESOLVED*, +FOO BAR,*UNRESOLVED*, Suzlon Project,dc/000qxlm93vn93,https://datacommons.org/browser/dc/000qxlm93vn93 Crete Energy Venture,dc/5c7tz3lbln3p,https://datacommons.org/browser/dc/5c7tz3lbln3p Watchtower Educational Center,dc/8zmh7ctlkbsc4,https://datacommons.org/browser/dc/8zmh7ctlkbsc4 diff --git a/simple/sample/output/process/report.json b/simple/sample/output/process/report.json index ec45b91a..52d31941 100644 --- a/simple/sample/output/process/report.json +++ b/simple/sample/output/process/report.json @@ -1,37 +1,37 @@ { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:22.644858", - "lastUpdate": "2023-10-23 10:50:30.534262", + "startTime": "2023-10-23 14:43:10.816166", + "lastUpdate": "2023-10-23 14:43:17.797630", "importFiles": { "countries.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:22.645832", - "lastUpdate": "2023-10-23 10:50:23.689136" + "startTime": "2023-10-23 14:43:10.816848", + "lastUpdate": "2023-10-23 14:43:11.638482" }, "geoids.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:23.693980", - "lastUpdate": "2023-10-23 10:50:23.697093" + "startTime": "2023-10-23 14:43:11.641372", + "lastUpdate": "2023-10-23 14:43:11.644468" }, "latlng.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:23.698691", - "lastUpdate": "2023-10-23 10:50:24.975047" + "startTime": "2023-10-23 14:43:11.645726", + "lastUpdate": "2023-10-23 14:43:12.729606" }, "powerplants.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:24.977544", - "lastUpdate": "2023-10-23 10:50:30.267462" + "startTime": "2023-10-23 14:43:12.732431", + "lastUpdate": "2023-10-23 14:43:17.590325" }, "s2cells.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:30.268803", - "lastUpdate": "2023-10-23 10:50:30.270341" + "startTime": "2023-10-23 14:43:17.591729", + "lastUpdate": "2023-10-23 14:43:17.593462" }, "wikidataids.csv": { "status": "SUCCESS", - "startTime": "2023-10-23 10:50:30.271283", - "lastUpdate": "2023-10-23 10:50:30.529614" + "startTime": "2023-10-23 14:43:17.594229", + "lastUpdate": "2023-10-23 14:43:17.794165" } } } \ No newline at end of file diff --git a/simple/sample/output/triples.csv b/simple/sample/output/triples.csv index 0a22caf6..45105d1e 100644 --- a/simple/sample/output/triples.csv +++ b/simple/sample/output/triples.csv @@ -9,15 +9,15 @@ custom/g/group_3,typeOf,StatVarGroup, custom/g/group_3,name,,Child Group 2 custom/g/group_3,specializationOf,custom/g/group_1, custom/statvar_1,typeOf,StatisticalVariable, -custom/statvar_1,name,,Variable 1 -custom/statvar_1,memberOf,custom/g/group_1, +custom/statvar_1,name,,Good var1 name +custom/statvar_1,description,,Good var1 description +custom/statvar_1,memberOf,custom/g/group_2, custom/statvar_2,typeOf,StatisticalVariable, -custom/statvar_2,name,,Variable 2 -custom/statvar_2,memberOf,dc/g/Root, +custom/statvar_2,name,,Good var2 name +custom/statvar_2,memberOf,custom/g/group_3, custom/statvar_3,typeOf,StatisticalVariable, -custom/statvar_3,name,,Good var1 name -custom/statvar_3,description,,Good var1 description -custom/statvar_3,memberOf,custom/g/group_2, +custom/statvar_3,name,,Variable 1 +custom/statvar_3,memberOf,custom/g/group_1, custom/statvar_4,typeOf,StatisticalVariable, -custom/statvar_4,name,,Good var2 name -custom/statvar_4,memberOf,custom/g/group_3, +custom/statvar_4,name,,Variable 2 +custom/statvar_4,memberOf,dc/g/Root, diff --git a/simple/stats/config.py b/simple/stats/config.py index 19b179ba..8389ed4e 100644 --- a/simple/stats/config.py +++ b/simple/stats/config.py @@ -38,71 +38,22 @@ class Config: def __init__(self, data: dict) -> None: self.data = data - def variables_and_groups( - self, sv_names: list[str]) -> (list[StatVar], list[StatVarGroup]): - groups = self._groups(sv_names) - variables = self._variables(sv_names, groups=groups) - return list(variables.values()), list(groups.values()) - - def _groups(self, sv_names: list[str]): - # Group Path to SVGroup - groups: dict[str, StatVarGroup] = {} - - for sv_name in sv_names: - group_path = self._variable_group(sv_name) - if group_path: - tokens = group_path.split("/") - for index in range(len(tokens)): - path = "/".join(tokens[:index + 1]) - if path not in groups: - id = f"{_CUSTOM_GROUP_ID_PREFIX}{len(groups) + 1}" - name = tokens[index] - parent_path = "" if "/" not in path else path[:path.rindex("/")] - parent_id = groups[ - parent_path].id if parent_path in groups else _ROOT_GROUP_ID - groups[path] = StatVarGroup(id, name, parent_id) - - return groups - - def _variables(self, - sv_names: list[str], - groups: dict[str, StatVarGroup] = None): - if not groups: - groups = self._groups(sv_names) - - # Variable column name to SV - svs: dict[str, StatVar] = {} - - for sv_name in sv_names: - if sv_name in svs: - continue - - var = self._variable(sv_name) - id = f"{_CUSTOM_SV_ID_PREFIX}{len(svs) + 1}" - name = var.get(_NAME_FIELD, sv_name) - group_path = self._variable_group(sv_name) - group_id = groups[ - group_path].id if group_path and group_path in groups else _ROOT_GROUP_ID - svs[sv_name] = StatVar(id, - name, - description=var.get(_DESCRIPTION_FIELD, ""), - nl_sentences=var.get(_NL_SENTENCES_FIELD, []), - group_id=group_id) - - return svs + def variable(self, variable_name: str) -> StatVar: + var_cfg = self.data.get(_VARIABLES_FIELD, {}).get(variable_name, {}) + return StatVar( + "", + var_cfg.get(_NAME_FIELD, variable_name), + description=var_cfg.get(_DESCRIPTION_FIELD, ""), + nl_sentences=var_cfg.get(_NL_SENTENCES_FIELD, []), + group_path=var_cfg.get(_GROUP_FIELD, ""), + ) def entity_type(self, input_file_name: str) -> str: - return self.data.get(_INPUT_FILES_FIELD, - {}).get(input_file_name, - {}).get(_ENTITY_TYPE_FIELD, "") + return (self.data.get(_INPUT_FILES_FIELD, + {}).get(input_file_name, + {}).get(_ENTITY_TYPE_FIELD, "")) def ignore_columns(self, input_file_name: str) -> str: - return self.data.get(_INPUT_FILES_FIELD, - {}).get(input_file_name, - {}).get(_IGNORE_COLUMNS_FIELD, []) - - def _variable(self, variable_name: str) -> dict: - return self.data.get(_VARIABLES_FIELD, {}).get(variable_name, {}) - - def _variable_group(self, variable_name: str) -> str: - return self._variable(variable_name).get(_GROUP_FIELD, "") + return (self.data.get(_INPUT_FILES_FIELD, + {}).get(input_file_name, + {}).get(_IGNORE_COLUMNS_FIELD, [])) diff --git a/simple/stats/data.py b/simple/stats/data.py index 01ba5cdd..66863083 100644 --- a/simple/stats/data.py +++ b/simple/stats/data.py @@ -56,6 +56,7 @@ class StatVar: description: str = "" nl_sentences: list[str] = field(default_factory=list) group_id: str = "" + group_path: str = "" def triples(self) -> list[Triple]: triples: list[Triple] = [] diff --git a/simple/stats/importer.py b/simple/stats/importer.py index 7c9db31a..9c526072 100644 --- a/simple/stats/importer.py +++ b/simple/stats/importer.py @@ -16,6 +16,7 @@ import pandas as pd from stats import constants +from stats.nodes import Nodes from stats.reporter import FileImportReporter from util.filehandler import FileHandler @@ -33,6 +34,7 @@ def __init__( observations_fh: FileHandler, debug_resolve_fh: FileHandler, reporter: FileImportReporter, + nodes: Nodes, entity_type: str, ignore_columns: list[str] = list(), ) -> None: @@ -40,11 +42,11 @@ def __init__( self.observations_fh = observations_fh self.debug_resolve_fh = debug_resolve_fh self.reporter = reporter + self.nodes = nodes self.entity_type = entity_type self.ignore_columns = ignore_columns self.df = pd.DataFrame() self.debug_resolve_df = None - self.sv_names = [] def do_import(self) -> None: self.reporter.report_started() @@ -54,7 +56,6 @@ def do_import(self) -> None: self._sanitize_values() self._resolve_entities() self._rename_columns() - self._extract_sv_names() self.reporter.report_success() except Exception as e: self.reporter.report_failure(str(e)) @@ -82,12 +83,20 @@ def _sanitize_values(self): self.df = self.df.astype({self.df.columns[1]: str}) def _rename_columns(self) -> None: - df = self.df - df.columns.values[0] = constants.COLUMN_DCID - df.columns.values[1] = constants.COLUMN_DATE - - def _extract_sv_names(self): - self.sv_names = list(self.df.columns[2:]) + renamed = {} + # Rename dcid and date columns + renamed[self.df.columns[0]] = constants.COLUMN_DCID + renamed[self.df.columns[1]] = constants.COLUMN_DATE + + # Rename SV columns to their IDs + sv_column_names = self.df.columns[2:] + sv_ids = [ + self.nodes.variable(sv_column_name).id + for sv_column_name in sv_column_names + ] + renamed.update({col: id for col, id in zip(sv_column_names, sv_ids)}) + + self.df = self.df.rename(columns=renamed) def _resolve_entities(self) -> None: df = self.df diff --git a/simple/stats/nodes.py b/simple/stats/nodes.py new file mode 100644 index 00000000..d4c4d2d4 --- /dev/null +++ b/simple/stats/nodes.py @@ -0,0 +1,85 @@ +# Copyright 2023 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +import logging + +import pandas as pd +from stats.config import Config +from stats.data import StatVar +from stats.data import StatVarGroup +from stats.data import Triple +from util.filehandler import FileHandler + +_CUSTOM_SV_ID_PREFIX = "custom/statvar_" +_CUSTOM_GROUP_ID_PREFIX = "custom/g/group_" +_ROOT_GROUP_ID = "dc/g/Root" + + +class Nodes: + + def __init__(self, config: Config) -> None: + self.config = config + # Dictionary of SVs from column name to SV + self.variables: dict[str, StatVar] = {} + # Dictionary of SVGs from SVG path to SVG + self.groups: dict[str, StatVarGroup] = {} + + def variable(self, sv_column_name: str) -> StatVar: + if sv_column_name in self.variables: + return self.variables[sv_column_name] + + var_cfg = self.config.variable(sv_column_name) + group = self.group(var_cfg.group_path) + group_id = group.id if group else _ROOT_GROUP_ID + self.variables[sv_column_name] = StatVar( + f"{_CUSTOM_SV_ID_PREFIX}{len(self.variables) + 1}", + var_cfg.name, + description=var_cfg.description, + nl_sentences=var_cfg.nl_sentences, + group_id=group_id, + ) + return self.variables[sv_column_name] + + def group(self, group_path: str) -> StatVarGroup | None: + if not group_path: + return None + if group_path in self.groups: + return self.groups[group_path] + + tokens = group_path.split("/") + for index in range(len(tokens)): + path = "/".join(tokens[:index + 1]) + if path not in self.groups: + parent_path = "" if "/" not in path else path[:path.rindex("/")] + parent_id = (self.groups[parent_path].id + if parent_path in self.groups else _ROOT_GROUP_ID) + self.groups[path] = StatVarGroup( + f"{_CUSTOM_GROUP_ID_PREFIX}{len(self.groups) + 1}", tokens[index], + parent_id) + + return self.groups[group_path] + + def triples(self, triples_fh: FileHandler | None = None) -> list[Triple]: + triples: list[Triple] = [] + for group in self.groups.values(): + triples.extend(group.triples()) + for variable in self.variables.values(): + triples.extend(variable.triples()) + + if triples_fh: + logging.info("Writing %s triples to: %s", len(triples), str(triples_fh)) + triples_fh.write_string(pd.DataFrame(triples).to_csv(index=False)) + + return triples diff --git a/simple/stats/runner.py b/simple/stats/runner.py index 81a1305e..275103e4 100644 --- a/simple/stats/runner.py +++ b/simple/stats/runner.py @@ -18,9 +18,9 @@ from stats import constants from stats.config import Config from stats.importer import SimpleStatsImporter +from stats.nodes import Nodes from stats.reporter import FileImportReporter from stats.reporter import ImportReporter -import stats.triples as triples from util.filehandler import create_file_handler from util.filehandler import FileHandler @@ -44,7 +44,6 @@ def __init__( constants.REPORT_JSON_FILE_NAME)) self.entity_type = entity_type self.ignore_columns = ignore_columns - self.sv_names = [] self.config = Config(data={}) if self.input_fh.isdir: @@ -54,6 +53,8 @@ def __init__( "Config file must be provided for importing directories.") self.config = Config(data=json.loads(config_fh.read_string())) + self.nodes = Nodes(self.config) + self.output_dir_fh.make_dirs() self.process_dir_fh.make_dirs() @@ -78,19 +79,15 @@ def run(self): entity_type=self.config.entity_type(input_file), ignore_columns=self.config.ignore_columns(input_file)) - self._generate_triples() - - self.reporter.report_done() + # Generate triples. + self.nodes.triples( + self.output_dir_fh.make_file(constants.TRIPLES_FILE_NAME)) + # Report done. + self.reporter.report_done() except Exception as e: logging.exception("Error running import") self.reporter.report_failure(error=str(e)) - def _generate_triples(self): - sv_names = sorted(list(set(self.sv_names))) - triples_fh = self.output_dir_fh.make_file(constants.TRIPLES_FILE_NAME) - variables, groups = self.config.variables_and_groups(sv_names) - triples.generate_and_save(variables, groups, triples_fh) - def _run_single_import(self, input_file_fh: FileHandler, reporter: FileImportReporter, @@ -106,7 +103,7 @@ def _run_single_import(self, observations_fh=observations_fh, debug_resolve_fh=debug_resolve_fh, reporter=reporter, + nodes=self.nodes, entity_type=entity_type, ignore_columns=ignore_columns) importer.do_import() - self.sv_names.extend(importer.sv_names) diff --git a/simple/stats/triples.py b/simple/stats/triples.py deleted file mode 100644 index 6c9d6da8..00000000 --- a/simple/stats/triples.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2023 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dataclasses import dataclass - -import pandas as pd -from stats.config import Config -from stats.data import StatVar -from stats.data import StatVarGroup -from stats.data import Triple -from util.filehandler import FileHandler - - -def generate_and_save(variables: list[StatVar], groups: list[StatVarGroup], - triples_fh: FileHandler): - triples = generate(variables, groups) - df = pd.DataFrame(triples) - triples_fh.write_string(df.to_csv(index=False)) - - -def generate(variables: list[StatVar], - groups: list[StatVarGroup]) -> list[Triple]: - triples: list[Triple] = [] - for group in groups: - triples.extend(group.triples()) - for variable in variables: - triples.extend(variable.triples()) - return triples diff --git a/simple/tests/stats/config_test.py b/simple/tests/stats/config_test.py index 762fd57a..ab9f633d 100644 --- a/simple/tests/stats/config_test.py +++ b/simple/tests/stats/config_test.py @@ -16,7 +16,6 @@ from stats.config import Config from stats.data import StatVar -from stats.data import StatVarGroup CONFIG_DATA = { "inputFiles": { @@ -44,48 +43,42 @@ }, } -TEST_SV_COLUMN_NAMES = [ - "Variable 1", "Variable 2", "var3", "Variable with no config" -] - -EXPECTED_GROUPS = [ - StatVarGroup("custom/g/group_1", "Parent Group", "dc/g/Root"), - StatVarGroup("custom/g/group_2", "Child Group 1", "custom/g/group_1"), - StatVarGroup("custom/g/group_3", "Child Group 2", "custom/g/group_1"), -] - -EXPECTED_VARIABLES = [ - StatVar( - "custom/statvar_1", - "Variable 1", - group_id="custom/g/group_2", - ), - StatVar( - "custom/statvar_2", - "Variable 2", - group_id="custom/g/group_2", - ), - StatVar( - "custom/statvar_3", - "Var 3 Name", - description="Var 3 Description", - nl_sentences=["Sentence 1", "Sentence 2"], - group_id="custom/g/group_3", - ), - StatVar( - "custom/statvar_4", - "Variable with no config", - group_id="dc/g/Root", - ), -] - class TestConfig(unittest.TestCase): - def test_variables_and_groups(self): - self.maxDiff = None + def test_variable(self): + config = Config(CONFIG_DATA) + self.assertEqual( + config.variable("Variable 1"), + StatVar("", "Variable 1", group_path="Parent Group/Child Group 1"), + ) + self.assertEqual( + config.variable("Variable 2"), + StatVar("", "Variable 2", group_path="Parent Group/Child Group 1"), + ) + self.assertEqual( + config.variable("var3"), + StatVar( + "", + "Var 3 Name", + description="Var 3 Description", + nl_sentences=["Sentence 1", "Sentence 2"], + group_path="Parent Group/Child Group 2", + ), + ) + self.assertEqual( + config.variable("Variable with no config"), + StatVar("", "Variable with no config"), + ) + + def test_entity_type(self): + config = Config(CONFIG_DATA) + self.assertEqual(config.entity_type("a.csv"), "Country") + self.assertEqual(config.entity_type("b.csv"), "") + self.assertEqual(config.entity_type("not-in-config.csv"), "") + def test_ignore_columns(self): config = Config(CONFIG_DATA) - variables, groups = config.variables_and_groups(TEST_SV_COLUMN_NAMES) - self.assertListEqual(variables, EXPECTED_VARIABLES) - self.assertListEqual(groups, EXPECTED_GROUPS) + self.assertEqual(config.ignore_columns("a.csv"), []) + self.assertEqual(config.ignore_columns("b.csv"), ["ignore1", "ignore2"]) + self.assertEqual(config.ignore_columns("not-in-config.csv"), []) diff --git a/simple/tests/stats/nodes_test.py b/simple/tests/stats/nodes_test.py new file mode 100644 index 00000000..eb9ce5a5 --- /dev/null +++ b/simple/tests/stats/nodes_test.py @@ -0,0 +1,242 @@ +# Copyright 2023 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from stats.config import Config +from stats.data import StatVar +from stats.data import StatVarGroup +from stats.data import Triple +from stats.nodes import Nodes + +CONFIG_DATA = { + "inputFiles": { + "a.csv": { + "entityType": "Country" + }, + "b.csv": { + "entityType": "", + "ignoreColumns": ["ignore1", "ignore2"] + }, + }, + "variables": { + "Variable 1": { + "group": "Parent Group/Child Group 1" + }, + "Variable 2": { + "group": "Parent Group/Child Group 1" + }, + "var3": { + "name": "Var 3 Name", + "description": "Var 3 Description", + "nlSentences": ["Sentence 1", "Sentence 2"], + "group": "Parent Group/Child Group 2", + }, + }, +} + +CONFIG = Config(CONFIG_DATA) + +TEST_COLUMN_NAMES = [ + "Variable 1", + "Variable 2", + "var3", + "Variable with no config", +] + +EXPECTED_TRIPLES = [ + Triple( + "custom/g/group_1", + "typeOf", + object_id="StatVarGroup", + ), + Triple( + "custom/g/group_1", + "name", + object_value="Parent Group", + ), + Triple( + "custom/g/group_1", + "specializationOf", + object_id="dc/g/Root", + ), + Triple( + "custom/g/group_2", + "typeOf", + object_id="StatVarGroup", + ), + Triple( + "custom/g/group_2", + "name", + object_value="Child Group 1", + ), + Triple( + "custom/g/group_2", + "specializationOf", + object_id="custom/g/group_1", + ), + Triple( + "custom/g/group_3", + "typeOf", + object_id="StatVarGroup", + ), + Triple( + "custom/g/group_3", + "name", + object_value="Child Group 2", + ), + Triple( + "custom/g/group_3", + "specializationOf", + object_id="custom/g/group_1", + ), + Triple( + "custom/statvar_1", + "typeOf", + object_id="StatisticalVariable", + ), + Triple( + "custom/statvar_1", + "name", + object_value="Variable 1", + ), + Triple( + "custom/statvar_1", + "memberOf", + object_id="custom/g/group_2", + ), + Triple( + "custom/statvar_2", + "typeOf", + object_id="StatisticalVariable", + ), + Triple( + "custom/statvar_2", + "name", + object_value="Variable 2", + ), + Triple( + "custom/statvar_2", + "memberOf", + object_id="custom/g/group_2", + ), + Triple( + "custom/statvar_3", + "typeOf", + object_id="StatisticalVariable", + ), + Triple( + "custom/statvar_3", + "name", + object_value="Var 3 Name", + ), + Triple( + "custom/statvar_3", + "description", + object_value="Var 3 Description", + ), + Triple( + "custom/statvar_3", + "memberOf", + object_id="custom/g/group_3", + ), + Triple( + "custom/statvar_4", + "typeOf", + object_id="StatisticalVariable", + ), + Triple( + "custom/statvar_4", + "name", + object_value="Variable with no config", + ), + Triple( + "custom/statvar_4", + "memberOf", + object_id="dc/g/Root", + ), +] + + +class TestNodes(unittest.TestCase): + + def test_variable_with_no_config(self): + nodes = Nodes(CONFIG) + sv = nodes.variable("Variable with no config") + self.assertEqual( + sv, + StatVar("custom/statvar_1", + "Variable with no config", + group_id="dc/g/Root"), + ) + + def test_variable_with_config(self): + nodes = Nodes(CONFIG) + sv = nodes.variable("var3") + self.assertEqual( + sv, + StatVar( + "custom/statvar_1", + "Var 3 Name", + description="Var 3 Description", + nl_sentences=["Sentence 1", "Sentence 2"], + group_id="custom/g/group_2", + ), + ) + + def test_variable_with_group(self): + nodes = Nodes(CONFIG) + sv = nodes.variable("Variable 1") + self.assertEqual( + sv, + StatVar("custom/statvar_1", "Variable 1", group_id="custom/g/group_2"), + ) + self.assertListEqual( + list(nodes.groups.values()), + [ + StatVarGroup("custom/g/group_1", "Parent Group", "dc/g/Root"), + StatVarGroup("custom/g/group_2", "Child Group 1", + "custom/g/group_1"), + ], + ) + + def test_multiple_variables_in_same_group(self): + nodes = Nodes(CONFIG) + sv = nodes.variable("Variable 1") + self.assertEqual( + sv, + StatVar("custom/statvar_1", "Variable 1", group_id="custom/g/group_2"), + ) + sv = nodes.variable("Variable 2") + self.assertEqual( + sv, + StatVar("custom/statvar_2", "Variable 2", group_id="custom/g/group_2"), + ) + self.assertListEqual( + list(nodes.groups.values()), + [ + StatVarGroup("custom/g/group_1", "Parent Group", "dc/g/Root"), + StatVarGroup("custom/g/group_2", "Child Group 1", + "custom/g/group_1"), + ], + ) + + def test_triples(self): + nodes = Nodes(CONFIG) + for sv_column_name in TEST_COLUMN_NAMES: + nodes.variable(sv_column_name) + + triples = nodes.triples() + + self.assertListEqual(triples, EXPECTED_TRIPLES)