From fab89e365f0e38cbc87c528f2c00746c04be2d6c Mon Sep 17 00:00:00 2001 From: Keyur Shah Date: Mon, 30 Sep 2024 12:22:08 -0700 Subject: [PATCH] Use separate columns for standard observation properties. (#341) --- simple/stats/data.py | 70 ++++++++++++++----- simple/stats/db.py | 37 +++++++--- simple/stats/observations_importer.py | 4 +- simple/stats/schema_constants.py | 10 +++ simple/stats/variable_per_row_importer.py | 4 +- simple/tests/stats/data_test.py | 11 +++ simple/tests/stats/db_test.py | 11 ++- .../countryalpha3codes.observations.db.csv | 8 +-- .../expected/idcolumns.observations.db.csv | 12 ++-- .../countryalpha3codes/observations.db.csv | 14 ++-- .../expected/obs_props/observations.db.csv | 14 ++-- .../input/obs_props/config.json | 3 +- .../config_driven/observations.db.csv | 62 ++++++++-------- .../config_with_wildcards/observations.db.csv | 62 ++++++++-------- .../observations.db.csv | 10 +-- .../input_dir_driven/observations.db.csv | 62 ++++++++-------- .../remote_entity_types/observations.db.csv | 18 ++--- .../sv_nl_sentences/observations.db.csv | 10 +-- .../topic_nl_sentences/observations.db.csv | 10 +-- .../custom_column_names/observations.db.csv | 14 ++-- .../default_column_names/observations.db.csv | 14 ++-- .../namespace_prefixes/observations.db.csv | 14 ++-- .../expected/obs_props/observations.db.csv | 14 ++-- 23 files changed, 287 insertions(+), 201 deletions(-) diff --git a/simple/stats/data.py b/simple/stats/data.py index a71c6900..20060c2d 100644 --- a/simple/stats/data.py +++ b/simple/stats/data.py @@ -11,12 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+"""This file includes data model classes used across the simple importer.""" from collections import defaultdict from dataclasses import dataclass from dataclasses import field from dataclasses import fields +from dataclasses import is_dataclass from enum import StrEnum +import json from typing import Self from urllib.parse import urlparse @@ -227,6 +230,30 @@ def triples(self) -> list[Triple]: return triples +@dataclass +class ObservationProperties: + unit: str = "" + scaling_factor: str = "" + measurement_method: str = "" + observation_period: str = "" + # All custom properties other than the standard ones above go in this field. + properties: dict[str, str] = field(default_factory=dict) + + @classmethod + def new(cls: Self, all_properties: dict[str, str] = {}) -> Self: + unit = all_properties.get(sc.PREDICATE_UNIT, "") + scaling_factor = all_properties.get(sc.PREDICATE_SCALING_FACTOR, "") + measurement_method = all_properties.get(sc.PREDICATE_MEASUREMENT_METHOD, "") + observation_period = all_properties.get(sc.PREDICATE_OBSERVATION_PERIOD, "") + custom_properties = { + p: v + for p, v in all_properties.items() + if p not in sc.STANDARD_OBSERVATION_PROPERTIES + } + return cls(unit, scaling_factor, measurement_method, observation_period, + custom_properties) + + @dataclass class Observation: entity: str @@ -234,28 +261,35 @@ class Observation: date: str value: str provenance: str - properties: dict[str, str] = field(default_factory=dict) - - def __post_init__(self): - if not self.properties: - self.properties = {} - # Properties in the DB are stored as gzipped and base64 encoded strings. - # Convert it to json / dict so it is available as a dict in code. 
- elif isinstance(self.properties, str): - self.properties = base64_decode_and_gunzip_json(self.properties) - - def properties_string(self) -> str: - if not self.properties: - return "" - return gzip_and_base64_encode_json(self.properties) + properties: ObservationProperties = field( + default_factory=ObservationProperties.new) def db_tuple(self): return (_strip_namespace(self.entity), _strip_namespace(self.variable), self.date, self.value, _strip_namespace(self.provenance), - self.properties_string()) - - -OBSERVATION_FIELD_NAMES = list(map(lambda x: x.name, fields(Observation))) + _strip_namespace(self.properties.unit), + self.properties.scaling_factor, + _strip_namespace(self.properties.measurement_method), + _strip_namespace(self.properties.observation_period), + json.dumps(self.properties.properties) + if self.properties.properties else "") + + +def _get_flattened_dataclass_field_names(cls) -> list[str]: + """Flattens the field names from the specified class and any of its nested member classes. + In practice, this is used to flatten the fields from the Observation and ObservationProperties classes + which is used to populate test CSVs. 
+ """ + field_names: list[str] = [] + for field in fields(cls): + if is_dataclass(field.type): + field_names.extend(_get_flattened_dataclass_field_names(field.type)) + else: + field_names.append(field.name) + return field_names + + +OBSERVATION_FIELD_NAMES = _get_flattened_dataclass_field_names(Observation) @dataclass diff --git a/simple/stats/db.py b/simple/stats/db.py index df588216..d231aafb 100644 --- a/simple/stats/db.py +++ b/simple/stats/db.py @@ -59,6 +59,11 @@ MAIN_DC_OUTPUT_DIR = "mainDcOutputDir" +_OBSERVATION_PROPERTY_COLUMNS = [ + "unit", "scaling_factor", "measurement_method", "observation_period", + "properties" +] + _CREATE_TRIPLES_TABLE = """ create table if not exists triples ( subject_id varchar(255), @@ -78,12 +83,16 @@ date varchar(255), value varchar(255), provenance varchar(255), + unit varchar(255), + scaling_factor varchar(255), + measurement_method varchar(255), + observation_period varchar(255), properties TEXT ); """ _DELETE_OBSERVATIONS_STATEMENT = "delete from observations" -_INSERT_OBSERVATIONS_STATEMENT = "insert into observations values(?, ?, ?, ?, ?, ?)" +_INSERT_OBSERVATIONS_STATEMENT = "insert into observations values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)" _CREATE_KEY_VALUE_STORE_TABLE = """ create table if not exists key_value_store ( @@ -122,11 +131,17 @@ # Schema update statements. -# The properties column was not part of the observations table originally. -# This statement adds the column. +# Various property columns not part of the observations table originally. +# These statements add those columns. # Neither sqlite nor mysql support an 'if not exists' statement for altering tables universally, -# so the code needs to check for existence separately before applying this statement. -_ALTER_OBSERVATIONS_TABLE_STATEMENT = "alter table observations add column properties TEXT;" +# so the code needs to check for existence separately before applying these statements. 
+_ALTER_OBSERVATIONS_TABLE_STATEMENTS = [ + "alter table observations add column unit varchar(255);", + "alter table observations add column scaling_factor varchar(255);", + "alter table observations add column measurement_method varchar(255);", + "alter table observations add column observation_period varchar(255);", + "alter table observations add column properties text;" +] OBSERVATIONS_TMCF = """Node: E:Table->E0 typeOf: dcs:StatVarObservation @@ -376,14 +391,15 @@ def _schema_updates(self) -> None: Add any sqlite schema updates here. Ensure that all schema updates always check if the update is necessary before applying it. """ - # Add properties column to observations table if it does not exist. + # Add property columns to observations table if it does not exist. rows = self.fetch_all(_SQLITE_OBSERVATIONS_TABLE_INFO_STATEMENT) existing_columns = set([columns[1] for columns in rows]) if "properties" not in existing_columns: logging.info( - "properties column does not exist in the observations table. Altering table to add it." + f"properties column does not exist in the observations table. Altering table to the following property columns: {', '.join(_OBSERVATION_PROPERTY_COLUMNS)}" ) - self.execute(_ALTER_OBSERVATIONS_TABLE_STATEMENT) + for statement in _ALTER_OBSERVATIONS_TABLE_STATEMENTS: + self.cursor.execute(statement) def _drop_indexes(self) -> None: for index in _DB_INDEXES: @@ -487,9 +503,10 @@ def _schema_updates(self) -> None: properties_column_exists = rows is not None and len(rows) > 0 if not properties_column_exists: logging.info( - "properties column does not exist in the observations table. Altering table to add it." + f"properties column does not exist in the observations table. 
Altering table to the following property columns: {', '.join(_OBSERVATION_PROPERTY_COLUMNS)}" ) - self.execute(_ALTER_OBSERVATIONS_TABLE_STATEMENT) + for statement in _ALTER_OBSERVATIONS_TABLE_STATEMENTS: + self.cursor.execute(statement) def _drop_indexes(self) -> None: for index in _DB_INDEXES: diff --git a/simple/stats/observations_importer.py b/simple/stats/observations_importer.py index 3d211ff4..e937f7c3 100644 --- a/simple/stats/observations_importer.py +++ b/simple/stats/observations_importer.py @@ -18,6 +18,7 @@ from stats import constants from stats import schema_constants as sc from stats.data import Observation +from stats.data import ObservationProperties from stats.db import Db from stats.importer import Importer from stats.nodes import Nodes @@ -115,7 +116,8 @@ def _write_observations(self) -> None: ) provenance = self.nodes.provenance(self.input_file_name).id - obs_props = self.config.observation_properties(self.input_file_name) + obs_props = ObservationProperties.new( + self.config.observation_properties(self.input_file_name)) observations: list[Observation] = [] for _, row in observations_df.iterrows(): diff --git a/simple/stats/schema_constants.py b/simple/stats/schema_constants.py index 8a86458c..beef9e90 100644 --- a/simple/stats/schema_constants.py +++ b/simple/stats/schema_constants.py @@ -34,6 +34,16 @@ PREDICATE_SEARCH_DESCRIPTION = "searchDescription" PREDICATE_MEASURED_PROPERTY = "measuredProperty" PREDICATE_DESCRIPTION = "description" +PREDICATE_UNIT = "unit" +PREDICATE_SCALING_FACTOR = "scalingFactor" +PREDICATE_MEASUREMENT_METHOD = "measurementMethod" +PREDICATE_OBSERVATION_PERIOD = "observationPeriod" + +# The set of standard observation properties with first class support in our APIs and FE. 
+STANDARD_OBSERVATION_PROPERTIES: set[str] = { + PREDICATE_UNIT, PREDICATE_SCALING_FACTOR, PREDICATE_MEASUREMENT_METHOD, + PREDICATE_OBSERVATION_PERIOD +} TYPE_STATISTICAL_VARIABLE = "StatisticalVariable" TYPE_STATISTICAL_VARIABLE_GROUP = "StatVarGroup" diff --git a/simple/stats/variable_per_row_importer.py b/simple/stats/variable_per_row_importer.py index 567d1c48..6dcd8280 100644 --- a/simple/stats/variable_per_row_importer.py +++ b/simple/stats/variable_per_row_importer.py @@ -18,6 +18,7 @@ from stats import constants from stats import schema_constants as sc from stats.data import Observation +from stats.data import ObservationProperties from stats.db import Db from stats.importer import Importer from stats.nodes import Nodes @@ -90,7 +91,8 @@ def _map_columns(self): def _write_observations(self) -> None: provenance = self.nodes.provenance(self.input_file_name).id - obs_props = self.config.observation_properties(self.input_file_name) + obs_props = ObservationProperties.new( + self.config.observation_properties(self.input_file_name)) observations: list[Observation] = [] for row in self.reader: diff --git a/simple/tests/stats/data_test.py b/simple/tests/stats/data_test.py index 221deec2..83b529fc 100644 --- a/simple/tests/stats/data_test.py +++ b/simple/tests/stats/data_test.py @@ -14,8 +14,10 @@ import unittest +from stats.data import _get_flattened_dataclass_field_names from stats.data import Event from stats.data import McfNode +from stats.data import Observation from stats.data import Provenance from stats.data import StatVar from stats.data import StatVarGroup @@ -193,3 +195,12 @@ def test_mcf_node(self): memberOf: svg1""".strip() self.assertEqual(node.to_mcf(), expected) + + def test_get_flattened_dataclass_field_names(self): + expected = [ + "entity", "variable", "date", "value", "provenance", "unit", + "scaling_factor", "measurement_method", "observation_period", + "properties" + ] + self.assertListEqual(_get_flattened_dataclass_field_names(Observation), + 
expected) diff --git a/simple/tests/stats/db_test.py b/simple/tests/stats/db_test.py index e2455bf8..b8546f8c 100644 --- a/simple/tests/stats/db_test.py +++ b/simple/tests/stats/db_test.py @@ -21,6 +21,7 @@ from freezegun import freeze_time from stats.data import Observation +from stats.data import ObservationProperties from stats.data import Triple from stats.db import create_db from stats.db import create_main_dc_config @@ -46,7 +47,15 @@ _OBSERVATIONS = [ Observation("e1", "v1", "2023", "123", "p1"), Observation("e2", "v1", "2023", "456", "p1"), - Observation("e3", "v1", "2023", "789", "p1", {"prop1": "val1"}) + Observation("e3", + "v1", + "2023", + "789", + "p1", + properties=ObservationProperties.new({ + "unit": "USD", + "prop1": "val1" + })) ] _KEY_VALUE = ("k1", "v1") diff --git a/simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv b/simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv index f786ef74..5d30145a 100644 --- a/simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv +++ b/simple/tests/stats/test_data/events_importer/expected/countryalpha3codes.observations.db.csv @@ -1,4 +1,4 @@ -entity,variable,date,value,provenance,properties -country/USA,Count_CrimeEvent,2023,3,c/p/1, -country/BRA,Count_CrimeEvent,2023,2,c/p/1, -country/CHN,Count_CrimeEvent,2023,2,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/USA,Count_CrimeEvent,2023,3,c/p/1,,,,, +country/BRA,Count_CrimeEvent,2023,2,c/p/1,,,,, +country/CHN,Count_CrimeEvent,2023,2,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv b/simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv index c3d59efe..bc140b6c 100644 --- a/simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv +++ 
b/simple/tests/stats/test_data/events_importer/expected/idcolumns.observations.db.csv @@ -1,6 +1,6 @@ -entity,variable,date,value,provenance,properties -country/USA,Crime_Event2_Count,2023-11-08,2,c/p/1, -country/BRA,Crime_Event2_Count,2023-11-08,2,c/p/1, -country/CHN,Crime_Event2_Count,2023-11-08,1,c/p/1, -country/CHN,Crime_Event2_Count,2023-09-17,1,c/p/1, -country/USA,Crime_Event2_Count,2023-08-02,1,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/USA,Crime_Event2_Count,2023-11-08,2,c/p/1,,,,, +country/BRA,Crime_Event2_Count,2023-11-08,2,c/p/1,,,,, +country/CHN,Crime_Event2_Count,2023-11-08,1,c/p/1,,,,, +country/CHN,Crime_Event2_Count,2023-09-17,1,c/p/1,,,,, +country/USA,Crime_Event2_Count,2023-08-02,1,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv b/simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv index 39c81fcf..0ac85634 100644 --- a/simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv +++ b/simple/tests/stats/test_data/observations_importer/expected/countryalpha3codes/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default, -country/JPN,var1,2023,0.21,c/p/default, -country/CHN,var1,2022,-123.456,c/p/default, -country/BRA,var2,2023,6,c/p/default, -country/JPN,var2,2023,56,c/p/default, -country/USA,var2,2023,66,c/p/default, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,,,,, +country/JPN,var1,2023,0.21,c/p/default,,,,, +country/CHN,var1,2022,-123.456,c/p/default,,,,, +country/BRA,var2,2023,6,c/p/default,,,,, +country/JPN,var2,2023,56,c/p/default,,,,, +country/USA,var2,2023,66,c/p/default,,,,, diff --git 
a/simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv b/simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv index 17eaaf29..24497f73 100644 --- a/simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv +++ b/simple/tests/stats/test_data/observations_importer/expected/obs_props/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/JPN,var1,2023,0.21,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/CHN,var1,2022,-123.456,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/BRA,var2,2023,6,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/JPN,var2,2023,56,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/USA,var2,2023,66,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" +country/JPN,var1,2023,0.21,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" +country/CHN,var1,2022,-123.456,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" +country/BRA,var2,2023,6,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" +country/JPN,var2,2023,56,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" +country/USA,var2,2023,66,c/p/default,USD,,,P1Y,"{""customProperty1"": ""customValue1""}" diff --git a/simple/tests/stats/test_data/observations_importer/input/obs_props/config.json 
b/simple/tests/stats/test_data/observations_importer/input/obs_props/config.json index 1073708a..61e60711 100644 --- a/simple/tests/stats/test_data/observations_importer/input/obs_props/config.json +++ b/simple/tests/stats/test_data/observations_importer/input/obs_props/config.json @@ -4,7 +4,8 @@ "entityType": "Dummy", "observationProperties": { "unit": "USD", - "observationPeriod": "P1Y" + "observationPeriod": "P1Y", + "customProperty1": "customValue1" } } } diff --git a/simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv b/simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv index 5f5f45a5..bea81291 100644 --- a/simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/config_driven/observations.db.csv @@ -1,31 +1,31 @@ -entity,variable,date,value,provenance,properties -country/AFG,var1,2023,0.19,c/p/1, -country/YEM,var1,2023,0.21,c/p/1, -country/AGO,var1,2023,0.29,c/p/1, -country/ZMB,var1,2023,0.31,c/p/1, -country/ZWE,var1,2023,0.37,c/p/1, -country/ALB,var1,2023,0.5,c/p/1, -wikidataId/Q22062741,var1,2023,0.5,c/p/1, -country/DZA,var1,2023,0.52,c/p/1, -country/AND,var1,2023,0.76,c/p/1, -country/AFG,var2,2023,6,c/p/1, -country/YEM,var2,2023,56,c/p/1, -country/AGO,var2,2023,6,c/p/1, -country/ZMB,var2,2023,34,c/p/1, -country/ZWE,var2,2023,76,c/p/1, -country/ALB,var2,2023,34,c/p/1, -wikidataId/Q22062741,var2,2023,97,c/p/1, -country/DZA,var2,2023,92,c/p/1, -country/AND,var2,2023,9,c/p/1, -country/ASM,var2,2023,34,c/p/1, -country/AIA,var2,2023,42,c/p/1, -country/WLF,var2,2023,75,c/p/1, -country/ESH,var2,2023,65,c/p/1, -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, -country/USA,var1,2021,555,c/p/1, -country/IND,var1,2022,321,c/p/1, -country/USA,var2,2021,666,c/p/1, -country/IND,var2,2022,123,c/p/1, 
+entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/AFG,var1,2023,0.19,c/p/1,,,,, +country/YEM,var1,2023,0.21,c/p/1,,,,, +country/AGO,var1,2023,0.29,c/p/1,,,,, +country/ZMB,var1,2023,0.31,c/p/1,,,,, +country/ZWE,var1,2023,0.37,c/p/1,,,,, +country/ALB,var1,2023,0.5,c/p/1,,,,, +wikidataId/Q22062741,var1,2023,0.5,c/p/1,,,,, +country/DZA,var1,2023,0.52,c/p/1,,,,, +country/AND,var1,2023,0.76,c/p/1,,,,, +country/AFG,var2,2023,6,c/p/1,,,,, +country/YEM,var2,2023,56,c/p/1,,,,, +country/AGO,var2,2023,6,c/p/1,,,,, +country/ZMB,var2,2023,34,c/p/1,,,,, +country/ZWE,var2,2023,76,c/p/1,,,,, +country/ALB,var2,2023,34,c/p/1,,,,, +wikidataId/Q22062741,var2,2023,97,c/p/1,,,,, +country/DZA,var2,2023,92,c/p/1,,,,, +country/AND,var2,2023,9,c/p/1,,,,, +country/ASM,var2,2023,34,c/p/1,,,,, +country/AIA,var2,2023,42,c/p/1,,,,, +country/WLF,var2,2023,75,c/p/1,,,,, +country/ESH,var2,2023,65,c/p/1,,,,, +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, +country/USA,var1,2021,555,c/p/1,,,,, +country/IND,var1,2022,321,c/p/1,,,,, +country/USA,var2,2021,666,c/p/1,,,,, +country/IND,var2,2022,123,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv b/simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv index 5f5f45a5..bea81291 100644 --- a/simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/config_with_wildcards/observations.db.csv @@ -1,31 +1,31 @@ -entity,variable,date,value,provenance,properties -country/AFG,var1,2023,0.19,c/p/1, -country/YEM,var1,2023,0.21,c/p/1, -country/AGO,var1,2023,0.29,c/p/1, -country/ZMB,var1,2023,0.31,c/p/1, -country/ZWE,var1,2023,0.37,c/p/1, -country/ALB,var1,2023,0.5,c/p/1, -wikidataId/Q22062741,var1,2023,0.5,c/p/1, 
-country/DZA,var1,2023,0.52,c/p/1, -country/AND,var1,2023,0.76,c/p/1, -country/AFG,var2,2023,6,c/p/1, -country/YEM,var2,2023,56,c/p/1, -country/AGO,var2,2023,6,c/p/1, -country/ZMB,var2,2023,34,c/p/1, -country/ZWE,var2,2023,76,c/p/1, -country/ALB,var2,2023,34,c/p/1, -wikidataId/Q22062741,var2,2023,97,c/p/1, -country/DZA,var2,2023,92,c/p/1, -country/AND,var2,2023,9,c/p/1, -country/ASM,var2,2023,34,c/p/1, -country/AIA,var2,2023,42,c/p/1, -country/WLF,var2,2023,75,c/p/1, -country/ESH,var2,2023,65,c/p/1, -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, -country/USA,var1,2021,555,c/p/1, -country/IND,var1,2022,321,c/p/1, -country/USA,var2,2021,666,c/p/1, -country/IND,var2,2022,123,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/AFG,var1,2023,0.19,c/p/1,,,,, +country/YEM,var1,2023,0.21,c/p/1,,,,, +country/AGO,var1,2023,0.29,c/p/1,,,,, +country/ZMB,var1,2023,0.31,c/p/1,,,,, +country/ZWE,var1,2023,0.37,c/p/1,,,,, +country/ALB,var1,2023,0.5,c/p/1,,,,, +wikidataId/Q22062741,var1,2023,0.5,c/p/1,,,,, +country/DZA,var1,2023,0.52,c/p/1,,,,, +country/AND,var1,2023,0.76,c/p/1,,,,, +country/AFG,var2,2023,6,c/p/1,,,,, +country/YEM,var2,2023,56,c/p/1,,,,, +country/AGO,var2,2023,6,c/p/1,,,,, +country/ZMB,var2,2023,34,c/p/1,,,,, +country/ZWE,var2,2023,76,c/p/1,,,,, +country/ALB,var2,2023,34,c/p/1,,,,, +wikidataId/Q22062741,var2,2023,97,c/p/1,,,,, +country/DZA,var2,2023,92,c/p/1,,,,, +country/AND,var2,2023,9,c/p/1,,,,, +country/ASM,var2,2023,34,c/p/1,,,,, +country/AIA,var2,2023,42,c/p/1,,,,, +country/WLF,var2,2023,75,c/p/1,,,,, +country/ESH,var2,2023,65,c/p/1,,,,, +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, +country/USA,var1,2021,555,c/p/1,,,,, +country/IND,var1,2022,321,c/p/1,,,,, +country/USA,var2,2021,666,c/p/1,,,,, 
+country/IND,var2,2022,123,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv b/simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv index 7787e7b9..15102111 100644 --- a/simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/generate_svg_hierarchy/observations.db.csv @@ -1,5 +1,5 @@ -entity,variable,date,value,provenance,properties -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv b/simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv index 5f5f45a5..bea81291 100644 --- a/simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/input_dir_driven/observations.db.csv @@ -1,31 +1,31 @@ -entity,variable,date,value,provenance,properties -country/AFG,var1,2023,0.19,c/p/1, -country/YEM,var1,2023,0.21,c/p/1, -country/AGO,var1,2023,0.29,c/p/1, -country/ZMB,var1,2023,0.31,c/p/1, -country/ZWE,var1,2023,0.37,c/p/1, -country/ALB,var1,2023,0.5,c/p/1, -wikidataId/Q22062741,var1,2023,0.5,c/p/1, -country/DZA,var1,2023,0.52,c/p/1, -country/AND,var1,2023,0.76,c/p/1, -country/AFG,var2,2023,6,c/p/1, -country/YEM,var2,2023,56,c/p/1, -country/AGO,var2,2023,6,c/p/1, -country/ZMB,var2,2023,34,c/p/1, -country/ZWE,var2,2023,76,c/p/1, -country/ALB,var2,2023,34,c/p/1, -wikidataId/Q22062741,var2,2023,97,c/p/1, -country/DZA,var2,2023,92,c/p/1, -country/AND,var2,2023,9,c/p/1, 
-country/ASM,var2,2023,34,c/p/1, -country/AIA,var2,2023,42,c/p/1, -country/WLF,var2,2023,75,c/p/1, -country/ESH,var2,2023,65,c/p/1, -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, -country/USA,var1,2021,555,c/p/1, -country/IND,var1,2022,321,c/p/1, -country/USA,var2,2021,666,c/p/1, -country/IND,var2,2022,123,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/AFG,var1,2023,0.19,c/p/1,,,,, +country/YEM,var1,2023,0.21,c/p/1,,,,, +country/AGO,var1,2023,0.29,c/p/1,,,,, +country/ZMB,var1,2023,0.31,c/p/1,,,,, +country/ZWE,var1,2023,0.37,c/p/1,,,,, +country/ALB,var1,2023,0.5,c/p/1,,,,, +wikidataId/Q22062741,var1,2023,0.5,c/p/1,,,,, +country/DZA,var1,2023,0.52,c/p/1,,,,, +country/AND,var1,2023,0.76,c/p/1,,,,, +country/AFG,var2,2023,6,c/p/1,,,,, +country/YEM,var2,2023,56,c/p/1,,,,, +country/AGO,var2,2023,6,c/p/1,,,,, +country/ZMB,var2,2023,34,c/p/1,,,,, +country/ZWE,var2,2023,76,c/p/1,,,,, +country/ALB,var2,2023,34,c/p/1,,,,, +wikidataId/Q22062741,var2,2023,97,c/p/1,,,,, +country/DZA,var2,2023,92,c/p/1,,,,, +country/AND,var2,2023,9,c/p/1,,,,, +country/ASM,var2,2023,34,c/p/1,,,,, +country/AIA,var2,2023,42,c/p/1,,,,, +country/WLF,var2,2023,75,c/p/1,,,,, +country/ESH,var2,2023,65,c/p/1,,,,, +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, +country/USA,var1,2021,555,c/p/1,,,,, +country/IND,var1,2022,321,c/p/1,,,,, +country/USA,var2,2021,666,c/p/1,,,,, +country/IND,var2,2022,123,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv b/simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv index 8c5c6e97..455fcd96 100644 --- a/simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv +++ 
b/simple/tests/stats/test_data/runner/expected/remote_entity_types/observations.db.csv @@ -1,9 +1,9 @@ -entity,variable,date,value,provenance,properties -country/FAKE1,var1,2024,1,c/p/1, -country/FAKE2,var1,2024,3,c/p/1, -country/FAKE1,var2,2024,2,c/p/1, -country/FAKE2,var2,2024,4,c/p/1, -country/FAKE3,var1,2024,5,c/p/default, -country/FAKE3,var2,2024,6,c/p/default, -country/FAKE4,var1,2024,7,c/p/default, -country/FAKE4,var2,2024,8,c/p/default, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/FAKE1,var1,2024,1,c/p/1,,,,, +country/FAKE2,var1,2024,3,c/p/1,,,,, +country/FAKE1,var2,2024,2,c/p/1,,,,, +country/FAKE2,var2,2024,4,c/p/1,,,,, +country/FAKE3,var1,2024,5,c/p/default,,,,, +country/FAKE3,var2,2024,6,c/p/default,,,,, +country/FAKE4,var1,2024,7,c/p/default,,,,, +country/FAKE4,var2,2024,8,c/p/default,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv b/simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv index 7787e7b9..15102111 100644 --- a/simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/sv_nl_sentences/observations.db.csv @@ -1,5 +1,5 @@ -entity,variable,date,value,provenance,properties -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv b/simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv index 7787e7b9..15102111 100644 --- 
a/simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv +++ b/simple/tests/stats/test_data/runner/expected/topic_nl_sentences/observations.db.csv @@ -1,5 +1,5 @@ -entity,variable,date,value,provenance,properties -country/IND,var1,2020,0.16,c/p/1, -country/IND,var2,2020,53,c/p/1, -country/CHN,var1,2020,0.23,c/p/1, -country/CHN,var2,2020,67,c/p/1, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/IND,var1,2020,0.16,c/p/1,,,,, +country/IND,var2,2020,53,c/p/1,,,,, +country/CHN,var1,2020,0.23,c/p/1,,,,, +country/CHN,var2,2020,67,c/p/1,,,,, diff --git a/simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv b/simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv index ac5d8186..926c416d 100644 --- a/simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv +++ b/simple/tests/stats/test_data/variable_per_row_importer/expected/custom_column_names/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default, -country/BRA,var2,2023,6,c/p/default, -country/JPN,var1,2023,0.21,c/p/default, -country/JPN,var2,2023,56,c/p/default, -country/USA,var2,2023,66,c/p/default, -country/CHN,var1,2022,-123.456,c/p/default, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,,,,, +country/BRA,var2,2023,6,c/p/default,,,,, +country/JPN,var1,2023,0.21,c/p/default,,,,, +country/JPN,var2,2023,56,c/p/default,,,,, +country/USA,var2,2023,66,c/p/default,,,,, +country/CHN,var1,2022,-123.456,c/p/default,,,,, diff --git a/simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv 
b/simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv index ac5d8186..926c416d 100644 --- a/simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv +++ b/simple/tests/stats/test_data/variable_per_row_importer/expected/default_column_names/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default, -country/BRA,var2,2023,6,c/p/default, -country/JPN,var1,2023,0.21,c/p/default, -country/JPN,var2,2023,56,c/p/default, -country/USA,var2,2023,66,c/p/default, -country/CHN,var1,2022,-123.456,c/p/default, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,,,,, +country/BRA,var2,2023,6,c/p/default,,,,, +country/JPN,var1,2023,0.21,c/p/default,,,,, +country/JPN,var2,2023,56,c/p/default,,,,, +country/USA,var2,2023,66,c/p/default,,,,, +country/CHN,var1,2022,-123.456,c/p/default,,,,, diff --git a/simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv b/simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv index ac5d8186..926c416d 100644 --- a/simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv +++ b/simple/tests/stats/test_data/variable_per_row_importer/expected/namespace_prefixes/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default, -country/BRA,var2,2023,6,c/p/default, -country/JPN,var1,2023,0.21,c/p/default, -country/JPN,var2,2023,56,c/p/default, -country/USA,var2,2023,66,c/p/default, -country/CHN,var1,2022,-123.456,c/p/default, +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,,,,, 
+country/BRA,var2,2023,6,c/p/default,,,,, +country/JPN,var1,2023,0.21,c/p/default,,,,, +country/JPN,var2,2023,56,c/p/default,,,,, +country/USA,var2,2023,66,c/p/default,,,,, +country/CHN,var1,2022,-123.456,c/p/default,,,,, diff --git a/simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv b/simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv index e84b6f0c..3ea6bc16 100644 --- a/simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv +++ b/simple/tests/stats/test_data/variable_per_row_importer/expected/obs_props/observations.db.csv @@ -1,7 +1,7 @@ -entity,variable,date,value,provenance,properties -country/BRA,var1,2023,0.19,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/BRA,var2,2023,6,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/JPN,var1,2023,0.21,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/JPN,var2,2023,56,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/USA,var2,2023,66,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== -country/CHN,var1,2022,-123.456,c/p/default,H4sIAAAAAAAC/6tWKs3LLFGyUlAKDXZR0lFQyk8qTi0qSyzJzM8LSC3KzE8ByQUYRirVAgAA6g2KKwAAAA== +entity,variable,date,value,provenance,unit,scaling_factor,measurement_method,observation_period,properties +country/BRA,var1,2023,0.19,c/p/default,USD,,,P1Y, +country/BRA,var2,2023,6,c/p/default,USD,,,P1Y, +country/JPN,var1,2023,0.21,c/p/default,USD,,,P1Y, +country/JPN,var2,2023,56,c/p/default,USD,,,P1Y, +country/USA,var2,2023,66,c/p/default,USD,,,P1Y, +country/CHN,var1,2022,-123.456,c/p/default,USD,,,P1Y,