Looked up values for the KNMI parameters that were not unique and improved them to make them unique and more accurate.
EURODEO · Mar 4, 2024 · 9e766ea · 9e766ea · github-actions · Mar 4, 2024
1 parent 013bd63
commit 9e766ea
Show file tree

Hide file tree

Showing 9 changed files with 581 additions and 324 deletions.
diff --git a/api/formatters/covjson.py b/api/formatters/covjson.py
@@ -26,12 +26,11 @@
 
 
 def make_parameter(ts_mdata):
-    parameter_id = ts_mdata.parameter_name
     return Parameter(
         description={"en": ts_mdata.title},
         observedProperty=ObservedProperty(
             id=f"https://vocab.nerc.ac.uk/standard_name/{ts_mdata.standard_name}",
-            label={"en": parameter_id},
+            label={"en": ts_mdata.instrument},
         ),
         unit=Unit(label={"en": ts_mdata.unit}),
     )

diff --git a/api/metadata_endpoints.py b/api/metadata_endpoints.py
@@ -51,7 +51,7 @@ async def get_collection_metadata(request, is_self) -> Collection:
             description=obs.ts_mdata.title,
             observedProperty=ObservedProperty(
                 id=f"https://vocab.nerc.ac.uk/standard_name/{obs.ts_mdata.standard_name}",
-                label=obs.ts_mdata.parameter_name,
+                label=obs.ts_mdata.instrument,
             ),
             unit=Unit(label=obs.ts_mdata.unit),
         )

diff --git a/datastore/data-loader/client_knmi_station.py b/datastore/data-loader/client_knmi_station.py
@@ -19,17 +19,18 @@
 from google.protobuf.timestamp_pb2 import Timestamp
 from parameters import knmi_parameter_names
 
-regex_level = re.compile(
-    r"first|second|third|grass|[0-9]+(\.[0-9]+)?(?=m)|(Level )[0-9]+|(?<=\()[ A-z]+(?=\))", re.IGNORECASE
-)
+regex_level = re.compile(r"first|second|third|[0-9]+(\.[0-9]+)?(?=m)|(?<=Level )[0-9]+", re.IGNORECASE)
+regex_level_centimeters = re.compile(r"[0-9]+(\.[0-9]+)?(?=cm)")
+regex_time_period = re.compile(r"(\d+) (Hours|Min)", re.IGNORECASE)
 
 
 def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
     observation_request_messages = []
 
     with xr.open_dataset(file_path, engine="netcdf4", chunks=None) as file:  # chunks=None to disable dask
-        for station_id, latitude, longitude, height in zip(
+        for station_id, station_name, latitude, longitude, height in zip(
             file["station"].values,
+            file["stationname"].values[0],
             file["lat"].values[0],
             file["lon"].values[0],
             file["height"].values[0],
@@ -43,6 +44,9 @@ def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
                 standard_name, level, function, period = generate_parameter_name(
                     (param_file.standard_name if "standard_name" in param_file.attrs else "placeholder"),
                     param_file.long_name,
+                    station_id,
+                    station_name,
+                    param_id,
                 )
 
                 ts_mdata = dstore.TSMetadata(
@@ -78,39 +82,59 @@ def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
     return observation_request_messages
 
 
-def generate_parameter_name(standard_name, long_name):
+def generate_parameter_name(standard_name, long_name, station_id, station_name, param_id):
     # TODO: HACK To let the loader have a unique parameter ID and make the parameters distinguishable.
     level = "2.0"
+    long_name = long_name.lower()
+    station_name = station_name.lower()
     if level_raw := re.search(regex_level, long_name):
         level = level_raw[0]
-    elif "wawa" in long_name:
-        # WMO table 4680. Note: The sensor is not installed at equal heights at all types of measurement sites:
+    if level_raw := re.search(regex_level_centimeters, long_name):
+        level = str(float(level_raw[0]) / 100.0)
+    elif "grass" in long_name:
+        level = "0"
+    elif param_id in ["pg", "pr", "pwc", "vv", "W10", "W10-10", "ww", "ww-10", "za", "zm"]:
+        # https://english.knmidata.nl/open-data/actuele10mindataknmistations
+        # Comments code: 2, 3, 11
+        # Note: The sensor is not installed at equal heights at all types of measurement sites:
         # At 'AWS' sites the device is installed at 1.80m. At 'AWS/Aerodrome' and 'Mistpost'
         # (note that this includes site Voorschoten (06215) which is 'AWS/Mistpost')
         # the device is installed at 2.50m elevation. Exceptions are Berkhout AWS (06249),
         # De Bilt AWS (06260) and Twenthe AWS (06290) where the sensor is installed at 2.50m.
-        level = "1.80/2.50m"
+        # Since WaWa is automatic detection, I assumed that the other stations are AWS, thus 1.80m
+        if (
+            station_id in ["06215", "06249", "06260", "06290"]
+            or "aerodrome" in station_name
+            or "mistpost" in station_name
+        ):
+            level = "2.50"
+        else:
+            level = "1.80"
 
-    if "Minimum" in long_name:
+    if "minimum" in long_name:
         function = "minimum"
-    elif "Maximum" in long_name:
+    elif "maximum" in long_name:
         function = "maximum"
-    elif "Average" in long_name:
+    elif "average" in long_name:
         function = "mean"
     else:
         function = "point"
 
     period = "PT0S"
-    if period_raw := re.findall(r"(\d+) (Hours|Min)", long_name):
+    if period_raw := re.findall(regex_time_period, long_name):
         if len(period_raw) == 1:
             period_raw = period_raw[0]
         else:
             raise Exception(f"{period_raw}, {long_name}")
         time, scale = period_raw
-        if scale == "Hours":
+        if scale == "hours":
             period = f"PT{time}H"
-        elif scale == "Min":
+        elif scale == "min":
             period = f"PT{time}M"
+    elif param_id == "ww-10":
+        period = "PT10M"
+    elif param_id == "ww":
+        period = "PT01H"
 
     return standard_name, level, function, period
File	Stmts	Miss	Cover	Missing
\_\_init\_\_.py	0	0	100%
datastore_pb2.py	58	46	21%	24–69
datastore_pb2_grpc.py	43	23	47%	37–52, 85–87, 92–94, 99–101, 106–108, 112–136, 174, 191, 208, 225
dependencies.py	48	18	62%	16, 26–27, 34, 41, 52, 62–69, 77–84
grpc_getter.py	16	8	50%	15–16, 20–23, 27–29
locustfile.py	15	15	0%	1–31
main.py	22	3	86%	27, 37, 47
metadata_endpoints.py	34	12	65%	24, 42–103, 107
custom_geo_json
edr_feature_collection.py	6	0	100%
formatters
\_\_init\_\_.py	7	0	100%
covjson.py	50	4	92%	71, 86–89
routers
\_\_init\_\_.py	0	0	100%
edr.py	88	37	58%	45–104, 132–144, 173, 184–190, 229–246
records.py	0	0	100%
TOTAL	387	166	57%