Skip to content

Commit

Permalink
Looked up values for the KNMI parameters that were not unique and improved them to make them unique and more accurate.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey-Vervoort-KNMI committed Mar 4, 2024
1 parent 013bd63 commit 9e766ea
Show file tree
Hide file tree
Showing 9 changed files with 581 additions and 324 deletions.
3 changes: 1 addition & 2 deletions api/formatters/covjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@


def make_parameter(ts_mdata):
parameter_id = ts_mdata.parameter_name
return Parameter(
description={"en": ts_mdata.title},
observedProperty=ObservedProperty(
id=f"https://vocab.nerc.ac.uk/standard_name/{ts_mdata.standard_name}",
label={"en": parameter_id},
label={"en": ts_mdata.instrument},
),
unit=Unit(label={"en": ts_mdata.unit}),
)
Expand Down
2 changes: 1 addition & 1 deletion api/metadata_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async def get_collection_metadata(request, is_self) -> Collection:
description=obs.ts_mdata.title,
observedProperty=ObservedProperty(
id=f"https://vocab.nerc.ac.uk/standard_name/{obs.ts_mdata.standard_name}",
label=obs.ts_mdata.parameter_name,
label=obs.ts_mdata.instrument,
),
unit=Unit(label=obs.ts_mdata.unit),
)
Expand Down
52 changes: 38 additions & 14 deletions datastore/data-loader/client_knmi_station.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@
from google.protobuf.timestamp_pb2 import Timestamp
from parameters import knmi_parameter_names

regex_level = re.compile(
r"first|second|third|grass|[0-9]+(\.[0-9]+)?(?=m)|(Level )[0-9]+|(?<=\()[ A-z]+(?=\))", re.IGNORECASE
)
regex_level = re.compile(r"first|second|third|[0-9]+(\.[0-9]+)?(?=m)|(?<=Level )[0-9]+", re.IGNORECASE)
regex_level_centimeters = re.compile(r"[0-9]+(\.[0-9]+)?(?=cm)")
regex_time_period = re.compile(r"(\d+) (Hours|Min)", re.IGNORECASE)


def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
observation_request_messages = []

with xr.open_dataset(file_path, engine="netcdf4", chunks=None) as file: # chunks=None to disable dask
for station_id, latitude, longitude, height in zip(
for station_id, station_name, latitude, longitude, height in zip(
file["station"].values,
file["stationname"].values[0],
file["lat"].values[0],
file["lon"].values[0],
file["height"].values[0],
Expand All @@ -43,6 +44,9 @@ def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
standard_name, level, function, period = generate_parameter_name(
(param_file.standard_name if "standard_name" in param_file.attrs else "placeholder"),
param_file.long_name,
station_id,
station_name,
param_id,
)

ts_mdata = dstore.TSMetadata(
Expand Down Expand Up @@ -78,39 +82,59 @@ def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]:
return observation_request_messages


def generate_parameter_name(standard_name, long_name):
def generate_parameter_name(standard_name, long_name, station_id, station_name, param_id):
# TODO: HACK To let the loader have a unique parameter ID and make the parameters distinguishable.
level = "2.0"
long_name = long_name.lower()
station_name = station_name.lower()
if level_raw := re.search(regex_level, long_name):
level = level_raw[0]
elif "wawa" in long_name:
# WMO table 4680. Note: The sensor is not installed at equal heights at all types of measurement sites:
if level_raw := re.search(regex_level_centimeters, long_name):
level = str(float(level_raw[0]) / 100.0)
elif "grass" in long_name:
level = "0"
elif param_id in ["pg", "pr", "pwc", "vv", "W10", "W10-10", "ww", "ww-10", "za", "zm"]:
# https://english.knmidata.nl/open-data/actuele10mindataknmistations
# Comments code: 2, 3, 11
# Note: The sensor is not installed at equal heights at all types of measurement sites:
# At 'AWS' sites the device is installed at 1.80m. At 'AWS/Aerodrome' and 'Mistpost'
# (note that this includes site Voorschoten (06215) which is 'AWS/Mistpost')
# the device is installed at 2.50m elevation. Exceptions are Berkhout AWS (06249),
# De Bilt AWS (06260) and Twenthe AWS (06290) where the sensor is installed at 2.50m.
level = "1.80/2.50m"
# Since WaWa is automatic detection, I assumed that the other stations are AWS, thus 1.80m
if (
station_id in ["06215", "06249", "06260", "06290"]
or "aerodrome" in station_name
or "mistpost" in station_name
):
level = "2.50"
else:
level = "1.80"

if "Minimum" in long_name:
if "minimum" in long_name:
function = "minimum"
elif "Maximum" in long_name:
elif "maximum" in long_name:
function = "maximum"
elif "Average" in long_name:
elif "average" in long_name:
function = "mean"
else:
function = "point"

period = "PT0S"
if period_raw := re.findall(r"(\d+) (Hours|Min)", long_name):
if period_raw := re.findall(regex_time_period, long_name):
if len(period_raw) == 1:
period_raw = period_raw[0]
else:
raise Exception(f"{period_raw}, {long_name}")
time, scale = period_raw
if scale == "Hours":
if scale == "hours":
period = f"PT{time}H"
elif scale == "Min":
elif scale == "min":
period = f"PT{time}M"
elif param_id == "ww-10":
period = "PT10M"
elif param_id == "ww":
period = "PT01H"

return standard_name, level, function, period

Expand Down
Loading

1 comment on commit 9e766ea

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

API Unit Test Coverage Report
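| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| \_\_init\_\_.py | 0 | 0 | 100% | |
| datastore_pb2.py | 58 | 46 | 21% | 24–69 |
| datastore_pb2_grpc.py | 43 | 23 | 47% | 37–52, 85–87, 92–94, 99–101, 106–108, 112–136, 174, 191, 208, 225 |
| dependencies.py | 48 | 18 | 62% | 16, 26–27, 34, 41, 52, 62–69, 77–84 |
| grpc_getter.py | 16 | 8 | 50% | 15–16, 20–23, 27–29 |
| locustfile.py | 15 | 15 | 0% | 1–31 |
| main.py | 22 | 3 | 86% | 27, 37, 47 |
| metadata_endpoints.py | 34 | 12 | 65% | 24, 42–103, 107 |
| custom_geo_json | | | | |
|    edr_feature_collection.py | 6 | 0 | 100% | |
| formatters | | | | |
|    \_\_init\_\_.py | 7 | 0 | 100% | |
|    covjson.py | 50 | 4 | 92% | 71, 86–89 |
| routers | | | | |
|    \_\_init\_\_.py | 0 | 0 | 100% | |
|    edr.py | 88 | 37 | 58% | 45–104, 132–144, 173, 184–190, 229–246 |
|    records.py | 0 | 0 | 100% | |
| TOTAL | 387 | 166 | 57% | |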

API Unit Test Coverage Summary

Tests Skipped Failures Errors Time
16 0 💤 9 ❌ 0 🔥 2.238s ⏱️

Please sign in to comment.