From 1429bb4089e8e7a6fc2d6a8b1f3431d127ab0daf Mon Sep 17 00:00:00 2001 From: Lukas Phaf Date: Wed, 25 Oct 2023 17:47:25 +0200 Subject: [PATCH 1/2] Don't load time-series that consist only of NAN's into the store. This is quite common for KNMI, as the parameters are stored as a 2 dimensional array (time, station) in the NetCDF file. As not all stations measure all parameters... --- data-loader/client_knmi_station.py | 22 ++++++++++++---------- integration-test/test_knmi.py | 16 ++++++++-------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/data-loader/client_knmi_station.py b/data-loader/client_knmi_station.py index de949e7..ef266d6 100755 --- a/data-loader/client_knmi_station.py +++ b/data-loader/client_knmi_station.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # tested with Python 3.11 import concurrent +import math import os import uuid from multiprocessing import cpu_count @@ -45,16 +46,17 @@ def netcdf_file_to_requests(file_path: Path | str) -> Tuple[List, List]: for time, obs_value in zip(pd.to_datetime(param_file["time"].data).to_pydatetime(), param_file.data): ts = Timestamp() ts.FromDatetime(time) - obs_mdata = dstore.ObsMetadata( - id=str(uuid.uuid4()), - geo_point=dstore.Point(lat=latitude, lon=longitude), - obstime_instant=ts, - value=str(obs_value), # TODO: Store float in DB - ) - observations.append(dstore.Metadata1(ts_mdata=ts_mdata, obs_mdata=obs_mdata)) - - # print(len(observations)) - observation_request_messages.append(dstore.PutObsRequest(observations=observations)) + if not math.isnan(obs_value): # Stations that don't have a parameter give them all as nan + obs_mdata = dstore.ObsMetadata( + id=str(uuid.uuid4()), + geo_point=dstore.Point(lat=latitude, lon=longitude), + obstime_instant=ts, + value=str(obs_value), # TODO: Store float in DB + ) + observations.append(dstore.Metadata1(ts_mdata=ts_mdata, obs_mdata=obs_mdata)) + + if len(observations) > 0: + observation_request_messages.append(dstore.PutObsRequest(observations=observations)) return observation_request_messages diff --git a/integration-test/test_knmi.py b/integration-test/test_knmi.py index 6d81eec..4836de8 100644 --- a/integration-test/test_knmi.py +++ b/integration-test/test_knmi.py @@ -31,14 +31,14 @@ def test_find_series_all_stations_single_parameter(grpc_stub): request = dstore.GetObsRequest(instruments=["rh"]) response = grpc_stub.GetObservations(request) - assert len(response.observations) == NUMBER_OF_STATIONS + assert len(response.observations) == 46 # Not all station have RH def test_find_series_single_station_all_parameters(grpc_stub): request = dstore.GetObsRequest(platforms=["06260"]) response = grpc_stub.GetObservations(request) - assert len(response.observations) == NUMBER_OF_PARAMETERS + assert len(response.observations) == 42 # Station 06260 doesn't have all parameters def test_get_values_single_station_single_parameters(grpc_stub): @@ -108,19 +108,19 @@ def test_get_values_single_station_single_parameters(grpc_stub): (51.75, 3.68), ), ["rh"], - ["06260", "06310", "06323", "06340", "06343", "06348", "06350", "06356"], + ["06260", "06310", "06323", "06340", "06348", "06350", "06356"], ), ( - # All stations in the Netherlands + # All stations in the Netherlands with have RH ((56.00, 2.85), (56.00, 7.22), (50.75, 7.22), (50.75, 2.85)), ["rh"], # fmt: off [ - "06201", "06203", "06204", "06205", "06207", "06208", "06211", "06214", "06215", - "06225", "06229", "06235", "06239", "06240", "06242", "06248", "06249", "06251", - "06252", "06257", "06258", "06260", "06267", "06269", "06270", "06273", "06275", + "06203", "06204", "06205", "06207", "06208", "06211", "06214", "06215", + "06235", "06239", "06240", "06242", "06249", "06251", + "06257", "06260", "06267", "06269", "06270", "06273", "06275", "06277", "06278", "06279", "06280", "06283", "06286", "06290", "06310", "06317", - "06319", "06320", "06321", "06323", "06330", "06340", "06343", "06344", "06348", + "06319", "06323", "06330", "06340", "06344", "06348", "06350", "06356", "06370", "06375", "06377", "06380", "06391" ], # fmt: on From e5f22c4158d969741f6e0662e8ff981de58e37eb Mon Sep 17 00:00:00 2001 From: Lukas Phaf Date: Wed, 25 Oct 2023 17:48:43 +0200 Subject: [PATCH 2/2] Fix typo. --- integration-test/test_knmi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/test_knmi.py b/integration-test/test_knmi.py index 4836de8..0f25d9a 100644 --- a/integration-test/test_knmi.py +++ b/integration-test/test_knmi.py @@ -111,7 +111,7 @@ def test_get_values_single_station_single_parameters(grpc_stub): ["06260", "06310", "06323", "06340", "06348", "06350", "06356"], ), ( - # All stations in the Netherlands with have RH + # All stations in the Netherlands which have RH ((56.00, 2.85), (56.00, 7.22), (50.75, 7.22), (50.75, 2.85)), ["rh"], # fmt: off