diff --git a/.gitignore b/.gitignore index 1376338..63ba3bd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .run/ # Python +.pytest_cache/ __pycache__/ venv/ diff --git a/api/main.py b/api/main.py index de70c37..2313b9f 100644 --- a/api/main.py +++ b/api/main.py @@ -140,7 +140,8 @@ def get_datetime_range(datetime_string: str | None) -> Tuple[Timestamp, Timestam else: start_datetime.FromDatetime(datetime.min) if datetimes[1] != "..": - end_datetime.FromDatetime(aware_datetime_type_adapter.validate_python(datetimes[1])) + # HACK add one second so that the end_datetime is included in the interval. + end_datetime.FromDatetime(aware_datetime_type_adapter.validate_python(datetimes[1]) + timedelta(seconds=1)) else: end_datetime.FromDatetime(datetime.max) diff --git a/database/healthcheck_postgis_uptime.sh b/database/healthcheck_postgis_uptime.sh new file mode 100755 index 0000000..d1982ad --- /dev/null +++ b/database/healthcheck_postgis_uptime.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CONNECTION_STRING=$1 # Postgres connection string +UPTIME_AMOUNT=${2:-1} # Number of e.g. hours, minutes, seconds +UPTIME_TYPE=${3:-"minute"} # E.g. hour, minute, second + +# Return exit code based on the uptime of postgres +if [[ $(psql "${CONNECTION_STRING}" -XtAc \ + "SELECT COUNT(*) FROM (SELECT current_timestamp - pg_postmaster_start_time() AS uptime) AS t WHERE t.uptime > interval '${UPTIME_AMOUNT} ${UPTIME_TYPE}'") == 1 ]]; +then + exit 0 +else + exit 1 +fi diff --git a/datastore/storagebackend/postgresql/gettsattrgroups.go b/datastore/storagebackend/postgresql/gettsattrgroups.go index fa615ee..3061f2f 100644 --- a/datastore/storagebackend/postgresql/gettsattrgroups.go +++ b/datastore/storagebackend/postgresql/gettsattrgroups.go @@ -54,27 +54,25 @@ func getTSAllPBNames() []string { return pbNames } -// getTSDBColumns returns names of database columns corresponding to pbNames. -func getTSDBColumns(pbNames []string) ([]string, error) { +// validateAttrs validates pbNames. Returns nil if valid, otherwise error. +func validateAttrs(pbNames []string) error { seen := map[string]struct{}{} - cols := []string{} for _, pbName := range pbNames { if _, found := tspb2go[pbName]; !found { - return nil, fmt.Errorf( + return fmt.Errorf( "attribute not found: %s; supported attributes: %s", pbName, strings.Join(getTSAllPBNames(), ", ")) } if _, found := seen[pbName]; found { - return nil, fmt.Errorf("attribute %s specified more than once", pbName) + return fmt.Errorf("attribute %s specified more than once", pbName) } - cols = append(cols, pbName) seen[pbName] = struct{}{} } - return cols, nil + return nil } // getTSMdata creates a TSMetadata object initialized from colVals. @@ -205,17 +203,21 @@ func getCombo(tsMdata1 *datastore.TSMetadata, goNames []string) (*datastore.TSMe return &tsMdata2, nil } -// getTSAttrGroupsIncInstances populates groups from cols such that each group contains all -// instances that match a unique combination of database values corresponding to cols. +// getTSAttrGroupsIncInstances creates an array of groups from cols such that each group contains +// all instances that match a unique combination of database values corresponding to cols. // All attributes, including those in cols, are set to the actual values found in the database. -// Returns nil upon success, otherwise error. +// +// NOTE: cols is assumed to be sanitized by validateAttrs, so there is no risk of SQL injection +// in the below query. +// +// Returns (array of groups, nil) upon success, otherwise (..., error). 
func getTSAttrGroupsIncInstances( - db *sql.DB, cols []string, groups *[]*datastore.TSMdataGroup) error { + db *sql.DB, cols []string) ([]*datastore.TSMdataGroup, error) { allCols := getTSAllPBNames() // get all protobuf names of TSMetadata message goNames, err := getTSGoNamesFromPBNames(cols) if err != nil { - return fmt.Errorf("getTSGoNamesFromPBNames() failed: %v", err) + return nil, fmt.Errorf("getTSGoNamesFromPBNames() failed: %v", err) } // query database for all columns in time_series, ordered by cols @@ -224,32 +226,34 @@ func getTSAttrGroupsIncInstances( query := fmt.Sprintf("SELECT %s FROM time_series ORDER BY %s", allColsS, colsS) rows, err := db.Query(query) if err != nil { - return fmt.Errorf("db.Query() failed: %v", err) + return nil, fmt.Errorf("db.Query() failed: %v", err) } defer rows.Close() + groups := []*datastore.TSMdataGroup{} + // aggregate rows into groups currInstances := []*datastore.TSMetadata{} // initial current instance set for rows.Next() { // extract tsMdata from current result row tsMdata, err := scanTsRow(rows, allCols) if err != nil { - return fmt.Errorf("scanTsMdata() failed: %v", err) + return nil, fmt.Errorf("scanTsMdata() failed: %v", err) } if len(currInstances) > 0 { // check if we should create a new current instance set equal, err := tsMdataEqual(tsMdata, currInstances[0], goNames) if err != nil { - return fmt.Errorf("tsMdataEqual() failed: %v", err) + return nil, fmt.Errorf("tsMdataEqual() failed: %v", err) } if !equal { // ts metadata changed wrt. cols // add next group with current instance set currCombo, err := getCombo(currInstances[0], goNames) if err != nil { - return fmt.Errorf("getCombo() failed (1): %v", err) + return nil, fmt.Errorf("getCombo() failed (1): %v", err) } - *groups = append(*groups, &datastore.TSMdataGroup{ + groups = append(groups, &datastore.TSMdataGroup{ Combo: currCombo, Instances: currInstances, }) @@ -264,63 +268,70 @@ func getTSAttrGroupsIncInstances( // add final group with current instance set currCombo, err := getCombo(currInstances[0], goNames) if err != nil { - return fmt.Errorf("getCombo() failed (2): %v", err) + return nil, fmt.Errorf("getCombo() failed (2): %v", err) } - *groups = append(*groups, &datastore.TSMdataGroup{ + groups = append(groups, &datastore.TSMdataGroup{ Combo: currCombo, Instances: currInstances, }) - return nil + return groups, nil } -// getTSAttrGroupsComboOnly populates groups from cols such that each group contains a single, -// unique combination of database values corresponding to cols. Other attributes than those in cols -// have the default value for the type (i.e. "" for string, etc.). -// Returns nil upon success, otherwise error. -func getTSAttrGroupsComboOnly(db *sql.DB, cols []string, groups *[]*datastore.TSMdataGroup) error { +// getTSAttrGroupsComboOnly creates an array of groups from cols such that each group contains a +// single, unique combination of database values corresponding to cols. Other attributes than those +// in cols have the default value for the type (i.e. "" for string, etc.). +// +// NOTE: cols is assumed to be sanitized by validateAttrs, so there is no risk of SQL injection +// in the below query. +// +// Returns (array of groups, nil) upon success, otherwise (..., error). 
+func getTSAttrGroupsComboOnly(db *sql.DB, cols []string) ([]*datastore.TSMdataGroup, error) { + // query database for unique combinations of cols in time_series, ordered by cols colsS := strings.Join(cols, ",") query := fmt.Sprintf("SELECT DISTINCT %s FROM time_series ORDER BY %s", colsS, colsS) rows, err := db.Query(query) if err != nil { - return fmt.Errorf("db.Query() failed: %v", err) + return nil, fmt.Errorf("db.Query() failed: %v", err) } defer rows.Close() + groups := []*datastore.TSMdataGroup{} + // aggregate rows into groups for rows.Next() { // extract tsMdata from current result row tsMdata, err := scanTsRow(rows, cols) if err != nil { - return fmt.Errorf("scanTsMdata() failed: %v", err) + return nil, fmt.Errorf("scanTsMdata() failed: %v", err) } // add new group with tsMData as the combo (and leaving the Instances array unset) - *groups = append(*groups, &datastore.TSMdataGroup{Combo: tsMdata}) + groups = append(groups, &datastore.TSMdataGroup{Combo: tsMdata}) } - return nil + return groups, nil } // GetTSAttrGroups ... (see documentation in StorageBackend interface) func (sbe *PostgreSQL) GetTSAttrGroups(request *datastore.GetTSAGRequest) ( *datastore.GetTSAGResponse, error) { - cols, err := getTSDBColumns(request.Attrs) // get database column names for requested attributes - if err != nil { - return nil, fmt.Errorf("getTSAttrCols() failed: %v", err) + if err := validateAttrs(request.Attrs); err != nil { + return nil, fmt.Errorf("validateAttrs() failed: %v", err) } - groups := []*datastore.TSMdataGroup{} + var groups []*datastore.TSMdataGroup + var err error if request.IncludeInstances { - if err := getTSAttrGroupsIncInstances(sbe.Db, cols, &groups); err != nil { - return nil, fmt.Errorf("getTSAGroupsIncInstances() failed: %v", err) + if groups, err = getTSAttrGroupsIncInstances(sbe.Db, request.Attrs); err != nil { + return nil, fmt.Errorf("getTSAttrGroupsIncInstances() failed: %v", err) } } else { - if err := getTSAttrGroupsComboOnly(sbe.Db, cols, &groups); err != nil { - return nil, fmt.Errorf("getTSAGroupsComboOnly() failed: %v", err) + if groups, err = getTSAttrGroupsComboOnly(sbe.Db, request.Attrs); err != nil { + return nil, fmt.Errorf("getTSAttrGroupsComboOnly() failed: %v", err) } } diff --git a/docker-compose.yml b/docker-compose.yml index 7d143e6..b406de2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,19 +10,29 @@ services: volumes: # - ts-data:/home/postgres/pgdata/data # for timescale image - ts-data:/var/lib/postgresql/data # for postgres image - - ./datastore/ts-init.sql:/docker-entrypoint-initdb.d/init.sql + - ./database/healthcheck_postgis_uptime.sh:/healthcheck_postgis_uptime.sh # for the healthcheck environment: - POSTGRES_USER=postgres - POSTGRES_PASSWORD=mysecretpassword - POSTGRES_DB=data restart: on-failure healthcheck: - test: [ "CMD-SHELL", "psql postgresql://postgres:mysecretpassword@localhost/data -c \"SELECT COUNT(*) from OBSERVATION\"" ] + # HACK Due to the installation of Postgis extension the database is restarted, the healthcheck checks if the database is up for longer than specified time. 
+ test: ["CMD-SHELL", "/healthcheck_postgis_uptime.sh postgresql://postgres:mysecretpassword@localhost/data 10 second"] interval: 5s timeout: 1s retries: 3 start_period: 30s # Failures in 30 seconds do not mark container as unhealthy + migrate: + image: migrate/migrate:4 + volumes: + - ./migrate/data/migrations:/data/migrations + command: ["-path", "/data/migrations", "-database", "postgres://postgres:mysecretpassword@db:5432/data?sslmode=disable", "up"] + depends_on: + db: + condition: service_healthy + store: build: context: datastore @@ -45,8 +55,8 @@ services: retries: 3 start_period: 30s # Failures in 30 seconds do not mark container as unhealthy depends_on: - db: - condition: service_healthy + migrate: + condition: service_completed_successfully api: build: @@ -93,6 +103,7 @@ services: environment: - DSHOST=store - DSPORT=50050 + - BASE_URL=http://api:8000 depends_on: store: condition: service_healthy diff --git a/integration-test/Dockerfile b/integration-test/Dockerfile index 39a424a..eab301c 100644 --- a/integration-test/Dockerfile +++ b/integration-test/Dockerfile @@ -29,6 +29,9 @@ RUN python -m grpc_tools.protoc \ COPY "${PROJECT_PYTHON_PATH}/test_knmi.py" "${DOCKER_PATH}/test_knmi.py" COPY "${PROJECT_PYTHON_PATH}/test_delete.py" "${DOCKER_PATH}/test_delete.py" +COPY "${PROJECT_PYTHON_PATH}/test_api.py" "${DOCKER_PATH}/test_api.py" + +COPY "${PROJECT_PYTHON_PATH}/response/" "${DOCKER_PATH}/response/" WORKDIR "${DOCKER_PATH}" CMD ["pytest"] diff --git a/integration-test/requirements.in b/integration-test/requirements.in index d591866..3060a1e 100644 --- a/integration-test/requirements.in +++ b/integration-test/requirements.in @@ -3,5 +3,7 @@ # Install using: # pip-sync +deepdiff~=6.2 grpcio-tools~=1.56 pytest~=7.4 +requests~=2.31 diff --git a/integration-test/requirements.txt b/integration-test/requirements.txt index 1cb31cb..390448c 100644 --- a/integration-test/requirements.txt +++ b/integration-test/requirements.txt @@ -4,20 +4,34 @@ # # pip-compile --no-emit-index-url # -grpcio==1.58.0 +certifi==2023.11.17 + # via requests +charset-normalizer==3.3.2 + # via requests +deepdiff==6.7.1 + # via -r requirements.in +grpcio==1.59.3 # via grpcio-tools -grpcio-tools==1.58.0 +grpcio-tools==1.59.3 # via -r requirements.in +idna==3.6 + # via requests iniconfig==2.0.0 # via pytest -packaging==23.1 +ordered-set==4.1.0 + # via deepdiff +packaging==23.2 # via pytest pluggy==1.3.0 # via pytest -protobuf==4.24.3 +protobuf==4.25.1 # via grpcio-tools -pytest==7.4.2 +pytest==7.4.3 + # via -r requirements.in +requests==2.31.0 # via -r requirements.in +urllib3==2.1.0 + # via requests # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/integration-test/response/capabilities/200/all_collections.json b/integration-test/response/capabilities/200/all_collections.json new file mode 100644 index 0000000..30ff65d --- /dev/null +++ b/integration-test/response/capabilities/200/all_collections.json @@ -0,0 +1,77 @@ +{ + "links": [ + { + "href": "http://localhost:8008/collections", + "rel": "self" + } + ], + "collections": [ + { + "id": "observations", + "links": [ + { + "href": "http://localhost:8008/collections/observations", + "rel": "self" + } + ], + "extent": { + "spatial": { + "bbox": [ + [ + 3.0, + 50.0, + 8.0, + 55.0 + ] + ], + "crs": "WGS84" + } + }, + "data_queries": { + "position": { + "link": { + "href": "http://localhost:8008/collections/observations/position", + "rel": "data", + "variables": { + "query_type": "position", + "output_format": [ + 
"CoverageJSON" + ] + } + } + }, + "area": { + "link": { + "href": "http://localhost:8008/collections/observations/area", + "rel": "data", + "variables": { + "query_type": "area", + "output_format": [ + "CoverageJSON" + ] + } + } + }, + "locations": { + "link": { + "href": "http://localhost:8008/collections/observations/locations", + "rel": "data", + "variables": { + "query_type": "locations", + "output_format": [ + "CoverageJSON" + ] + } + } + } + }, + "crs": [ + "WGS84" + ], + "output_formats": [ + "CoverageJSON" + ], + "parameter_names": {} + } + ] +} diff --git a/integration-test/response/collection/area/200/data_within_an_area_with_two_parameters.json b/integration-test/response/collection/area/200/data_within_an_area_with_two_parameters.json new file mode 100644 index 0000000..1878503 --- /dev/null +++ b/integration-test/response/collection/area/200/data_within_an_area_with_two_parameters.json @@ -0,0 +1,287 @@ +{ + "type": "CoverageCollection", + "coverages": [ + { + "type": "Coverage", + "domain": { + "type": "Domain", + "domainType": "PointSeries", + "axes": { + "x": { + "values": [ + 5.8723225499118 + ] + }, + "y": { + "values": [ + 52.0548617826 + ] + }, + "t": { + "values": [ + "2022-12-31T22:50:00Z", + "2022-12-31T23:00:00Z", + "2022-12-31T23:10:00Z", + "2022-12-31T23:20:00Z", + "2022-12-31T23:30:00Z", + "2022-12-31T23:40:00Z", + "2022-12-31T23:50:00Z" + ] + } + }, + "referencing": [ + { + "coordinates": [ + "y", + "x" + ], + "system": { + "type": "GeographicCRS", + "id": "http://www.opengis.net/def/crs/EPSG/0/4326" + } + }, + { + "coordinates": [ + "z" + ], + "system": { + "type": "TemporalRS", + "calendar": "Gregorian" + } + } + ] + }, + "parameters": { + "ff": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "ff" + } + }, + "unit": { + "label": { + "en": "m s-1" + } + } + }, + "rh": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "rh" + } + }, + "unit": { + "label": { + "en": "%" + } + } + } + }, + "ranges": { + "ff": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 7, + 1, + 1 + ], + "values": [ + 10.68, + 8.84, + 9.09, + 8.64, + 8.72, + 9.59, + 10.7 + ] + }, + "rh": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 7, + 1, + 1 + ], + "values": [ + 56, + 56, + 55, + 58, + 56, + 56, + 57 + ] + } + } + }, + { + "type": "Coverage", + "domain": { + "type": "Domain", + "domainType": "PointSeries", + "axes": { + "x": { + "values": [ + 5.1797058644882 + ] + }, + "y": { + "values": [ + 52.098821802977 + ] + }, + "t": { + "values": [ + "2022-12-31T22:50:00Z", + "2022-12-31T23:00:00Z", + "2022-12-31T23:10:00Z", + "2022-12-31T23:20:00Z", + "2022-12-31T23:30:00Z", + "2022-12-31T23:40:00Z", + "2022-12-31T23:50:00Z" + ] + } + }, + "referencing": [ + { + "coordinates": [ + "y", + "x" + ], + "system": { + "type": "GeographicCRS", + "id": "http://www.opengis.net/def/crs/EPSG/0/4326" + } + }, + { + "coordinates": [ + "z" + ], + "system": { + "type": "TemporalRS", + "calendar": "Gregorian" + } + } + ] + }, + "parameters": { + "ff": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "ff" + } + }, + "unit": { + "label": { + "en": "m s-1" + } + } + }, + "rh": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "rh" + } + }, + "unit": { + "label": { + "en": "%" + } + } + } + }, + "ranges": { + "ff": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 7, + 1, + 1 
+ ], + "values": [ + 8.37, + 7.71, + 8.35, + 8.45, + 8.95, + 9.17, + 9.4 + ] + }, + "rh": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 7, + 1, + 1 + ], + "values": [ + 58, + 58, + 58, + 58, + 58, + 58, + 59 + ] + } + } + } + ], + "parameters": { + "ff": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "ff" + } + }, + "unit": { + "label": { + "en": "m s-1" + } + } + }, + "rh": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "rh" + } + }, + "unit": { + "label": { + "en": "%" + } + } + } + } +} diff --git a/integration-test/response/collection/locations/200/locations_within_a_bbox.json b/integration-test/response/collection/locations/200/locations_within_a_bbox.json new file mode 100644 index 0000000..5759e01 --- /dev/null +++ b/integration-test/response/collection/locations/200/locations_within_a_bbox.json @@ -0,0 +1,27 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 5.1797058644882, + 52.098821802977 + ] + }, + "id": "06260" + }, + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + 5.8723225499118, + 52.0548617826 + ] + }, + "id": "06275" + } + ] +} diff --git a/integration-test/response/collection/locations/200/single_location_with_multiple_parameters.json b/integration-test/response/collection/locations/200/single_location_with_multiple_parameters.json new file mode 100644 index 0000000..1666c0a --- /dev/null +++ b/integration-test/response/collection/locations/200/single_location_with_multiple_parameters.json @@ -0,0 +1,167 @@ +{ + "type": "Coverage", + "domain": { + "type": "Domain", + "domainType": "PointSeries", + "axes": { + "x": { + "values": [ + 5.1797058644882 + ] + }, + "y": { + "values": [ + 52.098821802977 + ] + }, + "t": { + "values": [ + "2022-12-31T00:00:00Z", + "2022-12-31T00:10:00Z", + "2022-12-31T00:20:00Z", + "2022-12-31T00:30:00Z", + "2022-12-31T00:40:00Z", + "2022-12-31T00:50:00Z", + "2022-12-31T01:00:00Z", + "2022-12-31T01:10:00Z" + ] + } + }, + "referencing": [ + { + "coordinates": [ + "y", + "x" + ], + "system": { + "type": "GeographicCRS", + "id": "http://www.opengis.net/def/crs/EPSG/0/4326" + } + }, + { + "coordinates": [ + "z" + ], + "system": { + "type": "TemporalRS", + "calendar": "Gregorian" + } + } + ] + }, + "parameters": { + "dd": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "dd" + } + }, + "unit": { + "label": { + "en": "degree" + } + } + }, + "ff": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "ff" + } + }, + "unit": { + "label": { + "en": "m s-1" + } + } + }, + "rh": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "rh" + } + }, + "unit": { + "label": { + "en": "%" + } + } + } + }, + "ranges": { + "dd": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 8, + 1, + 1 + ], + "values": [ + 224.3, + 226, + 228.3, + 230.3, + 234.9, + 237.9, + 235.4, + 240 + ] + }, + "ff": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 8, + 1, + 1 + ], + "values": [ + 4.95, + 4.43, + 4.35, + 3.77, + 4.2, + 4.26, + 4.72, + 4.1 + ] + }, + "rh": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 8, + 1, + 1 + ], + "values": [ + 95, + 94, + 95, + 95, + 95, + 95, + 94, + 94 + ] + } + } +} diff --git 
a/integration-test/response/collection/locations/404/no_data_found.json b/integration-test/response/collection/locations/404/no_data_found.json new file mode 100644 index 0000000..732842a --- /dev/null +++ b/integration-test/response/collection/locations/404/no_data_found.json @@ -0,0 +1,3 @@ +{ + "detail": "No data found" +} diff --git a/integration-test/response/collection/position/200/single_coordinate_with_one_parameter.json b/integration-test/response/collection/position/200/single_coordinate_with_one_parameter.json new file mode 100644 index 0000000..f8b3846 --- /dev/null +++ b/integration-test/response/collection/position/200/single_coordinate_with_one_parameter.json @@ -0,0 +1,95 @@ +{ + "type": "Coverage", + "domain": { + "type": "Domain", + "domainType": "PointSeries", + "axes": { + "x": { + "values": [ + 5.1797058644882 + ] + }, + "y": { + "values": [ + 52.098821802977 + ] + }, + "t": { + "values": [ + "2022-12-31T00:50:00Z", + "2022-12-31T01:00:00Z", + "2022-12-31T01:10:00Z", + "2022-12-31T01:20:00Z", + "2022-12-31T01:30:00Z", + "2022-12-31T01:40:00Z", + "2022-12-31T01:50:00Z", + "2022-12-31T02:00:00Z", + "2022-12-31T02:10:00Z" + ] + } + }, + "referencing": [ + { + "coordinates": [ + "y", + "x" + ], + "system": { + "type": "GeographicCRS", + "id": "http://www.opengis.net/def/crs/EPSG/0/4326" + } + }, + { + "coordinates": [ + "z" + ], + "system": { + "type": "TemporalRS", + "calendar": "Gregorian" + } + } + ] + }, + "parameters": { + "tn": { + "type": "Parameter", + "observedProperty": { + "label": { + "en": "tn" + } + }, + "unit": { + "label": { + "en": "degrees Celsius" + } + } + } + }, + "ranges": { + "tn": { + "type": "NdArray", + "dataType": "float", + "axisNames": [ + "t", + "y", + "x" + ], + "shape": [ + 9, + 1, + 1 + ], + "values": [ + 12.1, + 12, + 12, + 11.9, + 11.9, + 11.8, + 11.8, + 11.7, + 11.7 + ] + } + } +} diff --git a/integration-test/response/metadata/200/single_collection.json b/integration-test/response/metadata/200/single_collection.json new file mode 100644 index 0000000..0043e4a --- /dev/null +++ b/integration-test/response/metadata/200/single_collection.json @@ -0,0 +1,67 @@ +{ + "id": "observations", + "links": [ + { + "href": "http://localhost:8008/collections/observations/observations", + "rel": "self" + } + ], + "extent": { + "spatial": { + "bbox": [ + [ + 3, + 50, + 8, + 55 + ] + ], + "crs": "WGS84" + } + }, + "data_queries": { + "position": { + "link": { + "href": "http://localhost:8008/collections/observations/observations/position", + "rel": "data", + "variables": { + "query_type": "position", + "output_format": [ + "CoverageJSON" + ] + } + } + }, + "area": { + "link": { + "href": "http://localhost:8008/collections/observations/observations/area", + "rel": "data", + "variables": { + "query_type": "area", + "output_format": [ + "CoverageJSON" + ] + } + } + }, + "locations": { + "link": { + "href": "http://localhost:8008/collections/observations/observations/locations", + "rel": "data", + "variables": { + "query_type": "locations", + "output_format": [ + "CoverageJSON" + ] + } + } + } + }, + "crs": [ + "WGS84" + ], + "output_formats": [ + "CoverageJSON" + ], + "parameter_names": {} +} diff --git a/integration-test/response/metadata/404/not_found.json b/integration-test/response/metadata/404/not_found.json new file mode 100644 index 0000000..634080e --- /dev/null +++ b/integration-test/response/metadata/404/not_found.json @@ -0,0 +1,3 @@ +{ + "detail": "Not Found" +} diff --git a/integration-test/test_api.py b/integration-test/test_api.py new file 
mode 100644 index 0000000..4b02e68 --- /dev/null +++ b/integration-test/test_api.py @@ -0,0 +1,144 @@ +import json +import logging +import os +from pathlib import Path + +import requests +from deepdiff import DeepDiff + +logging.basicConfig() +logger = logging.getLogger(__name__) +logger.setLevel(os.environ.get("LOG_LEVEL", logging.INFO)) + + +BASE_URL = os.environ.get("BASE_URL", "http://localhost:8008") + + +def actual_response_is_expected_response(actual_response, expected_path, **kwargs): + file_path = Path(Path(__file__).parent, expected_path).resolve() + with open(file_path) as file: + expected_json = json.load(file) + + diff = DeepDiff(expected_json, actual_response.json(), **kwargs) + assert diff == {} + + +def test_get_all_collections(): + actual_response = requests.get(url=BASE_URL + "/collections") + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/capabilities/200/all_collections.json", exclude_regex_paths=r"\['href'\]$" + ) + + +def test_get_a_single_existing_collection(): + collection_id = "observations" + actual_response = requests.get(url=BASE_URL + f"/collections/{collection_id}") + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/metadata/200/single_collection.json", exclude_regex_paths=r"\['href'\]$" + ) + + +def test_get_a_collection_which_does_not_exist(): + collection_id = "does-not-exist" + actual_response = requests.get(url=BASE_URL + f"/collections/{collection_id}") + + assert actual_response.status_code == 404 + actual_response_is_expected_response(actual_response, "response/metadata/404/not_found.json") + + +def test_from_a_single_collection_get_locations_within_a_bbox(): + collection_id = "observations" + bbox = "5.0,52.0,6.0,52.1" + actual_response = requests.get(url=BASE_URL + f"/collections/{collection_id}/locations?bbox={bbox}") + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/collection/locations/200/locations_within_a_bbox.json" + ) + + +def test_from_a_single_collection_get_a_single_location(): + collection_id = "observations" + location_id = "06260" + parameters = "dd,ff,rh" + datetime = "../2022-12-31T01:10:00Z" + actual_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/locations/{location_id}" + f"?parameter-name={parameters}&datetime={datetime}" + ) + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/collection/locations/200/single_location_with_multiple_parameters.json" + ) + + +def test_that_the_order_of_the_parameters_in_the_response_is_always_the_same(): + """Test that we do not care about the order of parameters passed in the query. + By comparing two requests with the same parameters but in a different sequence. + The first request returns the same response as the second request. 
+ """ + collection_id = "observations" + location_id = "06260" + parameters = " dd, ff , rh" + first_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/locations/{location_id}" f"?parameter-name={parameters}" + ) + + parameters_2 = " rh, ff, dd " + second_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/locations/{location_id}" f"?parameter-name={parameters_2}" + ) + + assert first_response.status_code == 200 + assert second_response.status_code == 200 + diff = DeepDiff(first_response.json(), second_response.json()) + assert diff == {} + + +def test_from_a_single_collection_get_a_single_location_which_does_not_exist(): + collection_id = "observations" + location_id = "does-not-exist" + parameters = "does-not-exist" + actual_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/locations/{location_id}?parameter-name={parameters}" + ) + + assert actual_response.status_code == 404 + actual_response_is_expected_response(actual_response, "response/collection/locations/404/no_data_found.json") + + +def test_from_a_single_collection_get_a_single_position_with_one_parameter(): + collection_id = "observations" + coords = "POINT(5.179705 52.0988218)" + parameters = "tn" + datetime = "2022-12-31T00:50:00Z/2022-12-31T02:10:00Z" + actual_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/position" + f"?coords={coords}¶meter-name={parameters}&datetime={datetime}" + ) + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/collection/position/200/single_coordinate_with_one_parameter.json" + ) + + +def test_from_a_single_collection_get_an_area_with_two_parameters(): + collection_id = "observations" + coords = "POLYGON((5.0 52.0, 6.0 52.0,6.0 52.1,5.0 52.1, 5.0 52.0))" + parameters = " rh, ff " + datetime = "2022-12-31T22:50:00Z/.." + actual_response = requests.get( + url=BASE_URL + f"/collections/{collection_id}/area" + f"?coords={coords}¶meter-name={parameters}&datetime={datetime}" + ) + + assert actual_response.status_code == 200 + actual_response_is_expected_response( + actual_response, "response/collection/area/200/data_within_an_area_with_two_parameters.json" + ) diff --git a/migrate/README.md b/migrate/README.md new file mode 100644 index 0000000..7e4353e --- /dev/null +++ b/migrate/README.md @@ -0,0 +1,28 @@ +# Migration Framework +To have reproducible environments, support rollbacks and that every change is only executed once, we use [Golang Migrate](https://github.com/golang-migrate/migrate/tree/master) as a migration framework. + +See the following URL for installation instructions and basic commands: +https://github.com/golang-migrate/migrate/tree/master/cmd/migrate + +See the following URL for the migration file format instructions: +https://github.com/golang-migrate/migrate/blob/master/MIGRATIONS.md + +## Practicalities +### Initialisation +The migration framework initialises the database. Therefore, no database tables exist before running the migrate step in the docker compose. + +### File name format +The migration file name format follows the suggestion in [MIGRATIONS.md](https://github.com/golang-migrate/migrate/blob/master/MIGRATIONS.md) to use a timestamp as version. + +``` +{version}_{title}.up.{extension} +{version}_{title}.down.{extension} +``` + +On Linux, you can retrieve the current timestamp by running: `date +%s`. + + +### Migration Path +The path `./migrate/data/migrations` is mounted on the migrate container. 
Thus, the docker container only executes the migrations in this path. + +The other path: `./migrate/data/not_supported_yet`, contains an example migration based on code comments about unfinished work from the initialise script. As the path is not mounted, the docker container does not execute migrations in that path. To try out the migrations move the files to `./migrate/data/migrations`. diff --git a/migrate/data/migrations/1701872471_initialise_schema.down.sql b/migrate/data/migrations/1701872471_initialise_schema.down.sql new file mode 100644 index 0000000..ba13745 --- /dev/null +++ b/migrate/data/migrations/1701872471_initialise_schema.down.sql @@ -0,0 +1,5 @@ +-- Commented out the statements below as you never want to undo the initialise. +-- DROP TABLE IF EXISTS observation; +-- DROP TABLE IF EXISTS geo_point; +-- DROP TABLE IF EXISTS time_series; +-- DROP EXTENSION IF EXISTS postgis; diff --git a/datastore/ts-init.sql b/migrate/data/migrations/1701872471_initialise_schema.up.sql similarity index 91% rename from datastore/ts-init.sql rename to migrate/data/migrations/1701872471_initialise_schema.up.sql index e4567c6..d6fdd15 100644 --- a/datastore/ts-init.sql +++ b/migrate/data/migrations/1701872471_initialise_schema.up.sql @@ -54,14 +54,6 @@ CREATE TABLE geo_point ( CREATE INDEX geo_point_idx ON geo_point USING GIST(point); --- not supported yet --- CREATE TABLE geo_polygon ( --- id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, --- polygon GEOGRAPHY(Polygon, 4326) NOT NULL --- ) --- --- CREATE INDEX geo_polygon_idx ON geo_polygon USING GIST(polygon); - CREATE TABLE observation ( ts_id BIGINT NOT NULL REFERENCES time_series(id) ON DELETE CASCADE, @@ -70,10 +62,9 @@ CREATE TABLE observation ( -- Refer to geometry via a foreign key to ensure that each distinct geometry is -- stored only once in the geo_* table, thus speeding up geo search. 
geo_point_id BIGINT NOT NULL REFERENCES geo_point(id) ON DELETE CASCADE, - -- geo_polygon_id integer NOT NULL REFERENCES geo_polygon(id) ON DELETE CASCADE, -- not supported yet -- --- BEGIN for now support only a single instant for obs time --------- - obstime_instant timestamptz, -- NOT NULL, but implied by being part of PK + obstime_instant timestamptz, -- NOT NULL, but implied by being part of PK; obs time variant 1: single instant -- --- END for now support only a single instant for obs time --------- -- --- BEGIN support both single instant and interval for obs time --------- diff --git a/migrate/data/not_supported_yet/1702281165_geo_polygon.down.sql b/migrate/data/not_supported_yet/1702281165_geo_polygon.down.sql new file mode 100644 index 0000000..2302967 --- /dev/null +++ b/migrate/data/not_supported_yet/1702281165_geo_polygon.down.sql @@ -0,0 +1,7 @@ +ALTER TABLE observation + DROP COLUMN IF EXISTS geo_polygon_id, + DROP COLUMN IF EXISTS obstime_start, -- obs time variant 2: interval + DROP COLUMN IF EXISTS obstime_end, + DROP CONSTRAINT IF EXISTS observation_chk_one_obs_time; + +DROP TABLE IF EXISTS geo_polygon; diff --git a/migrate/data/not_supported_yet/1702281165_geo_polygon.up.sql b/migrate/data/not_supported_yet/1702281165_geo_polygon.up.sql new file mode 100644 index 0000000..478cc49 --- /dev/null +++ b/migrate/data/not_supported_yet/1702281165_geo_polygon.up.sql @@ -0,0 +1,22 @@ +-- not supported yet +CREATE TABLE IF NOT EXISTS geo_polygon ( + id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, + polygon GEOGRAPHY(Polygon, 4326) NOT NULL +); + +CREATE INDEX geo_polygon_idx ON geo_polygon USING GIST(polygon); + +------- BEGIN support both single instant and interval for obs time --------- +-- TODO: Fix geo_polygon_id. How to fill the existing rows, otherwise column cannot be added +-- ALTER TABLE observation +-- ADD geo_polygon_id integer NOT NULL REFERENCES geo_polygon(id) ON DELETE CASCADE; -- not supported yet + +ALTER TABLE observation + ADD obstime_start timestamptz, -- obs time variant 2: interval + ADD obstime_end timestamptz, + ADD CONSTRAINT observation_chk_one_obs_time + CHECK ( -- ensure exactly one of [1] obstime_instant and [2] obstime_start/-end is defined + ((obstime_instant IS NOT NULL) AND (obstime_start IS NULL) AND (obstime_end IS NULL)) OR + ((obstime_instant IS NULL) AND (obstime_start IS NOT NULL) AND (obstime_end IS NOT NULL)) + ); +------- END support both single instant and interval for obs time ---------
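
Not part of the diff itself: for reviewers who want to exercise the new healthcheck script and the migrate step by hand, a minimal sketch. The host-side connection details and an installed `migrate` CLI are assumptions here, not something this change sets up.

```
# 1) Healthcheck script added in database/healthcheck_postgis_uptime.sh.
#    Arguments: <connection string> [amount, default 1] [unit, default "minute"];
#    exits 0 once Postgres has been up for longer than the given interval.
#    (Same arguments as used by the docker-compose healthcheck; running it from
#    the host assumes the db port is published locally.)
./database/healthcheck_postgis_uptime.sh \
  "postgresql://postgres:mysecretpassword@localhost/data" 10 second \
  && echo "database has been up for more than 10 seconds"

# 2) Migrations: the same "up" run that the migrate service performs, here via
#    the golang-migrate CLI (assumed installed; port 5432 assumed published).
migrate -path ./migrate/data/migrations \
  -database "postgres://postgres:mysecretpassword@localhost:5432/data?sslmode=disable" up
```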