From 4a7bece8f226d1e84fe53d671f63b2bff588ecf4 Mon Sep 17 00:00:00 2001 From: Jo Asplin Date: Sat, 17 Feb 2024 21:21:52 +0100 Subject: [PATCH] Removed latest_limit and latest_maxage options We currently don't see any important use case for these. Specifying temporal_mode=latest now returns only the single latest observation in otherwise matching time series regardless of obs time (as long as it's within the overall valid time range, typically [now-24H, now]). --- datastore/datastore/README.md | 10 +++- datastore/datastore/common/common.go | 6 +- datastore/datastore/dsimpl/getobservations.go | 36 ------------ .../postgresql/getobservations.go | 55 +++++++------------ protobuf/datastore.proto | 12 +--- 5 files changed, 33 insertions(+), 86 deletions(-) diff --git a/datastore/datastore/README.md b/datastore/datastore/README.md index e8758583..5461a259 100644 --- a/datastore/datastore/README.md +++ b/datastore/datastore/README.md @@ -241,9 +241,15 @@ $ grpcurl -d '{"filter": {"standard_name": {"values": ["wind_speed", "air_temper ... ``` -### Retrieve the two most recent wind speed observations for platform 78990 +### Retrieve all wind speed observations for platform 78990 ```text -$ grpcurl -d '{"filter": {"standard_name": {"values": ["wind_speed"]}, "platform": {"values": ["78990"]}}, "temporal_mode": "latest", "latest_limit": "2"}' -plaintext -proto protobuf/datastore.proto 127.0.0.1:50050 datastore.Datastore.GetObservations +$ grpcurl -d '{"filter": {"standard_name": {"values": ["wind_speed"]}, "platform": {"values": ["78990"]}}}' -plaintext -proto protobuf/datastore.proto 127.0.0.1:50050 datastore.Datastore.GetObservations +... +``` + +### Retrieve the most recent wind speed observation for platform 78990 +```text +$ grpcurl -d '{"filter": {"standard_name": {"values": ["wind_speed"]}, "platform": {"values": ["78990"]}}, "temporal_mode": "latest"}' -plaintext -proto protobuf/datastore.proto 127.0.0.1:50050 datastore.Datastore.GetObservations ... ``` diff --git a/datastore/datastore/common/common.go b/datastore/datastore/common/common.go index c6f106a6..52043a02 100644 --- a/datastore/datastore/common/common.go +++ b/datastore/datastore/common/common.go @@ -170,10 +170,6 @@ func ToSnakeCase(s string) string { } type TemporalSpec struct { - IntervalMode bool // whether temporal mode is 'interval' (true) or 'latest' - // (false) + IntervalMode bool // whether temporal mode is 'interval' (true) or 'latest' (false) Interval *datastore.TimeInterval // interval in 'interval' mode - LatestLimit int // max number of observations retrievable in 'latest' mode - LatestMaxage time.Duration // interval, defined as [now - LatestMaxage, now], within - // which observations may be retrieved in 'latest' mode } diff --git a/datastore/datastore/dsimpl/getobservations.go b/datastore/datastore/dsimpl/getobservations.go index 42d69fd3..a1cac5b8 100644 --- a/datastore/datastore/dsimpl/getobservations.go +++ b/datastore/datastore/dsimpl/getobservations.go @@ -3,9 +3,7 @@ package dsimpl import ( "context" "fmt" - "strconv" "strings" - "time" "datastore/common" "datastore/datastore" @@ -32,7 +30,6 @@ func getTemporalSpec(request *datastore.GetObsRequest) (common.TemporalSpec, err } if tspec.IntervalMode { // validate and initialize for 'interval' mode - ti := request.GetTemporalInterval() // do general validation of interval @@ -45,39 +42,6 @@ func getTemporalSpec(request *datastore.GetObsRequest) (common.TemporalSpec, err } tspec.Interval = ti // valid (but possibly nil to specify a fully open interval!) - - } else { // validate and initialize for 'latest' mode - - // latest_limit ... - if limit0 := request.GetLatestLimit(); limit0 != "" { - limit, err := strconv.Atoi(limit0) - if err != nil { - return common.TemporalSpec{}, - fmt.Errorf("failed to convert latest_limit to an int: %v", limit0) - } - - if limit < 0 { - return common.TemporalSpec{}, - fmt.Errorf("latest_limit cannot be negative: %d", limit) - } - - tspec.LatestLimit = limit // valid - - } else { - // by default return the single latest obs for a time series - tspec.LatestLimit = 1 // default - } - - // latest_maxage ... - if maxage0 := request.GetLatestMaxage(); maxage0 != "" { - // ### TODO! (convert maxage0 (an ISO-8601 period) into a time.Duration and assign to - // tspec.LatestMaxage) - return common.TemporalSpec{}, fmt.Errorf("latest_maxage unimplemented") - } else { - // by default disable filtering on maxage, i.e. don't consider any observation as too - // old - tspec.LatestMaxage = time.Duration(-1) - } } return tspec, nil diff --git a/datastore/datastore/storagebackend/postgresql/getobservations.go b/datastore/datastore/storagebackend/postgresql/getobservations.go index 233a1cdb..5835f89c 100644 --- a/datastore/datastore/storagebackend/postgresql/getobservations.go +++ b/datastore/datastore/storagebackend/postgresql/getobservations.go @@ -315,7 +315,8 @@ func getStringMdataFilter( // // Values to be used for query placeholders are appended to phVals. // -// Upon success the function returns four values: +// Upon success the function returns five values: +// - distinct spec, possibly just '' // - time filter used in a 'WHERE ... AND ...' clause (possibly just 'TRUE') // - geo filter ... ditto // - string metadata ... ditto @@ -323,21 +324,28 @@ func getStringMdataFilter( // otherwise (..., ..., ..., error). func createObsQueryVals( request *datastore.GetObsRequest, tspec common.TemporalSpec, phVals *[]interface{}) ( - string, string, string, error) { + string, string, string, string, error) { + + distinctSpec := "" + if !tspec.IntervalMode { + // 'latest' mode, so ensure that we select only one observation per time series + // (which will be the most recent one thanks to '... ORDER BY ts_id, obstime_instant DESC') + distinctSpec = "DISTINCT ON (ts_id)" + } timeFilter := getTimeFilter(tspec) geoFilter, err := getGeoFilter(request.GetSpatialArea(), phVals) if err != nil { - return "", "", "", fmt.Errorf("getGeoFilter() failed: %v", err) + return "", "", "", "", fmt.Errorf("getGeoFilter() failed: %v", err) } stringMdataFilter, err := getStringMdataFilter(request, phVals) if err != nil { - return "", "", "", fmt.Errorf("getStringMdataFilter() failed: %v", err) + return "", "", "", "", fmt.Errorf("getStringMdataFilter() failed: %v", err) } - return timeFilter, geoFilter, stringMdataFilter, nil + return distinctSpec, timeFilter, geoFilter, stringMdataFilter, nil } // scanObsRow scans all columns from the current result row in rows and converts to an ObsMetadata @@ -396,7 +404,7 @@ func scanObsRow(rows *sql.Rows) (*datastore.ObsMetadata, int64, error) { return &obsMdata, tsID, nil } -// getObs gets into obs all observations that match request. +// getObs gets into obs all observations that match request and tspec. // Returns nil upon success, otherwise error. func getObs( db *sql.DB, request *datastore.GetObsRequest, tspec common.TemporalSpec, @@ -404,14 +412,15 @@ func getObs( // get values needed for query phVals := []interface{}{} // placeholder values - timeFilter, geoFilter, stringMdataFilter, err := createObsQueryVals(request, tspec, &phVals) + distinctSpec, timeFilter, geoFilter, stringMdataFilter, err := createObsQueryVals( + request, tspec, &phVals) if err != nil { return fmt.Errorf("createQueryVals() failed: %v", err) } // define and execute query query := fmt.Sprintf(` - SELECT + SELECT %s ts_id, obstime_instant, pubtime, @@ -423,7 +432,8 @@ func getObs( JOIN geo_point ON observation.geo_point_id = geo_point.id WHERE %s AND %s AND %s ORDER BY ts_id, obstime_instant DESC - `, strings.Join(obsStringMdataCols, ","), timeFilter, geoFilter, stringMdataFilter) + `, distinctSpec, strings.Join(obsStringMdataCols, ","), timeFilter, geoFilter, + stringMdataFilter) rows, err := db.Query(query, phVals...) if err != nil { @@ -433,17 +443,6 @@ func getObs( obsMdatas := make(map[int64][]*datastore.ObsMetadata) // observations per time series ID - // set oldest allowable obs time when in 'latest' mode - oldestTime := time.Time{} - if !tspec.IntervalMode { - loTime, hiTime := common.GetValidTimeRange() - if tspec.LatestMaxage < 0 { // i.e. filtering on maxage is disabled - oldestTime = loTime - } else { - oldestTime = hiTime.Add(-tspec.LatestMaxage) - } - } - // scan rows for rows.Next() { obsMdata, tsID, err := scanObsRow(rows) @@ -451,20 +450,8 @@ func getObs( return fmt.Errorf("scanObsRow() failed: %v", err) } - includeObs := true // by default include obs in this time series (in particular in - // 'interval' mode, since filtering for that has been done already) - if !tspec.IntervalMode { // 'latest' mode - switch { - case len(obsMdatas[tsID]) >= tspec.LatestLimit: - includeObs = false // reject, since not room for this obs - case obsMdata.GetObstimeInstant().AsTime().Before(oldestTime): - includeObs = false // reject, since obs too old - } - } - - if includeObs { // prepend obs to time series to get chronological order - obsMdatas[tsID] = append([]*datastore.ObsMetadata{obsMdata}, obsMdatas[tsID]...) - } + // prepend obs to time series to get chronological order + obsMdatas[tsID] = append([]*datastore.ObsMetadata{obsMdata}, obsMdatas[tsID]...) } // get time series diff --git a/protobuf/datastore.proto b/protobuf/datastore.proto index 82d07be5..a78771e3 100644 --- a/protobuf/datastore.proto +++ b/protobuf/datastore.proto @@ -163,19 +163,13 @@ message GetObsRequest { // --- BEGIN non-string metadata ------------------------- // temporal mode - string temporal_mode = 4; // use 'interval' (the default) or 'latest' + string temporal_mode = 4; // use 'interval' (the default) to retrieve observations in + // an interval (defined by temporal) or 'latest' to retrieve the most recent observation + // of otherwise matching time series // temporal spec applicable when temporal_mode = 'interval' TimeInterval temporal_interval = 1; // only observations in this time range may be returned - // temporal spec applicable when temporal_mode = 'latest' - string latest_limit = 5; // only this many of the latest observations may be returned per time - // series; default: 1 - string latest_maxage = 6; // ISO 8601 period (like 'PT1H') that limits the time window (ending - // at the current time) from which the latest observations may be retrieved (use case: specify - // PT1H if you're not interested in observations older than one hour!); default: entire valid - // time range of the data store, typically PT24H - // spatial filter Polygon spatial_area = 2; // if specified, only observations in this area may be returned