Skip to content

Commit

Permalink
Add API to get stat for all child places of certain type given parent…
Browse files Browse the repository at this point in the history
… place (#358)

* Add ObsCollection

* More change

* update proto

* remove obsolete golden file

* clean up
  • Loading branch information
shifucun authored Nov 6, 2020
1 parent bb68a5a commit d47abc1
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 153 deletions.
104 changes: 104 additions & 0 deletions e2etest/get_stat_collection_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package e2etest

import (
"context"
"io/ioutil"
"path"
"runtime"
"testing"

pb "github.com/datacommonsorg/mixer/proto"
"github.com/datacommonsorg/mixer/server"
"github.com/google/go-cmp/cmp"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/testing/protocmp"
)

func TestGetStatCollection(t *testing.T) {
ctx := context.Background()

memcacheData, err := loadMemcache()
if err != nil {
t.Fatalf("Failed to load memcache %v", err)
}

client, err := setup(server.NewMemcache(memcacheData))
if err != nil {
t.Fatalf("Failed to set up mixer and client")
}
_, filename, _, _ := runtime.Caller(0)
goldenPath := path.Join(
path.Dir(filename), "../golden_response/staging/get_stat_collection")

for _, c := range []struct {
parentPlace string
childType string
date string
statVar []string
goldenFile string
}{
{
"geoId/06",
"County",
"2016",
[]string{"Count_Person", "Median_Age_Person"},
"CA_County_2016.json",
},
{
"country/USA",
"County",
"2016",
[]string{"Count_Person"},
"USA_County_2016.json",
},
{
"country/USA",
"City",
"2016",
[]string{"Count_Person"},
"USA_City_2016.json",
},
} {
resp, err := client.GetStatCollection(ctx, &pb.GetStatCollectionRequest{
ParentPlace: c.parentPlace,
ChildType: c.childType,
StatVars: c.statVar,
Date: c.date,
})
if err != nil {
t.Errorf("could not GetStatCollections: %s", err)
continue
}
goldenFile := path.Join(goldenPath, c.goldenFile)
if generateGolden {
updateGolden(resp, goldenFile)
continue
}
var expected pb.GetStatCollectionResponse
file, _ := ioutil.ReadFile(goldenFile)
err = protojson.Unmarshal(file, &expected)
if err != nil {
t.Errorf("Can not Unmarshal golden file")
continue
}

if diff := cmp.Diff(resp, &expected, protocmp.Transform()); diff != "" {
t.Errorf("payload got diff: %v", diff)
continue
}
}
}
135 changes: 135 additions & 0 deletions golden_response/staging/get_stat_collection/CA_County_2016.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"data": {
"Count_Person": {
"val": {
"geoId/06001": 1650950,
"geoId/06003": 1057,
"geoId/06005": 37438,
"geoId/06007": 226231,
"geoId/06009": 45303,
"geoId/06011": 21553,
"geoId/06013": 1137268,
"geoId/06015": 27382,
"geoId/06017": 185976,
"geoId/06019": 976830,
"geoId/06021": 27851,
"geoId/06023": 136290,
"geoId/06025": 180179,
"geoId/06027": 17996,
"geoId/06029": 880856,
"geoId/06031": 149382,
"geoId/06033": 63903,
"geoId/06035": 30721,
"geoId/06037": 10105708,
"geoId/06039": 153956,
"geoId/06041": 260562,
"geoId/06043": 17455,
"geoId/06045": 87285,
"geoId/06047": 267628,
"geoId/06049": 8935,
"geoId/06051": 14202,
"geoId/06053": 433953,
"geoId/06055": 140840,
"geoId/06057": 98913,
"geoId/06059": 3164986,
"geoId/06061": 378943,
"geoId/06063": 18772,
"geoId/06065": 2380081,
"geoId/06067": 1510987,
"geoId/06069": 59225,
"geoId/06071": 2131960,
"geoId/06073": 3306089,
"geoId/06077": 732809,
"geoId/06079": 281803,
"geoId/06083": 444340,
"geoId/06085": 1928368,
"geoId/06087": 274396,
"geoId/06089": 178571,
"geoId/06091": 2957,
"geoId/06093": 43479,
"geoId/06095": 438858,
"geoId/06097": 502547,
"geoId/06099": 539255,
"geoId/06101": 95742,
"geoId/06103": 63453,
"geoId/06105": 12830,
"geoId/06107": 459235,
"geoId/06109": 53770,
"geoId/06111": 847323,
"geoId/06113": 215627,
"geoId/06115": 74952
},
"measurement_method": "CensusPEPSurvey",
"import_name": "CensusPEP",
"provenance_domain": "census.gov",
"provenance_url": "https://www.census.gov/programs-surveys/popest.html"
},
"Median_Age_Person": {
"val": {
"geoId/06001": 37.2,
"geoId/06003": 42.8,
"geoId/06005": 50.3,
"geoId/06007": 36.9,
"geoId/06009": 51.2,
"geoId/06011": 34.7,
"geoId/06013": 39.1,
"geoId/06015": 38.1,
"geoId/06017": 45.2,
"geoId/06019": 31.6,
"geoId/06021": 36.9,
"geoId/06023": 37.6,
"geoId/06025": 32.2,
"geoId/06027": 45.4,
"geoId/06029": 31.2,
"geoId/06031": 31.4,
"geoId/06033": 45.8,
"geoId/06035": 36.4,
"geoId/06037": 35.8,
"geoId/06039": 33.5,
"geoId/06041": 45.7,
"geoId/06043": 50.6,
"geoId/06045": 42.3,
"geoId/06047": 30.6,
"geoId/06049": 47.1,
"geoId/06051": 38.9,
"geoId/06053": 33.7,
"geoId/06055": 40.7,
"geoId/06057": 49.5,
"geoId/06059": 37.3,
"geoId/06061": 41.5,
"geoId/06063": 51.7,
"geoId/06065": 34.8,
"geoId/06067": 35.7,
"geoId/06069": 35.1,
"geoId/06071": 32.7,
"geoId/06073": 35.3,
"geoId/06075": 38.4,
"geoId/06077": 33.7,
"geoId/06079": 39,
"geoId/06081": 39.5,
"geoId/06083": 33.7,
"geoId/06085": 36.8,
"geoId/06087": 37,
"geoId/06089": 42.3,
"geoId/06091": 55.2,
"geoId/06093": 47.9,
"geoId/06095": 37.5,
"geoId/06097": 41.2,
"geoId/06099": 33.8,
"geoId/06101": 35.6,
"geoId/06103": 40.6,
"geoId/06105": 50.6,
"geoId/06107": 30.4,
"geoId/06109": 48.2,
"geoId/06111": 37.3,
"geoId/06113": 30.9,
"geoId/06115": 32.2
},
"measurement_method": "CensusACS5yrSurvey",
"import_name": "CensusACS5YearSurvey",
"provenance_domain": "census.gov",
"unit": "Year",
"provenance_url": "https://www.census.gov/"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"data": {
"Count_Person": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"data": {
"Count_Person": {}
}
}
33 changes: 33 additions & 0 deletions proto/mixer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,18 @@ message ObsTimeSeries {
string provenance_url = 8;
}

// A collection of observation values for various places with the same parent
// and place type, for given measured property, observation period, stat type,
// observation date, pop type, and an optional list of pop PVs.
message ObsCollection {
repeated SourceSeries source_cohorts = 2;
}

// Wrapper for observation data.
message ChartStore {
oneof val {
ObsTimeSeries obs_time_series = 1;
ObsCollection obs_collection = 2;
}
}

Expand Down Expand Up @@ -723,6 +731,22 @@ message GetStatAllResponse {
map<string, PlaceStat> place_data = 1;
}

message GetStatCollectionRequest {
// Parent place dcid.
string parent_place = 1;
// Child place type.
string child_type = 2;
// Date for the stat in ISO format.
string date = 3;
// Dcid of the stat var.
repeated string stat_vars = 4;
}

// Response for GetStatCollection service.
message GetStatCollectionResponse {
map<string, SourceSeries> data = 1;
}

service Mixer {
// Query DataCommons Graph with Sparql.
rpc Query(QueryRequest) returns (QueryResponse) {
Expand Down Expand Up @@ -887,6 +911,15 @@ service Mixer {
};
}

// Get the stat value for children places of certain place type at a given
// date.
rpc GetStatCollection(GetStatCollectionRequest) returns (GetStatCollectionResponse) {
option (google.api.http) = {
get: "/stat/collection"
body: "*"
};
}

// Get rankings for given stat var DCIDs.
rpc GetLocationsRankings(GetLocationsRankingsRequest)
returns (GetLocationsRankingsResponse) {
Expand Down
2 changes: 1 addition & 1 deletion server/bigtable.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func bigTableReadRowsParallel(

result := map[string]interface{}{}
for elem := range elemChan {
result[elem.dcid] = elem.data
result[elem.token] = elem.data
}
return result, nil
}
38 changes: 35 additions & 3 deletions server/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,49 @@ func buildStatsKey(
bigtable.RowList, map[string]*placeStatVar) {
rowList := bigtable.RowList{}
keyToToken := map[string]*placeStatVar{}
for statVarDcid, statVarObject := range statVars {
keySuffix := buildStatsKeySuffix(statVarObject)
for sv, svObj := range statVars {
keySuffix := buildStatsKeySuffix(svObj)
for _, place := range places {
rowKey := fmt.Sprintf("%s%s^%s", util.BtChartDataPrefix, place, keySuffix)
rowList = append(rowList, rowKey)
keyToToken[rowKey] = &placeStatVar{place, statVarDcid}
keyToToken[rowKey] = &placeStatVar{place, sv}
}
}
return rowList, keyToToken
}

func buildStatCollectionKey(
parentPlace, childType, date string,
statVars map[string]*StatisticalVariable) (
bigtable.RowList, map[string]string) {

rowList := bigtable.RowList{}
keyToToken := map[string]string{}
for sv, svObj := range statVars {
rowKey := strings.Join([]string{
util.BtChartDataPrefix + parentPlace,
childType,
svObj.MeasuredProp,
svObj.StatType,
svObj.MeasurementDenominator,
svObj.MeasurementQualifier,
date,
svObj.PopType,
}, "^")
cprops := []string{}
for cprop := range svObj.PVs {
cprops = append(cprops, cprop)
}
sort.Strings(cprops)
for _, cprop := range cprops {
rowKey += fmt.Sprintf("^%s^%s", cprop, svObj.PVs[cprop])
}
rowList = append(rowList, rowKey)
keyToToken[rowKey] = sv
}
return rowList, keyToToken
}

func buildPropertyValuesKey(
dcids []string, prop string, arcOut bool) bigtable.RowList {
rowList := bigtable.RowList{}
Expand Down
2 changes: 1 addition & 1 deletion server/memcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (m *Memcache) ReadParallel(
close(elemChan)
result := map[string]interface{}{}
for elem := range elemChan {
result[elem.dcid] = elem.data
result[elem.token] = elem.data
}
return result
}
6 changes: 4 additions & 2 deletions server/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,10 @@ type PropLabelCache struct {
}

type chanData struct {
dcid string
data interface{}
// token is the identifier of the data when reading multiple BigTable rows
// concurrently. It could be place dcid, statvar dcid or other "key".
token string
data interface{}
}

// RelatedPlacesInfo represents the json structure returned by the RelatedPlaces cache.
Expand Down
Loading

0 comments on commit d47abc1

Please sign in to comment.