From 1353e82528e8d6d266f17f4cce013a6d8c65f7bd Mon Sep 17 00:00:00 2001 From: Bo Xu Date: Fri, 9 Jun 2023 21:29:07 +0000 Subject: [PATCH] Improve BQ query efficiency for stat var observation related SPARQL (#1160) --- internal/translator/translate.go | 23 ++++++++++++++++++++--- internal/translator/translate_test.go | 24 ++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/internal/translator/translate.go b/internal/translator/translate.go index e7fc060f7..e6d856ae2 100644 --- a/internal/translator/translate.go +++ b/internal/translator/translate.go @@ -28,6 +28,8 @@ import ( "google.golang.org/grpc/status" ) +const svProp = "variable_measured" + // Binding contains a query and mapping object which bind together. type Binding struct { Query *types.Query @@ -812,8 +814,17 @@ func getSQL( // Sort to get deterministic result. sort.SliceStable(whereConstraints, func(i, j int) bool { - return strings.Compare( - whereConstraints[i].LHS.String(), whereConstraints[j].LHS.String()) < 0 + l := whereConstraints[i].LHS + r := whereConstraints[j].LHS + // Put "variable_measured" constraints at the beginning to better use + // StatVarObservation's key. + if l.Name == svProp { + return true + } + if r.Name == svProp { + return false + } + return strings.Compare(l.String(), r.String()) < 0 }) for idx, c := range whereConstraints { if idx == 0 { @@ -904,7 +915,13 @@ func Translate( queryOptions = &types.QueryOptions{} } - sql, prov, err := getSQL(nodes, constraints, constNode, ProvInfo{queryProv, tableProv}, queryOptions) + sql, prov, err := getSQL( + nodes, + constraints, + constNode, + ProvInfo{queryProv, tableProv}, + queryOptions, + ) if err != nil { return nil, err } diff --git a/internal/translator/translate_test.go b/internal/translator/translate_test.go index 29f688fa3..513da7e9c 100644 --- a/internal/translator/translate_test.go +++ b/internal/translator/translate_test.go @@ -1234,8 +1234,28 @@ func TestStatVarObs(t *testing.T) { "FROM `dc_v3.Place` AS _dc_v3_Place_1\n" + "JOIN `dc_v3.StatVarObservation` AS _dc_v3_StatVarObservation_0\n" + "ON _dc_v3_Place_1.id = _dc_v3_StatVarObservation_0.observation_about\n" + - "WHERE _dc_v3_Place_1.type = \"Country\"\n" + - "AND _dc_v3_StatVarObservation_0.variable_measured = \"Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity_PerCapita\"\n", + "WHERE _dc_v3_StatVarObservation_0.variable_measured = \"Amount_EconomicActivity_GrossNationalIncome_PurchasingPowerParity_PerCapita\"\n" + + "AND _dc_v3_Place_1.type = \"Country\"\n", + }, + { + "browser-observation", + ` + SELECT ?dcid ?mmethod ?obsPeriod ?obsDate + WHERE { + ?svObservation typeOf StatVarObservation . + ?svObservation variableMeasured Count_Person . + ?svObservation observationAbout country/USA . + ?svObservation dcid ?dcid . + ?svObservation measurementMethod ?mmethod . + ?svObservation observationPeriod ?obsPeriod . + } + `, + "SELECT _dc_v3_StatVarObservation_0.id AS dcid,\n" + + "_dc_v3_StatVarObservation_0.measurement_method AS mmethod,\n" + + "_dc_v3_StatVarObservation_0.observation_period AS obsPeriod,\n\n" + + "FROM `dc_v3.StatVarObservation` AS _dc_v3_StatVarObservation_0\n" + + "WHERE _dc_v3_StatVarObservation_0.variable_measured = \"Count_Person\"\n" + + "AND _dc_v3_StatVarObservation_0.observation_about = \"country/USA\"\n", }, } { nodes, queries, _, err := sparql.ParseQuery(c.queryStr)