Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dashboard] Support arbitrary non-aggregate expressions in dimensions #3702

Merged
merged 15 commits into from
Dec 22, 2023
Merged
14 changes: 12 additions & 2 deletions proto/gen/rill/runtime/v1/resources.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions proto/gen/rill/runtime/v1/resources.pb.validate.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions proto/gen/rill/runtime/v1/runtime.swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2424,6 +2424,8 @@ definitions:
type: string
column:
type: string
expression:
type: string
label:
type: string
description:
Expand Down
1 change: 1 addition & 0 deletions proto/rill/runtime/v1/resources.proto
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ message MetricsViewSpec {
message DimensionV2 {
string name = 1;
string column = 2;
string expression = 6;
string label = 3;
string description = 4;
bool unnest = 5;
Expand Down
14 changes: 3 additions & 11 deletions runtime/compilers/rillv1/parse_metrics_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type MetricsViewYAML struct {
Name string
Label string
Column string
Expression string
Comment on lines 36 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we don't have validation to ensure that only one of these are set (exactly one of them should be non-nil)

Property string // For backwards compatibility
Description string
Ignore bool `yaml:"ignore"`
Expand Down Expand Up @@ -213,7 +214,7 @@ func (p *Parser) parseMetricsView(ctx context.Context, node *Node) error {
}

names := make(map[string]bool)
columns := make(map[string]bool)

for i, dim := range tmp.Dimensions {
if dim == nil || dim.Ignore {
continue
Expand All @@ -238,12 +239,6 @@ func (p *Parser) parseMetricsView(ctx context.Context, node *Node) error {
return fmt.Errorf("found duplicate dimension or measure name %q", dim.Name)
}
names[lower] = true

lower = strings.ToLower(dim.Column)
if ok := columns[lower]; ok {
return fmt.Errorf("found duplicate dimension column name %q", dim.Column)
}
columns[lower] = true
begelundmuller marked this conversation as resolved.
Show resolved Hide resolved
}

measureCount := 0
Expand All @@ -265,10 +260,6 @@ func (p *Parser) parseMetricsView(ctx context.Context, node *Node) error {
}
names[lower] = true

if ok := columns[lower]; ok {
return fmt.Errorf("measure name %q coincides with a dimension column name", measure.Name)
}

if measure.FormatPreset != "" && measure.FormatD3 != "" {
return fmt.Errorf(`cannot set both "format_preset" and "format_d3" for a measure`)
}
Expand Down Expand Up @@ -424,6 +415,7 @@ func (p *Parser) parseMetricsView(ctx context.Context, node *Node) error {
spec.Dimensions = append(spec.Dimensions, &runtimev1.MetricsViewSpec_DimensionV2{
Name: dim.Name,
Column: dim.Column,
Expression: dim.Expression,
Label: dim.Label,
Description: dim.Description,
Unnest: dim.Unnest,
Expand Down
9 changes: 2 additions & 7 deletions runtime/queries/column_timeseries.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,8 @@ func (q *ColumnTimeseries) Resolve(ctx context.Context, rt *runtime.Runtime, ins
}

return olap.WithConnection(ctx, priority, false, false, func(ctx context.Context, ensuredCtx context.Context, _ *sql.Conn) error {
filter, args, err := buildFilterClauseForMetricsViewFilter(q.MetricsView, q.MetricsViewFilter, olap.Dialect(), q.MetricsViewPolicy)
if err != nil {
return err
}
if filter != "" {
filter = "WHERE 1=1 " + filter
}
filter := ""
var args []any

measures := normaliseMeasures(q.Measures, q.Pixels != 0)
dateTruncSpecifier := convertToDateTruncSpecifier(timeRange.Interval)
Expand Down
200 changes: 24 additions & 176 deletions runtime/queries/metricsview.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,165 +139,11 @@ func structTypeToMetricsViewColumn(v *runtimev1.StructType) []*runtimev1.Metrics
return res
}

// buildFilterClauseForMetricsViewFilter builds a SQL string of conditions joined with AND.
// Unless the result is empty, it is prefixed with "AND".
// I.e. it has the format "AND (...) AND (...) ...".
func buildFilterClauseForMetricsViewFilter(mv *runtimev1.MetricsViewSpec, filter *runtimev1.MetricsViewFilter, dialect drivers.Dialect, policy *runtime.ResolvedMetricsViewSecurity) (string, []any, error) {
var clauses []string
var args []any

if filter != nil && filter.Include != nil {
clause, clauseArgs, err := buildFilterClauseForConditions(mv, filter.Include, false, dialect)
if err != nil {
return "", nil, err
}
clauses = append(clauses, clause)
args = append(args, clauseArgs...)
}

if filter != nil && filter.Exclude != nil {
clause, clauseArgs, err := buildFilterClauseForConditions(mv, filter.Exclude, true, dialect)
if err != nil {
return "", nil, err
}
clauses = append(clauses, clause)
args = append(args, clauseArgs...)
}

if policy != nil && policy.RowFilter != "" {
clauses = append(clauses, "AND "+policy.RowFilter)
}

return strings.Join(clauses, " "), args, nil
}

// buildFilterClauseForConditions returns a string with the format "AND (...) AND (...) ..."
func buildFilterClauseForConditions(mv *runtimev1.MetricsViewSpec, conds []*runtimev1.MetricsViewFilter_Cond, exclude bool, dialect drivers.Dialect) (string, []any, error) {
var clauses []string
var args []any

for _, cond := range conds {
condClause, condArgs, err := buildFilterClauseForCondition(mv, cond, exclude, dialect)
if err != nil {
return "", nil, err
}
if condClause == "" {
continue
}
clauses = append(clauses, condClause)
args = append(args, condArgs...)
}

return strings.Join(clauses, " "), args, nil
}

// buildFilterClauseForCondition returns a string with the format "AND (...)"
func buildFilterClauseForCondition(mv *runtimev1.MetricsViewSpec, cond *runtimev1.MetricsViewFilter_Cond, exclude bool, dialect drivers.Dialect) (string, []any, error) {
var clauses []string
var args []any

// NOTE: Looking up for dimension like this will lead to O(nm).
// Ideal way would be to create a map, but we need to find a clean solution down the line
dim, err := metricsViewDimension(mv, cond.Name)
if err != nil {
return "", nil, err
}
name := safeName(metricsViewDimensionColumn(dim))

notKeyword := ""
if exclude {
notKeyword = "NOT"
}

// Tracks if we found NULL(s) in cond.In
inHasNull := false

// Build "dim [NOT] IN (?, ?, ...)" clause
if len(cond.In) > 0 {
// Add to args, skipping nulls
for _, val := range cond.In {
if _, ok := val.Kind.(*structpb.Value_NullValue); ok {
inHasNull = true
continue // Handled later using "dim IS [NOT] NULL" clause
}
arg, err := pbutil.FromValue(val)
if err != nil {
return "", nil, fmt.Errorf("filter error: %w", err)
}
args = append(args, arg)
}

// If there were non-null args, add a "dim [NOT] IN (...)" clause
if len(args) > 0 {
questionMarks := strings.Join(repeatString("?", len(args)), ",")
var clause string
// Build [NOT] list_has_any("dim", ARRAY[?, ?, ...])
if dim.Unnest && dialect != drivers.DialectDruid {
clause = fmt.Sprintf("%s list_has_any(%s, ARRAY[%s])", notKeyword, name, questionMarks)
} else {
clause = fmt.Sprintf("%s %s IN (%s)", name, notKeyword, questionMarks)
}
clauses = append(clauses, clause)
}
}

// Build "dim [NOT] ILIKE ?"
if len(cond.Like) > 0 {
for _, val := range cond.Like {
var clause string
// Build [NOT] len(list_filter("dim", x -> x ILIKE ?)) > 0
if dim.Unnest && dialect != drivers.DialectDruid {
clause = fmt.Sprintf("%s len(list_filter(%s, x -> x %s ILIKE ?)) > 0", notKeyword, name, notKeyword)
} else {
if dialect == drivers.DialectDruid {
// Druid does not support ILIKE
clause = fmt.Sprintf("LOWER(%s) %s LIKE LOWER(CAST(? AS VARCHAR))", name, notKeyword)
} else {
clause = fmt.Sprintf("%s %s ILIKE ?", name, notKeyword)
}
}

args = append(args, val)
clauses = append(clauses, clause)
}
}

// Add null check
// NOTE: DuckDB doesn't handle NULL values in an "IN" expression. They must be checked with a "dim IS [NOT] NULL" clause.
if inHasNull {
clauses = append(clauses, fmt.Sprintf("%s IS %s NULL", name, notKeyword))
}

// If no checks were added, exit
if len(clauses) == 0 {
return "", nil, nil
}

// Join conditions
var condJoiner string
if exclude {
condJoiner = " AND "
} else {
condJoiner = " OR "
}
condsClause := strings.Join(clauses, condJoiner)

// When you have "dim NOT IN (a, b, ...)", then NULL values are always excluded, even if NULL is not in the list.
// E.g. this returns zero rows: "select * from (select 1 as a union select null as a) where a not in (1)"
// We need to explicitly include it.
if exclude && !inHasNull && len(condsClause) > 0 {
condsClause += fmt.Sprintf(" OR %s IS NULL", name)
}

// Done
return fmt.Sprintf("AND (%s) ", condsClause), args, nil
}

func columnIdentifierExpression(mv *runtimev1.MetricsViewSpec, aliases []*runtimev1.MetricsViewComparisonMeasureAlias, name string, dialect drivers.Dialect) (string, bool) {
// check if identifier is a dimension
for _, dim := range mv.Dimensions {
if dim.Name == name {
return safeName(metricsViewDimensionColumn(dim)), true
return metricsViewDimensionExpression(dim), true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will get templated into the where clause:

  1. Might it be problematic to put a free form expression in here? I think at the least it will need to be wrapped in parentheses
  2. Would it be better to template in just the dimension name (which the expression is aliased to in the SELECT clause)? But not sure if that works on Druid

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with this is for queries where the dimension is not selected we would have issues with the column not being there. The other option is to only add it if is in the select caluse. That made these builder functions too complex.

As for adding parentheses, it gets added when the identifier is actually used.

I guess we are missing a validation to make sure there isnt sql injection happening.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The case where the dim is not selected makes sense. Agree on not making it conditional on the select clause, seems to complicated.
  2. About the parentheses, imagine if the expression is col1 = 10 and the filter is dim1 = true, then you're saying the parenthesis will be added such that the where clause becomes (col1 = 10) = true (correct) and not col1 = 10 = true (which would error)?
  3. How could SQL injection happen? For the where filters, I thought we were doing lookups to resolve identifiers and using ? and args to pass values?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Yes all binary expressions have it. like and in do not seem to have it. Lemme add that.
  2. It could happen in the dimension expression, count(*)";drop table blah; -- or something like that.

}
}

Expand Down Expand Up @@ -340,6 +186,22 @@ func identifierIsUnnest(mv *runtimev1.MetricsViewSpec, expr *runtimev1.Expressio
return false
}

func dimensionSelect(mv *runtimev1.MetricsViewSpec, dim *runtimev1.MetricsViewSpec_DimensionV2, dialect drivers.Dialect) (dimSelect, unnestClause string) {
colName := safeName(dim.Name)
if !dim.Unnest || dialect == drivers.DialectDruid {
return fmt.Sprintf(`%s as %s`, metricsViewDimensionExpression(dim), colName), ""
}

unnestColName := safeName(tempName(fmt.Sprintf("%s_%s_", "unnested", dim.Name)))
sel := fmt.Sprintf(`%s as %s`, unnestColName, colName)
if dim.Expression == "" {
// select "unnested_colName" as "colName" ... FROM "mv_table", LATERAL UNNEST("mv_table"."colName") tbl("unnested_colName") ...
return sel, fmt.Sprintf(`, LATERAL UNNEST(%s.%s) tbl(%s)`, safeName(mv.Table), colName, unnestColName)
}

return sel, fmt.Sprintf(`, LATERAL UNNEST(%s) tbl(%s)`, dim.Expression, unnestColName)
}

func buildExpression(mv *runtimev1.MetricsViewSpec, expr *runtimev1.Expression, aliases []*runtimev1.MetricsViewComparisonMeasureAlias, dialect drivers.Dialect) (string, []any, error) {
var emptyArg []any
switch e := expr.Expression.(type) {
Expand Down Expand Up @@ -421,7 +283,7 @@ func buildLikeExpression(mv *runtimev1.MetricsViewSpec, cond *runtimev1.Conditio
var clause string
// Build [NOT] len(list_filter("dim", x -> x ILIKE ?)) > 0
if unnest && dialect != drivers.DialectDruid {
clause = fmt.Sprintf("%s len(list_filter(%s, x -> x %s ILIKE %s)) > 0", notKeyword, leftExpr, notKeyword, rightExpr)
clause = fmt.Sprintf("%s len(list_filter(%s, x -> x ILIKE %s)) > 0", notKeyword, leftExpr, rightExpr)
} else {
if dialect == drivers.DialectDruid {
// Druid does not support ILIKE
Expand Down Expand Up @@ -629,14 +491,6 @@ func convertDimensionFilterToExpression(cond *runtimev1.MetricsViewFilter_Cond,
return nil
}

func repeatString(val string, n int) []string {
res := make([]string, n)
for i := 0; i < n; i++ {
res[i] = val
}
return res
}

func convertToString(pbvalue *structpb.Value) (string, error) {
switch pbvalue.GetKind().(type) {
case *structpb.Value_StructValue:
Expand Down Expand Up @@ -669,15 +523,6 @@ func convertToXLSXValue(pbvalue *structpb.Value) (interface{}, error) {
}
}

func metricsViewDimensionToSafeColumn(mv *runtimev1.MetricsViewSpec, dimName string) (string, error) {
dimName = strings.ToLower(dimName)
dimension, err := metricsViewDimension(mv, dimName)
if err != nil {
return "", err
}
return safeName(metricsViewDimensionColumn(dimension)), nil
}

func metricsViewDimension(mv *runtimev1.MetricsViewSpec, dimName string) (*runtimev1.MetricsViewSpec_DimensionV2, error) {
for _, dimension := range mv.Dimensions {
if strings.EqualFold(dimension.Name, dimName) {
Expand All @@ -687,13 +532,16 @@ func metricsViewDimension(mv *runtimev1.MetricsViewSpec, dimName string) (*runti
return nil, fmt.Errorf("dimension %s not found", dimName)
}

func metricsViewDimensionColumn(dimension *runtimev1.MetricsViewSpec_DimensionV2) string {
func metricsViewDimensionExpression(dimension *runtimev1.MetricsViewSpec_DimensionV2) string {
if dimension.Expression != "" {
return dimension.Expression
}
if dimension.Column != "" {
return dimension.Column
return safeName(dimension.Column)
}
// backwards compatibility for older projects that have not run reconcile on this dashboard
// in that case `column` will not be present
return dimension.Name
return safeName(dimension.Name)
}

func metricsViewMeasureExpression(mv *runtimev1.MetricsViewSpec, measureName string) (string, error) {
Expand Down
Loading
Loading