Skip to content
This repository has been archived by the owner on Jan 28, 2021. It is now read-only.

analyzer: only optimize distinct for sorts where first column is in schema #842

Merged: 1 commit was merged on Oct 16, 2019.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,20 @@ var queries = []struct {
`SELECT (SELECT i FROM mytable ORDER BY i ASC LIMIT 1) AS x`,
[]sql.Row{{int64(1)}},
},
{
`SELECT DISTINCT n FROM bigtable ORDER BY t`,
[]sql.Row{
{int64(1)},
{int64(9)},
{int64(7)},
{int64(3)},
{int64(2)},
{int64(8)},
{int64(6)},
{int64(5)},
{int64(4)},
},
},
}

func TestQueries(t *testing.T) {
Expand Down
10 changes: 6 additions & 4 deletions sql/analyzer/optimization_rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,19 @@ func optimizeDistinct(ctx *sql.Context, a *Analyzer, node sql.Node) (sql.Node, e

a.Log("optimize distinct, node of type: %T", node)
if n, ok := node.(*plan.Distinct); ok {
var isSorted bool
var sortField *expression.GetField
plan.Inspect(n, func(node sql.Node) bool {
a.Log("checking for optimization in node of type: %T", node)
if _, ok := node.(*plan.Sort); ok {
isSorted = true
if sort, ok := node.(*plan.Sort); ok && sortField == nil {
if col, ok := sort.SortFields[0].Column.(*expression.GetField); ok {
sortField = col
}
return false
}
return true
})

if isSorted {
if sortField != nil && n.Schema().Contains(sortField.Name(), sortField.Table()) {
a.Log("distinct optimized for ordered output")
return plan.NewOrderedDistinct(n.Child), nil
}
Expand Down
56 changes: 43 additions & 13 deletions sql/analyzer/optimization_rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,24 +186,54 @@ func TestEraseProjection(t *testing.T) {
}

func TestOptimizeDistinct(t *testing.T) {
require := require.New(t)

t1 := memory.NewTable("foo", nil)
t2 := memory.NewTable("foo", nil)
t1 := memory.NewTable("foo", sql.Schema{
{Name: "a", Source: "foo"},
{Name: "b", Source: "foo"},
})

notSorted := plan.NewDistinct(plan.NewResolvedTable(t1))
sorted := plan.NewDistinct(plan.NewSort(nil, plan.NewResolvedTable(t2)))
testCases := []struct {
name string
child sql.Node
optimized bool
}{
{
"without sort",
plan.NewResolvedTable(t1),
false,
},
{
"sort but column not projected",
plan.NewSort(
[]plan.SortField{
{Column: gf(0, "foo", "c")},
},
plan.NewResolvedTable(t1),
),
false,
},
{
"sort and column projected",
plan.NewSort(
[]plan.SortField{
{Column: gf(0, "foo", "a")},
},
plan.NewResolvedTable(t1),
),
true,
},
}

rule := getRule("optimize_distinct")

analyzedNotSorted, err := rule.Apply(sql.NewEmptyContext(), nil, notSorted)
require.NoError(err)

analyzedSorted, err := rule.Apply(sql.NewEmptyContext(), nil, sorted)
require.NoError(err)
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
node, err := rule.Apply(sql.NewEmptyContext(), nil, plan.NewDistinct(tt.child))
require.NoError(t, err)

require.Equal(notSorted, analyzedNotSorted)
require.Equal(plan.NewOrderedDistinct(sorted.Child), analyzedSorted)
_, ok := node.(*plan.OrderedDistinct)
require.Equal(t, tt.optimized, ok)
})
}
}

func TestMoveJoinConditionsToFilter(t *testing.T) {
Expand Down