Skip to content

Commit

Permalink
Add outer join tests
Browse files: browse the repository at this point in the history
  • Loading branch information
viirya committed Feb 1, 2024
1 parent 00d4160 commit 99940c2
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 5 deletions.
7 changes: 5 additions & 2 deletions datafusion/physical-plan/src/joins/sort_merge_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1172,8 +1172,11 @@ impl SMJStream {
.collect::<Vec<_>>()
};

let filter_columns =
get_filter_column(&self.filter, &streamed_columns, &buffered_columns);
let filter_columns = if matches!(self.join_type, JoinType::Right) {
get_filter_column(&self.filter, &buffered_columns, &streamed_columns)
} else {
get_filter_column(&self.filter, &streamed_columns, &buffered_columns)
};

let columns = if matches!(self.join_type, JoinType::Right) {
buffered_columns.extend(streamed_columns);
Expand Down
79 changes: 76 additions & 3 deletions datafusion/sqllogictest/test_files/sort_merge_join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ statement ok
set datafusion.optimizer.prefer_hash_join = false;

statement ok
CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100);
CREATE TABLE t1(a text, b int) AS VALUES ('Alice', 50), ('Alice', 100), ('Bob', 1);

statement ok
CREATE TABLE t2(a text, b int) AS VALUES ('Alice', 2), ('Alice', 1);

# equijoin and join filter (sort merge join)

# inner join query plan with join filter
query TT
EXPLAIN SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a AND t2.b * 50 <= t1.b
----
Expand All @@ -48,6 +47,7 @@ SortMergeJoin: join_type=Inner, on=[(a@0, a@0)], filter=CAST(b@1 AS Int64) * 50
------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1
--------MemoryExec: partitions=1, partition_sizes=[1]

# inner join with join filter
query TITI rowsort
SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a AND t2.b * 50 <= t1.b
----
Expand All @@ -67,6 +67,79 @@ query TITI rowsort
SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a AND t2.b > t1.b
----

# left join without join filter
query TITI rowsort
SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1
Alice 50 Alice 2
Bob 1 NULL NULL

# left join with join filter
query TITI rowsort
SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a AND t2.b * 50 <= t1.b
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1

query TITI rowsort
SELECT * FROM t1 LEFT JOIN t2 ON t1.a = t2.a AND t2.b < t1.b
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1
Alice 50 Alice 2

# right join without join filter
query TITI rowsort
SELECT * FROM t1 RIGHT JOIN t2 ON t1.a = t2.a
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1
Alice 50 Alice 2

# right join with join filter
query TITI rowsort
SELECT * FROM t1 RIGHT JOIN t2 ON t1.a = t2.a AND t2.b * 50 <= t1.b
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1

query TITI rowsort
SELECT * FROM t1 RIGHT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1
Alice 50 Alice 2

# full join without join filter
query TITI rowsort
SELECT * FROM t1 FULL JOIN t2 ON t1.a = t2.a
----
Alice 100 Alice 1
Alice 100 Alice 2
Alice 50 Alice 1
Alice 50 Alice 2
Bob 1 NULL NULL

# full join with join filter
query TITI rowsort
SELECT * FROM t1 FULL JOIN t2 ON t1.a = t2.a AND t2.b * 50 > t1.b
----
Alice 50 Alice 2

query TITI rowsort
SELECT * FROM t1 FULL JOIN t2 ON t1.a = t2.a AND t1.b > t2.b + 50
----
Alice 100 Alice 1
Alice 100 Alice 2

statement ok
set datafusion.optimizer.prefer_hash_join = true;

Expand Down

0 comments on commit 99940c2

Please sign in to comment.