Skip to content

Commit

Permalink
fix: Incorrect LEFT JOIN evaluation result on OR conditions
Browse files (browse the repository at this point in the history)
  • Loading branch information
viirya committed Jul 1, 2024
1 parent 9fc5312 commit 7549e8b
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 3 deletions.
7 changes: 4 additions & 3 deletions datafusion/optimizer/src/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,9 @@ fn push_down_all_join(
}
}

+ let (on_left_preserved, on_right_preserved) = on_lr_is_preserved(join.join_type)?;
+
if !on_filter.is_empty() {
- let (on_left_preserved, on_right_preserved) = on_lr_is_preserved(join.join_type)?;
for on in on_filter {
if on_left_preserved && can_pushdown_join_predicate(&on, left_schema)? {
left_push.push(on)
Expand All @@ -441,11 +442,11 @@ fn push_down_all_join(

// Extract from OR clause, generate new predicates for both side of join if possible.
// We only track the unpushable predicates above.
- if left_preserved {
+ if on_left_preserved {
left_push.extend(extract_or_clauses_for_join(&keep_predicates, left_schema));
left_push.extend(extract_or_clauses_for_join(&join_conditions, left_schema));
}
- if right_preserved {
+ if on_right_preserved {
right_push.extend(extract_or_clauses_for_join(&keep_predicates, right_schema));
right_push.extend(extract_or_clauses_for_join(&join_conditions, right_schema));
}
Expand Down
73 changes: 73 additions & 0 deletions datafusion/sqllogictest/test_files/join.slt
Original file line number Diff line number Diff line change
Expand Up @@ -793,3 +793,76 @@ DROP TABLE companies

statement ok
DROP TABLE leads

# create tables
statement ok
CREATE TABLE employees(emp_id INT, name VARCHAR);

statement ok
CREATE TABLE department(emp_id INT, dept_name VARCHAR);

statement ok
INSERT INTO employees (emp_id, name) VALUES (1, 'Alice'), (2, 'Bob'), (3, 'Carol');

statement ok
INSERT INTO department (emp_id, dept_name) VALUES (1, 'HR'), (3, 'Engineering'), (4, 'Sales');

query TT
EXPLAIN SELECT e.emp_id, e.name, d.dept_name
FROM employees AS e
LEFT JOIN department AS d
ON (e.name = 'Alice' OR e.name = 'Bob');
----
logical_plan
01)Left Join: Filter: e.name = Utf8("Alice") OR e.name = Utf8("Bob")
02)--SubqueryAlias: e
03)----TableScan: employees projection=[emp_id, name]
04)--SubqueryAlias: d
05)----TableScan: department projection=[dept_name]
physical_plan
01)ProjectionExec: expr=[emp_id@1 as emp_id, name@2 as name, dept_name@0 as dept_name]
02)--NestedLoopJoinExec: join_type=Right, filter=name@0 = Alice OR name@0 = Bob
03)----MemoryExec: partitions=1, partition_sizes=[1]
04)----MemoryExec: partitions=1, partition_sizes=[1]

query ITT
SELECT e.emp_id, e.name, d.dept_name
FROM employees AS e
LEFT JOIN department AS d
ON (e.name = 'Alice' OR e.name = 'Bob');
----
1 Alice HR
2 Bob HR
1 Alice Engineering
2 Bob Engineering
1 Alice Sales
2 Bob Sales
3 Carol NULL

query ITT
SELECT e.emp_id, e.name, d.dept_name
FROM employees e
LEFT JOIN department d
ON (e.name = 'NotExist1' OR e.name = 'NotExist2');
----
1 Alice NULL
2 Bob NULL
3 Carol NULL

query ITT
SELECT e.emp_id, e.name, d.dept_name
FROM employees e
LEFT JOIN department d
ON (e.name = 'Alice' OR e.name = 'NotExist');
----
1 Alice HR
1 Alice Engineering
1 Alice Sales
2 Bob NULL
3 Carol NULL

statement ok
DROP TABLE employees

statement ok
DROP TABLE department

0 comments on commit 7549e8b

Please sign in to comment.