Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Dec 4, 2023
1 parent 6926b2f commit f22a1ca
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 28 deletions.
17 changes: 12 additions & 5 deletions datafusion/expr/src/window_frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ impl WindowFrame {
pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result<WindowFrame> {
if frame.units == WindowFrameUnits::Range && order_bys != 1 {
// Normally, RANGE frames require an ORDER BY clause with exactly one
// column. However, an ORDER BY clause may be absent in two edge cases:
// column. However, the ORDER BY clause may be absent, or present with
// more than one column, in two edge cases:
// 1. start bound is UNBOUNDED or CURRENT ROW
// 2. end bound is CURRENT ROW or UNBOUNDED.
// In these cases, we regularize the RANGE frame to be equivalent to a ROWS
Expand All @@ -158,11 +159,17 @@ pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result<WindowFram
|| frame.start_bound == WindowFrameBound::CurrentRow)
&& (frame.end_bound == WindowFrameBound::CurrentRow
|| frame.end_bound.is_unbounded())
&& order_bys == 0
{
frame.units = WindowFrameUnits::Rows;
frame.start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None));
frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None));
// If the ORDER BY clause is absent, the frame is equivalent to a ROWS
// frame with UNBOUNDED bounds.
// If an ORDER BY clause is present but has more than one column, the
// frame is unchanged.
if order_bys == 0 {
frame.units = WindowFrameUnits::Rows;
frame.start_bound =
WindowFrameBound::Preceding(ScalarValue::UInt64(None));
frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None));
}
} else {
plan_err!("RANGE requires exactly one ORDER BY column")?
}
Expand Down
74 changes: 51 additions & 23 deletions datafusion/sqllogictest/test_files/window.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ SELECT
794 95 95

#fn test_window_range_equivalent_frames
query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
query IIIIIII
SELECT
c9,
COUNT(*) OVER(ORDER BY c9, c1 RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt1,
Expand All @@ -1092,22 +1092,12 @@ SELECT
FROM aggregate_test_100
ORDER BY c9
LIMIT 5

query IIII
SELECT
c9,
COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt4,
COUNT(*) OVER(RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cnt5,
COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS cnt6
FROM aggregate_test_100
ORDER BY c9
LIMIT 5
----
28774375 100 100 100
63044568 100 100 100
141047417 100 100 100
141680161 100 100 100
145294611 100 100 100
28774375 1 1 1 100 100 100
63044568 1 2 1 100 100 100
141047417 1 3 1 100 100 100
141680161 1 4 1 100 100 100
145294611 1 5 1 100 100 100

#fn test_window_cume_dist
query IRR
Expand Down Expand Up @@ -3738,22 +3728,60 @@ FROM score_board s
statement ok
DROP TABLE score_board;

# RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause
# Regularize RANGE frame
query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
select a,
rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a) q
rank() over (order by a, a + 1 RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a

query II
select a,
rank() over (partition by a order by a RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a) q
rank() over (order by a RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column
select a,
rank() over (RANGE BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a

query II
select a,
rank() over (order by a, a + 1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

query II
select a,
rank() over (partition by a RANGE UNBOUNDED PRECEDING) rnk
from (select 1 a) q
rank() over (order by a RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY a
----
1 1
2 2

# TODO: this differs from Postgres, which returns [1, 1] for `rnk`.
# Commented out because it is currently flaky: the result depends on the order of the `a` column.
# query II
# select a,
# rank() over (RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
# from (select 1 a union select 2 a) q ORDER BY rnk
# ----
# 1 1
# 2 2

# TODO: this works in Postgres, which returns [1, 1].
query error DataFusion error: Arrow error: Invalid argument error: must either specify a row count or at least one column
select rank() over (RANGE between UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q;

# TODO: this differs from Postgres, which returns [1, 1] for `rnk`.
query I
select rank() over (order by 1 RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) rnk
from (select 1 a union select 2 a) q ORDER BY rnk
----
1
2

0 comments on commit f22a1ca

Please sign in to comment.