From 86871a8480e7fcfcc18dea90e842e5e325896ae5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Dec 2023 13:03:57 -0800 Subject: [PATCH] fix: RANGE frame can be regularized to ROWS frame only if empty ORDER BY clause --- datafusion/expr/src/window_frame.rs | 16 ++++--- datafusion/sqllogictest/test_files/window.slt | 44 ++++++++++++++++--- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index 5f161b85dd9a..978f7b508d14 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -148,18 +148,20 @@ impl WindowFrame { pub fn regularize(mut frame: WindowFrame, order_bys: usize) -> Result { if frame.units == WindowFrameUnits::Range && order_bys != 1 { // Normally, RANGE frames require an ORDER BY clause with exactly one - // column. However, an ORDER BY clause may be absent in two edge cases. + // column. However, an ORDER BY clause may be absent in two edge cases: + // 1. start bound is UNBOUNDED or CURRENT ROW + // 2. end bound is CURRENT ROW or UNBOUNDED. + // In these cases, we regularize the RANGE frame to be equivalent to a ROWS + // frame with the UNBOUNDED bounds. if (frame.start_bound.is_unbounded() || frame.start_bound == WindowFrameBound::CurrentRow) && (frame.end_bound == WindowFrameBound::CurrentRow || frame.end_bound.is_unbounded()) + && order_bys == 0 { - if order_bys == 0 { - frame.units = WindowFrameUnits::Rows; - frame.start_bound = - WindowFrameBound::Preceding(ScalarValue::UInt64(None)); - frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None)); - } + frame.units = WindowFrameUnits::Rows; + frame.start_bound = WindowFrameBound::Preceding(ScalarValue::UInt64(None)); + frame.end_bound = WindowFrameBound::Following(ScalarValue::UInt64(None)); } else { plan_err!("RANGE requires exactly one ORDER BY column")? } diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index bb6ca119480d..ee5447618506 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1080,7 +1080,7 @@ SELECT 794 95 95 #fn test_window_range_equivalent_frames -query IIIIIII +query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column SELECT c9, COUNT(*) OVER(ORDER BY c9, c1 RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt1, @@ -1092,12 +1092,22 @@ SELECT FROM aggregate_test_100 ORDER BY c9 LIMIT 5 + +query IIII +SELECT + c9, + COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS cnt4, + COUNT(*) OVER(RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cnt5, + COUNT(*) OVER(RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS cnt6 + FROM aggregate_test_100 + ORDER BY c9 + LIMIT 5 ---- -28774375 1 1 1 100 100 100 -63044568 1 2 1 100 100 100 -141047417 1 3 1 100 100 100 -141680161 1 4 1 100 100 100 -145294611 1 5 1 100 100 100 +28774375 100 100 100 +63044568 100 100 100 +141047417 100 100 100 +141680161 100 100 100 +145294611 100 100 100 #fn test_window_cume_dist query IRR @@ -3727,3 +3737,25 @@ FROM score_board s statement ok DROP TABLE score_board; + +# RANGE frame with/without (multiple) ORDER BY +query error DataFusion error: Error during planning: RANGE requires exactly one ORDER BY column +select a, + rank() over (partition by a order by a, a + 1 RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q + +query II +select a, + rank() over (partition by a order by a RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q +---- +1 1 +2 1 + +query II +select a, + rank() over (partition by a RANGE UNBOUNDED PRECEDING) rnk + from (select 1 a union all select 2 a) q +---- +1 1 +2 1