Skip to content

Commit

Permalink
Fix overflow in date_parser (apache#529)
Browse files Browse the repository at this point in the history
The date_parser was introduced in apache#383 and is mostly a direct port of
code in Spark. Since the code uses the JVM it has defined integer
overflow as wrapping. The proposed fixed is to use std::num::Wrapping to
get the same wrapping behavior in rust.  The overflown value will still
be disgarded in a later check that uses `current_segment_digits` so
allowing the overflow does not lead to correctness issues.

This resolves one of the overflows discussed in apache#481
  • Loading branch information
eejbyfeldt authored and kazuyukitanimura committed Jul 1, 2024
1 parent 2253042 commit 0c6c5ad
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions core/src/execution/datafusion/expressions/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::{
any::Any,
fmt::{Debug, Display, Formatter},
hash::{Hash, Hasher},
num::Wrapping,
sync::Arc,
};

Expand Down Expand Up @@ -1570,7 +1571,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
let mut date_segments = [1, 1, 1];
let mut sign = 1;
let mut current_segment = 0;
let mut current_segment_value = 0;
let mut current_segment_value = Wrapping(0);
let mut current_segment_digits = 0;
let bytes = date_str.as_bytes();

Expand All @@ -1597,16 +1598,16 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
return return_result(date_str, eval_mode);
}
//if valid update corresponding segment with the current segment value.
date_segments[current_segment as usize] = current_segment_value;
current_segment_value = 0;
date_segments[current_segment as usize] = current_segment_value.0;
current_segment_value = Wrapping(0);
current_segment_digits = 0;
current_segment += 1;
} else if !b.is_ascii_digit() {
return return_result(date_str, eval_mode);
} else {
//increment value of current segment by the next digit
let parsed_value = (b - b'0') as i32;
current_segment_value = current_segment_value * 10 + parsed_value;
let parsed_value = Wrapping((b - b'0') as i32);
current_segment_value = current_segment_value * Wrapping(10) + parsed_value;
current_segment_digits += 1;
}
j += 1;
Expand All @@ -1622,7 +1623,7 @@ fn date_parser(date_str: &str, eval_mode: EvalMode) -> CometResult<Option<i32>>
return return_result(date_str, eval_mode);
}

date_segments[current_segment as usize] = current_segment_value;
date_segments[current_segment as usize] = current_segment_value.0;

match NaiveDate::from_ymd_opt(
sign * date_segments[0],
Expand Down Expand Up @@ -1836,6 +1837,8 @@ mod tests {
Some(" 202 "),
Some("\n 2020-\r8 "),
Some("2020-01-01T"),
// Overflows i32
Some("-4607172990231812908"),
]));

for eval_mode in &[EvalMode::Legacy, EvalMode::Try] {
Expand All @@ -1857,7 +1860,8 @@ mod tests {
None,
None,
None,
Some(18262)
Some(18262),
None
]
);
}
Expand Down

0 comments on commit 0c6c5ad

Please sign in to comment.