-
Notifications
You must be signed in to change notification settings - Fork 166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: Enable cast string to int tests and fix compatibility issue #453
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,7 @@ macro_rules! cast_utf8_to_int { | |
for i in 0..len { | ||
if $array.is_null(i) { | ||
cast_array.append_null() | ||
} else if let Some(cast_value) = $cast_method($array.value(i).trim(), $eval_mode)? { | ||
} else if let Some(cast_value) = $cast_method($array.value(i), $eval_mode)? { | ||
cast_array.append_value(cast_value); | ||
} else { | ||
cast_array.append_null() | ||
|
@@ -1010,14 +1010,6 @@ fn cast_string_to_int_with_range_check( | |
} | ||
} | ||
|
||
#[derive(PartialEq)] | ||
enum State { | ||
SkipLeadingWhiteSpace, | ||
SkipTrailingWhiteSpace, | ||
ParseSignAndDigits, | ||
ParseFractionalDigits, | ||
} | ||
|
||
/// Equivalent to | ||
/// - org.apache.spark.unsafe.types.UTF8String.toInt(IntWrapper intWrapper, boolean allowDecimal) | ||
/// - org.apache.spark.unsafe.types.UTF8String.toLong(LongWrapper longWrapper, boolean allowDecimal) | ||
|
@@ -1029,34 +1021,22 @@ fn do_cast_string_to_int< | |
type_name: &str, | ||
min_value: T, | ||
) -> CometResult<Option<T>> { | ||
let len = str.len(); | ||
if str.is_empty() { | ||
let trimmed_str = str.trim(); | ||
if trimmed_str.is_empty() { | ||
return none_or_err(eval_mode, type_name, str); | ||
} | ||
|
||
let len = trimmed_str.len(); | ||
let mut result: T = T::zero(); | ||
let mut negative = false; | ||
let radix = T::from(10); | ||
let stop_value = min_value / radix; | ||
let mut state = State::SkipLeadingWhiteSpace; | ||
let mut parsed_sign = false; | ||
|
||
for (i, ch) in str.char_indices() { | ||
// skip leading whitespace | ||
if state == State::SkipLeadingWhiteSpace { | ||
if ch.is_whitespace() { | ||
// consume this char | ||
continue; | ||
} | ||
// change state and fall through to next section | ||
state = State::ParseSignAndDigits; | ||
} | ||
let mut parse_sign_and_digits = true; | ||
|
||
if state == State::ParseSignAndDigits { | ||
if !parsed_sign { | ||
for (i, ch) in trimmed_str.char_indices() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We process the trimmed string here and we no longer need the logic for skipping leading and trailing whitespace |
||
if parse_sign_and_digits { | ||
if i == 0 { | ||
negative = ch == '-'; | ||
let positive = ch == '+'; | ||
parsed_sign = true; | ||
if negative || positive { | ||
if i + 1 == len { | ||
// input string is just "+" or "-" | ||
|
@@ -1070,7 +1050,7 @@ fn do_cast_string_to_int< | |
if ch == '.' { | ||
if eval_mode == EvalMode::Legacy { | ||
// truncate decimal in legacy mode | ||
state = State::ParseFractionalDigits; | ||
parse_sign_and_digits = false; | ||
continue; | ||
} else { | ||
return none_or_err(eval_mode, type_name, str); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. error messages refer to the original input string, not the trimmed version |
||
|
@@ -1102,27 +1082,12 @@ fn do_cast_string_to_int< | |
return none_or_err(eval_mode, type_name, str); | ||
} | ||
} | ||
} | ||
|
||
if state == State::ParseFractionalDigits { | ||
// This is the case when we've encountered a decimal separator. The fractional | ||
// part will not change the number, but we will verify that the fractional part | ||
// is well-formed. | ||
if ch.is_whitespace() { | ||
// finished parsing fractional digits, now need to skip trailing whitespace | ||
state = State::SkipTrailingWhiteSpace; | ||
// consume this char | ||
continue; | ||
} | ||
} else { | ||
// make sure fractional digits are valid digits but ignore them | ||
if !ch.is_ascii_digit() { | ||
return none_or_err(eval_mode, type_name, str); | ||
} | ||
} | ||
|
||
// skip trailing whitespace | ||
if state == State::SkipTrailingWhiteSpace && !ch.is_whitespace() { | ||
return none_or_err(eval_mode, type_name, str); | ||
} | ||
} | ||
|
||
if !negative { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We originally trimmed the input strings here, so error messages did not have access to the original inputs