-
Notifications
You must be signed in to change notification settings - Fork 169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: [comet-parquet-exec] Schema adapter fixes #1139
Changes from 3 commits
bf6b4d4
d4d71bc
b6036f2
0b43b23
0602e24
74add9c
2021406
42d9f93
d220ae3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,14 +19,15 @@ use arrow_array::{ | |
cast::as_primitive_array, | ||
types::{Int32Type, TimestampMicrosecondType}, | ||
}; | ||
use arrow_schema::{ArrowError, DataType}; | ||
use arrow_schema::{ArrowError, DataType, TimeUnit}; | ||
use std::sync::Arc; | ||
|
||
use crate::timezone::Tz; | ||
use arrow::{ | ||
array::{as_dictionary_array, Array, ArrayRef, PrimitiveArray}, | ||
temporal_conversions::as_datetime, | ||
}; | ||
use arrow_array::types::TimestampMillisecondType; | ||
use chrono::{DateTime, Offset, TimeZone}; | ||
|
||
/// Preprocesses input arrays to add timezone information from Spark to Arrow array datatype or | ||
|
@@ -70,6 +71,9 @@ pub fn array_with_timezone( | |
Some(DataType::Timestamp(_, Some(_))) => { | ||
timestamp_ntz_to_timestamp(array, timezone.as_str(), Some(timezone.as_str())) | ||
} | ||
Some(DataType::Timestamp(_, None)) => { | ||
timestamp_ntz_to_timestamp(array, timezone.as_str(), None) | ||
} | ||
_ => { | ||
// Not supported | ||
panic!( | ||
|
@@ -80,7 +84,7 @@ pub fn array_with_timezone( | |
} | ||
} | ||
} | ||
DataType::Timestamp(_, Some(_)) => { | ||
DataType::Timestamp(TimeUnit::Microsecond, Some(_)) => { | ||
assert!(!timezone.is_empty()); | ||
let array = as_primitive_array::<TimestampMicrosecondType>(&array); | ||
let array_with_timezone = array.clone().with_timezone(timezone.clone()); | ||
|
@@ -92,6 +96,18 @@ pub fn array_with_timezone( | |
_ => Ok(array), | ||
} | ||
} | ||
DataType::Timestamp(TimeUnit::Millisecond, Some(_)) => { | ||
assert!(!timezone.is_empty()); | ||
let array = as_primitive_array::<TimestampMillisecondType>(&array); | ||
let array_with_timezone = array.clone().with_timezone(timezone.clone()); | ||
let array = Arc::new(array_with_timezone) as ArrayRef; | ||
match to_type { | ||
Some(DataType::Utf8) | Some(DataType::Date32) => { | ||
pre_timestamp_cast(array, timezone) | ||
} | ||
_ => Ok(array), | ||
} | ||
} | ||
DataType::Dictionary(_, value_type) | ||
if matches!(value_type.as_ref(), &DataType::Timestamp(_, _)) => | ||
{ | ||
|
@@ -127,7 +143,7 @@ fn timestamp_ntz_to_timestamp( | |
) -> Result<ArrayRef, ArrowError> { | ||
assert!(!tz.is_empty()); | ||
match array.data_type() { | ||
DataType::Timestamp(_, None) => { | ||
DataType::Timestamp(TimeUnit::Microsecond, None) => { | ||
let array = as_primitive_array::<TimestampMicrosecondType>(&array); | ||
let tz: Tz = tz.parse()?; | ||
let array: PrimitiveArray<TimestampMicrosecondType> = array.try_unary(|value| { | ||
|
@@ -146,6 +162,25 @@ fn timestamp_ntz_to_timestamp( | |
}; | ||
Ok(Arc::new(array_with_tz)) | ||
} | ||
DataType::Timestamp(TimeUnit::Millisecond, None) => { | ||
let array = as_primitive_array::<TimestampMillisecondType>(&array); | ||
let tz: Tz = tz.parse()?; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this called frequently (per row)? Parsing the timezone is somewhat expensive (and the timezone does not change for a session). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is called once per array, but I think the parsing could happen once during planning rather than once per batch/array. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense; we can defer this for the moment. |
||
let array: PrimitiveArray<TimestampMillisecondType> = array.try_unary(|value| { | ||
as_datetime::<TimestampMillisecondType>(value) | ||
.ok_or_else(|| datetime_cast_err(value)) | ||
.map(|local_datetime| { | ||
let datetime: DateTime<Tz> = | ||
tz.from_local_datetime(&local_datetime).unwrap(); | ||
datetime.timestamp_millis() | ||
}) | ||
})?; | ||
let array_with_tz = if let Some(to_tz) = to_timezone { | ||
array.with_timezone(to_tz) | ||
} else { | ||
array | ||
}; | ||
Ok(Arc::new(array_with_tz)) | ||
} | ||
_ => Ok(array), | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1 :)