From 21fcee837f05b67d02a1162b37f0610d072c4a82 Mon Sep 17 00:00:00 2001 From: TCeason <33082201+TCeason@users.noreply.github.com> Date: Sun, 7 Jan 2024 15:04:49 +0800 Subject: [PATCH] feat(query): string to ts support parse ISO 8601 tz format (#14253) --- .../cursor_ext/cursor_read_datetime_ext.rs | 111 ++++++++++++------ .../tests/it/cursor_ext/read_datetime_ext.rs | 6 +- .../11_0001_data_type_date_time.test | 4 +- .../02_0012_function_datetimes_tz.test | 33 +++++- 4 files changed, 111 insertions(+), 43 deletions(-) diff --git a/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs b/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs index 5521b24bf7f8..136a8baea55c 100644 --- a/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs +++ b/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs @@ -62,20 +62,6 @@ fn parse_time_part(buf: &[u8], size: usize) -> Result { } } -// fn calc_offset(current_tz_sec: i64, val_tz_sec: i64, dt: &DateTime, tz: &Tz) -> () { -// let offset = (current_tz_sec - val_tz_sec) * 1000 * 1000; -// let mut ts = dt.timestamp_micros(); -// ts += offset; -// // TODO: need support timestamp_micros in chrono-0.4.22/src/offset/mod.rs -// // use like tz.timestamp_nanos() -// let (mut secs, mut micros) = (ts / 1_000_000, ts % 1_000_000); -// if ts < 0 { -// secs -= 1; -// micros += 1_000_000; -// } -// Ok(tz.timestamp_opt(secs, (micros as u32) * 1000).unwrap()) -// } - impl BufferReadDateTimeExt for Cursor where T: AsRef<[u8]> { @@ -276,26 +262,14 @@ where T: AsRef<[u8]> west_tz: bool, calc_offset: impl Fn(i64, i64, &DateTime) -> Result>, ) -> Result> { - let n = self.keep_read(buf, |f| f.is_ascii_digit()); - if n != 2 { - // +0800 will err in there - return Err(ErrorCode::BadBytes( - "Timezone Parsing Error: Incorrect format. Expected format for timezone offset is [+08:00].", - )); - } - let hour_offset: i32 = lexical_core::FromLexical::from_lexical(buf.as_slice()).unwrap(); - if (0..15).contains(&hour_offset) { - buf.clear(); - self.ignore_byte(b':'); - if self.keep_read(buf, |f| f.is_ascii_digit()) != 2 { - // +08[other byte]00 will err in there, e.g. +08-00 - return Err(ErrorCode::BadBytes( - "Timezone Parsing Error: Incorrect format in hour part. Expected format for timezone offset is [+08:00].", - )); - } - let minute_offset: i32 = - lexical_core::FromLexical::from_lexical(buf.as_slice()).unwrap(); - // max utc: 14:00, min utc: 00:00 + fn get_hour_minute_offset( + tz: &Tz, + dt: &DateTime, + west_tz: bool, + calc_offset: &impl Fn(i64, i64, &DateTime) -> Result>, + hour_offset: i32, + minute_offset: i32, + ) -> Result, ErrorCode> { if (hour_offset == 14 && minute_offset == 0) || ((0..60).contains(&minute_offset) && hour_offset < 14) { @@ -323,11 +297,70 @@ where T: AsRef<[u8]> minute_offset ))) } - } else { - Err(ErrorCode::BadBytes(format!( - "Invalid Timezone Offset: The hour offset '{}' is outside the valid range. Expected range is [00-14] within a timezone gap of [-14:00, +14:00].", - hour_offset - ))) + } + let n = self.keep_read(buf, |f| f.is_ascii_digit()); + match n { + 2 => { + let hour_offset: i32 = + lexical_core::FromLexical::from_lexical(buf.as_slice()).unwrap(); + if (0..15).contains(&hour_offset) { + buf.clear(); + if self.ignore_byte(b':') { + if self.keep_read(buf, |f| f.is_ascii_digit()) != 2 { + // +08[other byte]00 will err in there, e.g. +08-00 + return Err(ErrorCode::BadBytes( + "Timezone Parsing Error: Incorrect format in hour part. The time zone format must conform to the ISO 8601 standard.", + )); + } + let minute_offset: i32 = + lexical_core::FromLexical::from_lexical(buf.as_slice()).unwrap(); + // max utc: 14:00, min utc: 00:00 + get_hour_minute_offset( + tz, + dt, + west_tz, + &calc_offset, + hour_offset, + minute_offset, + ) + } else { + get_hour_minute_offset(tz, dt, west_tz, &calc_offset, hour_offset, 0) + } + } else { + Err(ErrorCode::BadBytes(format!( + "Invalid Timezone Offset: The hour offset '{}' is outside the valid range. Expected range is [00-14] within a timezone gap of [-14:00, +14:00].", + hour_offset + ))) + } + } + 4 => { + let hour_offset = &buf.as_slice()[..2]; + let hour_offset: i32 = + lexical_core::FromLexical::from_lexical(hour_offset).unwrap(); + let minute_offset = &buf.as_slice()[2..]; + let minute_offset: i32 = + lexical_core::FromLexical::from_lexical(minute_offset).unwrap(); + buf.clear(); + // max utc: 14:00, min utc: 00:00 + if (0..15).contains(&hour_offset) { + get_hour_minute_offset( + tz, + dt, + west_tz, + &calc_offset, + hour_offset, + minute_offset, + ) + } else { + Err(ErrorCode::BadBytes(format!( + "Invalid Timezone Offset: The hour offset '{}' is outside the valid range. Expected range is [00-14] within a timezone gap of [-14:00, +14:00].", + hour_offset + ))) + } + } + _ => Err(ErrorCode::BadBytes( + "Timezone Parsing Error: Incorrect format. The time zone format must conform to the ISO 8601 standard.", + )), } } } diff --git a/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs b/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs index 170c02e18912..6a569a6b74c8 100644 --- a/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs +++ b/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs @@ -21,10 +21,14 @@ use databend_common_io::cursor_ext::*; #[test] fn test_read_timestamp_text() -> Result<()> { let mut reader = Cursor::new( - "2009-01-01 00:00:00.12,2009-01-01 00:00:00.1234,2009-01-01 00:00:00.1234567891,2022-02-02T,2022-02-02 12,2022-02-02T13:4:,2022-02-02 12:03,2023-03-03,2022-02-02,2009-01-01 3:2:1.123,2009-01-01 0:00:00,2009-01-01 00:00:00.123,2009-01-01 00:00:00.123456,0002-03-03T00:01:02,2022-03-04T00:01:02+08:00,2022-03-04T00:01:02-08:00,0000-00-00,0000-00-00 00:00:00,0001-01-01 00:00:00,2020-01-01T11:11:11Z,2009-01-03 00:00:00,2020-01-01T11:11:11.123Z,2055-02-03 10:00:20.234+08:00,2055-02-03 10:00:20.234-08:00,1022-05-16T03:25:02.000000+08:00".as_bytes(), + "2023-12-25T02:31:07.485281+0545,2023-12-25T02:31:07.485281-0545,2023-12-25T02:31:07.485281+05,2023-12-25T02:31:07.485281-05,2009-01-01 00:00:00.12,2009-01-01 00:00:00.1234,2009-01-01 00:00:00.1234567891,2022-02-02T,2022-02-02 12,2022-02-02T13:4:,2022-02-02 12:03,2023-03-03,2022-02-02,2009-01-01 3:2:1.123,2009-01-01 0:00:00,2009-01-01 00:00:00.123,2009-01-01 00:00:00.123456,0002-03-03T00:01:02,2022-03-04T00:01:02+08:00,2022-03-04T00:01:02-08:00,0000-00-00,0000-00-00 00:00:00,0001-01-01 00:00:00,2020-01-01T11:11:11Z,2009-01-03 00:00:00,2020-01-01T11:11:11.123Z,2055-02-03 10:00:20.234+08:00,2055-02-03 10:00:20.234-08:00,1022-05-16T03:25:02.000000+08:00".as_bytes(), ); let tz = Tz::UTC; let expected = vec![ + "2023-12-24T20:46:07.485281UTC", + "2023-12-25T08:16:07.485281UTC", + "2023-12-24T21:31:07.485281UTC", + "2023-12-25T07:31:07.485281UTC", "2009-01-01T00:00:00.120UTC", "2009-01-01T00:00:00.123400UTC", "2009-01-01T00:00:00.123456789UTC", diff --git a/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test b/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test index a5da6e066354..65d1a12bb6fb 100644 --- a/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test +++ b/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test @@ -267,10 +267,10 @@ insert into t values(1, '2022-02-03 03:02:00Z.123') statement error 1006 insert into t values(1, '2022-02-03 03:02:00Z+08:00') -statement error 1006 +statement ok insert into t values(1, '2022-02-03 03:02:00+0800') -statement error 1006 +statement ok insert into t values(1, '2022-02-03T03:02:00+0800') statement ok diff --git a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz.test b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz.test index 711a56096457..7552bee2a915 100644 --- a/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz.test +++ b/tests/sqllogictests/suites/query/02_function/02_0012_function_datetimes_tz.test @@ -1,3 +1,6 @@ +statement ok +unset enable_query_result_cache; + statement ok drop table if exists tt all @@ -9,7 +12,6 @@ select to_timestamp(1630320462000000) ---- 2021-08-30 10:47:42.000000 - query T select to_timestamp('2000-01-01 00:00:00') ---- @@ -34,6 +36,35 @@ select to_timestamp('2000-01-01 12:00:00+08:00') ---- 2000-01-01 12:00:00.000000 +query T +select to_timestamp('2000-01-01 12:00:00+08') +---- +2000-01-01 12:00:00.000000 + +query T +select to_timestamp('2000-01-01 12:00:00-08') +---- +2000-01-02 04:00:00.000000 + +query T +select to_timestamp('2000-01-01 12:00:00+0811') +---- +2000-01-01 11:49:00.000000 + +query T +select to_timestamp('2000-01-01 12:00:00-0811') +---- +2000-01-02 04:11:00.000000 + +statement error 1006 +select to_timestamp('2000-01-01 12:00:00-08112') + +statement error 1006 +select to_timestamp('2000-01-01 12:00:00-081') + +statement error 1006 +select to_timestamp('2000-01-01 12:00:00+08:') + statement ok set timezone = 'UTC'