diff --git a/hybridse/src/codegen/udf_ir_builder_test.cc b/hybridse/src/codegen/udf_ir_builder_test.cc index 13a82a1a925..6cd82be7859 100644 --- a/hybridse/src/codegen/udf_ir_builder_test.cc +++ b/hybridse/src/codegen/udf_ir_builder_test.cc @@ -1078,14 +1078,21 @@ TEST_F(UdfIRBuilderTest, DateDiff) { CheckUdf, Nullable, Nullable>(func_name, 44924, "2022-12-31", "1900-01-01"); CheckUdf, Nullable, Nullable>(func_name, 50, "20220620", "2022-05-01 11:11:11"); - CheckUdf, Nullable, Nullable>(func_name, 0, "2022-05-01", "20220501"); + CheckUdf, Nullable, Nullable>(func_name, 0, + "2022-05-01", "20220501"); CheckUdf, Nullable, Nullable>(func_name, nullptr, "2022-02-29", "20220501"); - CheckUdf, Nullable, Nullable>(func_name, nullptr, "1899-05-20", - "2020-05-20"); + CheckUdf, Nullable, Nullable>(func_name, 9, "1899-05-20", "1899-05-11"); CheckUdf, Nullable, Nullable>(func_name, nullptr, "2022-05-40", "2020-05-20"); - CheckUdf, Nullable, Nullable>(func_name, nullptr, "2020-05-20", - "1899-05-20"); + CheckUdf, Nullable, Nullable>(func_name, -30, "1199-10-12", "1199-11-11"); + // rfc3399 full format + CheckUdf, Nullable, Nullable>( + func_name, 20, "2000-01-01t00:12:00.1+08:00", "1999-12-12T12:12:12+08:00"); + CheckUdf, Nullable, Nullable>( + func_name, 19, "2000-01-01t00:12:00.1+08:00", "1999-12-12T20:12:12Z"); + CheckUdf, Nullable, Nullable>( + func_name, 20, "2000-01-01t06:12:00.1+08:00", "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, nullptr, "20220501"); CheckUdf, Nullable, Nullable>(func_name, nullptr, "2022-05-01", nullptr); CheckUdf, Nullable, Nullable>(func_name, nullptr, nullptr, nullptr); @@ -1093,6 +1100,8 @@ TEST_F(UdfIRBuilderTest, DateDiff) { // mix types CheckUdf, Nullable, Nullable>(func_name, -19, "2022-05-01", Date(2022, 5, 20)); CheckUdf, Nullable, Nullable>(func_name, 19, Date(2022, 5, 20), "2022-05-01"); + CheckUdf, Nullable, Nullable>(func_name, 3, Date(1900, 1, 1), "1899-12-29"); + CheckUdf, Nullable, Nullable>(func_name, -3, "1899-12-29", Date(1900, 1, 1)); CheckUdf, Nullable, Nullable>(func_name, nullptr, nullptr, "2022-05-01"); CheckUdf, Nullable, Nullable>(func_name, nullptr, Date(2022, 5, 20), nullptr); CheckUdf, Nullable, Nullable>(func_name, nullptr, nullptr, nullptr); @@ -1101,6 +1110,29 @@ TEST_F(UdfIRBuilderTest, DateDiff) { CheckUdf, Nullable, Nullable>(func_name, nullptr, nullptr, nullptr); } +TEST_F(UdfIRBuilderTest, DateDiffNull) { + auto func_name = "datediff"; + + // out-of-range format + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1900-01-00", + "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1977-13-01", + "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "19771232", + "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1999-12-12T25:12:12Z", + "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1999-12-12T12:66:12Z", + "1999-12-12T12:12:12Z"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1999-12-12T12:00:61Z", + "1999-12-12T12:12:12Z"); + + // invalid format + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1999-12-12T12:12:12Z", + "202 2-12-2 9"); + CheckUdf, Nullable, Nullable>(func_name, nullptr, "1999-12-12T12:12:12Z", + "12:30:30"); +} class UdfIRCastTest : public ::testing::TestWithParam>> {}; diff --git a/hybridse/src/udf/default_udf_library.cc b/hybridse/src/udf/default_udf_library.cc index f2c9bc1afd8..8b98212fffb 100644 --- a/hybridse/src/udf/default_udf_library.cc +++ b/hybridse/src/udf/default_udf_library.cc @@ -2591,16 +2591,21 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() { }); RegisterExternal("datediff") - .args(reinterpret_cast(static_cast(v1::date_diff))) - .return_by_arg(true) - .returns>() + .args(static_cast(v1::date_diff)) .doc(R"( @brief days difference from date1 to date2 Supported date string style: - yyyy-mm-dd - yyyymmdd - - yyyy-mm-dd hh:mm:ss + - yyyy-mm-dd HH:MM:SS + - yyyy-mm-ddTHH:MM:SS.fff+HH:MM (RFC3399 format) + + Dates from string are transformed into the same time zone (which is currently always UTC+8) before differentiation, + dates from date type by default is at UTC+8, you may see a +1/-1 difference if the two date string have different time zones. + + Hint: since openmldb date type limits range from year 1900, to datadiff from/to a date before + 1900, pass it as string. Example: @@ -2614,20 +2619,11 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() { @endcode @since 0.7.0)"); RegisterExternal("datediff") - .args( - reinterpret_cast(static_cast(v1::date_diff))) - .return_by_arg(true) - .returns>(); + .args(static_cast(v1::date_diff)); RegisterExternal("datediff") - .args( - reinterpret_cast(static_cast(v1::date_diff))) - .return_by_arg(true) - .returns>(); + .args(static_cast(v1::date_diff)); RegisterExternal("datediff") - .args( - reinterpret_cast(static_cast(v1::date_diff))) - .return_by_arg(true) - .returns>(); + .args(static_cast(v1::date_diff)); RegisterExternal("unix_timestamp") .args(reinterpret_cast(static_cast(v1::date_to_unix_timestamp))) diff --git a/hybridse/src/udf/udf.cc b/hybridse/src/udf/udf.cc index c32a58b8adb..2ec7033472f 100644 --- a/hybridse/src/udf/udf.cc +++ b/hybridse/src/udf/udf.cc @@ -16,7 +16,6 @@ #include "udf/udf.h" -#include #include #include @@ -28,6 +27,7 @@ #include "absl/strings/ascii.h" #include "absl/strings/str_replace.h" #include "absl/time/civil_time.h" +#include "absl/time/time.h" #include "base/iterator.h" #include "boost/date_time.hpp" #include "boost/date_time/gregorian/parsers.hpp" @@ -37,7 +37,7 @@ #include "codec/row.h" #include "codec/type_codec.h" #include "codegen/fn_ir_builder.h" -#include "farmhash.h" // NOLINT +#include "farmhash.h" #include "node/node_manager.h" #include "node/sql_node.h" #include "re2/re2.h" @@ -57,6 +57,20 @@ using openmldb::base::StringRef; using openmldb::base::Timestamp; using openmldb::base::Date; +// strftime()-like formatting options with extensions +// ref absl::FormatTime +static constexpr char DATE_FMT_YMD_1[] = "%E4Y-%m-%d"; +static constexpr char DATE_FMT_YMD_2[] = "%E4Y%m%d"; +static constexpr char DATE_FMT_YMDHMS[] = "%E4Y-%m-%d %H:%M:%S"; +static constexpr char DATE_FMT_RF3399_FULL[] = "%Y-%m-%d%ET%H:%M:%E*S%Ez"; + +// TODO(chenjing): 时区统一配置 +static constexpr int32_t TZ = 8; +static const absl::TimeZone DEFAULT_TZ = absl::FixedTimeZone(TZ * 60 * 60); +static constexpr time_t TZ_OFFSET = TZ * 3600000; +static constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2M +bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY; + void hex(StringRef *str, StringRef *output) { std::ostringstream ss; for (uint32_t i=0; i < str->size_; i++) { @@ -104,12 +118,6 @@ void unhex(StringRef *str, StringRef *output, bool* is_null) { } } -// TODO(chenjing): 时区统一配置 -constexpr int32_t TZ = 8; -constexpr time_t TZ_OFFSET = TZ * 3600000; -constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2M -bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY; - void trivial_fun() {} void dayofyear(int64_t ts, int32_t* out, bool* is_null) { @@ -818,7 +826,26 @@ void string_to_date(StringRef *str, Date *output, return; } -void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) { +absl::StatusOr string_to_time(absl::string_view ref) { + absl::string_view fmt = DATE_FMT_RF3399_FULL; + if (19 == ref.size()) { + fmt = DATE_FMT_YMDHMS; + } else if (10 == ref.size()) { + fmt = DATE_FMT_YMD_1; + } else if (8 == ref.size()) { + fmt = DATE_FMT_YMD_2; + } + absl::Time tm; + std::string err; + bool ret = absl::ParseTime(fmt, ref, &tm, &err); + + if (!ret) { + return absl::InvalidArgumentError(err); + } + return tm; +} + +void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null) { if (date1 == nullptr || date2 == nullptr || date1->date_ <= 0 || date2->date_ <= 0) { *is_null = true; return; @@ -838,36 +865,61 @@ void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) { *is_null = false; } -void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null) { - Date d1; - string_to_date(date1, &d1, is_null); - if (*is_null) { +void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null) { + auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_)); + if (!t1.ok()) { + *is_null = true; return; } - Date d2; - string_to_date(date2, &d2, is_null); - if (*is_null) { + auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_)); + if (!t2.ok()) { + *is_null = true; return; } - date_diff(&d1, &d2, diff, is_null); + + auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ); + auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ); + + *diff = d1 - d2; + *is_null = false; } -void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null) { - Date d1; - string_to_date(date1, &d1, is_null); - if (*is_null) { +void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null) { + auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_)); + if (!t1.ok()) { + *is_null = true; return; } - date_diff(&d1, date2, diff, is_null); + auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ); + + int32_t year, month, day; + if (!Date::Decode(date2->date_, &year, &month, &day)) { + *is_null = true; + return; + } + auto d2 = absl::CivilDay(year, month, day); + + *diff = d1 - d2; + *is_null = false; } -void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null) { - Date d2; - string_to_date(date2, &d2, is_null); - if (*is_null) { +void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null) { + auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_)); + if (!t2.ok()) { + *is_null = true; return; } - date_diff(date1, &d2, diff, is_null); + auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ); + + int32_t year, month, day; + if (!Date::Decode(date1->date_, &year, &month, &day)) { + *is_null = true; + return; + } + auto d1 = absl::CivilDay(year, month, day); + + *diff = d1 - d2; + *is_null = false; } // cast string to timestamp with yyyy-mm-dd or YYYY-mm-dd HH:MM:SS diff --git a/hybridse/src/udf/udf.h b/hybridse/src/udf/udf.h index c2f2d2dc6f0..a761e99f88b 100644 --- a/hybridse/src/udf/udf.h +++ b/hybridse/src/udf/udf.h @@ -367,10 +367,10 @@ void timestamp_to_date(Timestamp *timestamp, Date *output, bool *is_null); void date_to_string(Date *date, StringRef *output); -void date_diff(Date *date1, Date *date2, int *diff, bool *is_null); -void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null); -void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null); -void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null); +void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null); +void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null); +void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null); +void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null); void like(StringRef *name, StringRef *pattern, StringRef *escape, bool *out, bool *is_null); @@ -384,6 +384,8 @@ void regexp_like(StringRef *name, StringRef *pattern, bool *out, bool *is_null); void date_to_timestamp(Date *date, Timestamp *output, bool *is_null); void string_to_date(StringRef *str, Date *output, bool *is_null); +absl::StatusOr string_to_time(absl::string_view str); + void string_to_timestamp(StringRef *str, Timestamp *output, bool *is_null); void date_to_unix_timestamp(Date *date, int64_t *output, bool *is_null); void string_to_unix_timestamp(StringRef *str, int64_t *output, bool *is_null);