Skip to content

Commit

Permalink
feat(udf): support datediff dates before 1900 (#3499)
Browse files Browse the repository at this point in the history
  • Loading branch information
aceforeverd authored Oct 7, 2023
1 parent 9190ecf commit 116fbf5
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 52 deletions.
42 changes: 37 additions & 5 deletions hybridse/src/codegen/udf_ir_builder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1078,21 +1078,30 @@ TEST_F(UdfIRBuilderTest, DateDiff) {
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 44924, "2022-12-31", "1900-01-01");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 50, "20220620",
"2022-05-01 11:11:11");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 0, "2022-05-01", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 0,
"2022-05-01", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-02-29", "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "1899-05-20",
"2020-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, 9, "1899-05-20", "1899-05-11");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-05-40",
"2020-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2020-05-20",
"1899-05-20");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, -30, "1199-10-12", "1199-11-11");
// RFC 3339 full format
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 20, "2000-01-01t00:12:00.1+08:00", "1999-12-12T12:12:12+08:00");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 19, "2000-01-01t00:12:00.1+08:00", "1999-12-12T20:12:12Z");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(
func_name, 20, "2000-01-01t06:12:00.1+08:00", "1999-12-12T12:12:12Z");

CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, nullptr, "20220501");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, "2022-05-01", nullptr);
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<StringRef>>(func_name, nullptr, nullptr, nullptr);

// mix types
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, -19, "2022-05-01", Date(2022, 5, 20));
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, 19, Date(2022, 5, 20), "2022-05-01");
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, 3, Date(1900, 1, 1), "1899-12-29");
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, -3, "1899-12-29", Date(1900, 1, 1));
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, nullptr, "2022-05-01");
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, Date(2022, 5, 20), nullptr);
CheckUdf<Nullable<int32_t>, Nullable<Date>, Nullable<StringRef>>(func_name, nullptr, nullptr, nullptr);
Expand All @@ -1101,6 +1110,29 @@ TEST_F(UdfIRBuilderTest, DateDiff) {
CheckUdf<Nullable<int32_t>, Nullable<StringRef>, Nullable<Date>>(func_name, nullptr, nullptr, nullptr);
}

// datediff must yield NULL (not a number) whenever either string argument
// cannot be parsed as a date; every case below expects a NULL result.
TEST_F(UdfIRBuilderTest, DateDiffNull) {
    using NullableInt = Nullable<int32_t>;
    using NullableStr = Nullable<StringRef>;

    auto udf_name = "datediff";

    // Well-formed layout, but one field is outside its valid range.
    // day-of-month 00
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1900-01-00", "1999-12-12T12:12:12Z");
    // month 13
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1977-13-01", "1999-12-12T12:12:12Z");
    // day-of-month 32 in the compact yyyymmdd layout
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "19771232", "1999-12-12T12:12:12Z");
    // hour 25
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1999-12-12T25:12:12Z",
                                                    "1999-12-12T12:12:12Z");
    // minute 66
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1999-12-12T12:66:12Z",
                                                    "1999-12-12T12:12:12Z");
    // second 61
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1999-12-12T12:00:61Z",
                                                    "1999-12-12T12:12:12Z");

    // Layout itself is not one of the supported date styles.
    // stray spaces inside the date
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1999-12-12T12:12:12Z", "202 2-12-2 9");
    // time of day only, no date part
    CheckUdf<NullableInt, NullableStr, NullableStr>(udf_name, nullptr, "1999-12-12T12:12:12Z", "12:30:30");
}

class UdfIRCastTest : public ::testing::TestWithParam<std::pair<absl::string_view, Nullable<int64_t>>> {};

Expand Down
28 changes: 12 additions & 16 deletions hybridse/src/udf/default_udf_library.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2591,16 +2591,21 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
});

RegisterExternal("datediff")
.args<Date, Date>(reinterpret_cast<void*>(static_cast<void (*)(Date*, Date*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>()
.args<Date, Date>(static_cast<void (*)(Date*, Date*, int32_t*, bool*)>(v1::date_diff))
.doc(R"(
@brief days difference from date1 to date2
Supported date string style:
- yyyy-mm-dd
- yyyymmdd
- yyyy-mm-dd hh:mm:ss
- yyyy-mm-dd HH:MM:SS
- yyyy-mm-ddTHH:MM:SS.fff+HH:MM (RFC3339 format)
Dates from strings are converted into the same time zone (currently always UTC+8) before differencing;
dates of date type are at UTC+8 by default, so you may see a +1/-1 difference if the two date strings have different time zones.
Hint: since the openmldb date type only covers years from 1900 onward, to datediff from/to a date before
1900, pass it as a string.
Example:
Expand All @@ -2614,20 +2619,11 @@ void DefaultUdfLibrary::InitTimeAndDateUdf() {
@endcode
@since 0.7.0)");
RegisterExternal("datediff")
.args<StringRef, StringRef>(
reinterpret_cast<void*>(static_cast<void (*)(StringRef*, StringRef*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<StringRef, StringRef>(static_cast<void (*)(StringRef*, StringRef*, int32_t*, bool*)>(v1::date_diff));
RegisterExternal("datediff")
.args<StringRef, Date>(
reinterpret_cast<void*>(static_cast<void (*)(StringRef*, Date*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<StringRef, Date>(static_cast<void (*)(StringRef*, Date*, int32_t*, bool*)>(v1::date_diff));
RegisterExternal("datediff")
.args<Date, StringRef>(
reinterpret_cast<void*>(static_cast<void (*)(Date*, StringRef*, int32_t*, bool*)>(v1::date_diff)))
.return_by_arg(true)
.returns<Nullable<int32_t>>();
.args<Date, StringRef>(static_cast<void (*)(Date*, StringRef*, int32_t*, bool*)>(v1::date_diff));

RegisterExternal("unix_timestamp")
.args<Date>(reinterpret_cast<void*>(static_cast<void (*)(Date*, int64_t*, bool*)>(v1::date_to_unix_timestamp)))
Expand Down
106 changes: 79 additions & 27 deletions hybridse/src/udf/udf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include "udf/udf.h"

#include <absl/time/time.h>
#include <stdint.h>
#include <time.h>

Expand All @@ -28,6 +27,7 @@
#include "absl/strings/ascii.h"
#include "absl/strings/str_replace.h"
#include "absl/time/civil_time.h"
#include "absl/time/time.h"
#include "base/iterator.h"
#include "boost/date_time.hpp"
#include "boost/date_time/gregorian/parsers.hpp"
Expand All @@ -37,7 +37,7 @@
#include "codec/row.h"
#include "codec/type_codec.h"
#include "codegen/fn_ir_builder.h"
#include "farmhash.h" // NOLINT
#include "farmhash.h"
#include "node/node_manager.h"
#include "node/sql_node.h"
#include "re2/re2.h"
Expand All @@ -57,6 +57,20 @@ using openmldb::base::StringRef;
using openmldb::base::Timestamp;
using openmldb::base::Date;

// strftime()-like formatting options with extensions
// ref absl::FormatTime
// Date only, dash separated, four-digit year (yyyy-mm-dd).
static constexpr char DATE_FMT_YMD_1[] = "%E4Y-%m-%d";
// Date only, compact form, four-digit year (yyyymmdd).
static constexpr char DATE_FMT_YMD_2[] = "%E4Y%m%d";
// Date and time of day separated by a space (yyyy-mm-dd HH:MM:SS).
static constexpr char DATE_FMT_YMDHMS[] = "%E4Y-%m-%d %H:%M:%S";
// Full date-time with fractional seconds and a UTC offset.
// NOTE(review): "RF3399" in the identifier looks like a misspelling of RFC 3339,
// which is the layout this format string describes.
static constexpr char DATE_FMT_RF3399_FULL[] = "%Y-%m-%d%ET%H:%M:%E*S%Ez";

// TODO(chenjing): unify the time zone configuration
// Hour offset from UTC of the time zone used by default.
static constexpr int32_t TZ = 8;
// Fixed-offset time zone TZ hours east of UTC.
static const absl::TimeZone DEFAULT_TZ = absl::FixedTimeZone(TZ * 60 * 60);
// TZ expressed as TZ * 3600000 — presumably milliseconds; confirm against callers.
static constexpr time_t TZ_OFFSET = TZ * 3600000;
static constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2M
// NOTE(review): presumably the bthread-local key of a per-thread memory pool — usage is not visible here.
bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY;

void hex(StringRef *str, StringRef *output) {
std::ostringstream ss;
for (uint32_t i=0; i < str->size_; i++) {
Expand Down Expand Up @@ -104,12 +118,6 @@ void unhex(StringRef *str, StringRef *output, bool* is_null) {
}
}

// TODO(chenjing): 时区统一配置
constexpr int32_t TZ = 8;
constexpr time_t TZ_OFFSET = TZ * 3600000;
constexpr int MAX_ALLOC_SIZE = 2 * 1024 * 1024; // 2M
bthread_key_t B_THREAD_LOCAL_MEM_POOL_KEY;

void trivial_fun() {}

void dayofyear(int64_t ts, int32_t* out, bool* is_null) {
Expand Down Expand Up @@ -818,7 +826,26 @@ void string_to_date(StringRef *str, Date *output,
return;
}

void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) {
// Parses a date / date-time string into an absolute point in time.
//
// The layout is chosen from the input length alone:
//   - 19 characters -> DATE_FMT_YMDHMS       (yyyy-mm-dd HH:MM:SS)
//   - 10 characters -> DATE_FMT_YMD_1        (yyyy-mm-dd)
//   -  8 characters -> DATE_FMT_YMD_2        (yyyymmdd)
//   - anything else -> DATE_FMT_RF3399_FULL  (date-time carrying a UTC offset)
//
// Input without a UTC offset is interpreted as wall-clock time in DEFAULT_TZ.
// Callers convert the result back to a civil day in DEFAULT_TZ, so parsing such
// input as UTC would push any time of day at or after 16:00 onto the next day.
// Input with an explicit offset (the full format) is unaffected by the zone
// argument.
//
// @param ref  the text to parse
// @return the parsed time, or InvalidArgumentError carrying the parser's
//         message when `ref` does not match the selected layout or holds an
//         out-of-range field
absl::StatusOr<absl::Time> string_to_time(absl::string_view ref) {
    absl::string_view fmt = DATE_FMT_RF3399_FULL;
    switch (ref.size()) {
        case 19:
            fmt = DATE_FMT_YMDHMS;
            break;
        case 10:
            fmt = DATE_FMT_YMD_1;
            break;
        case 8:
            fmt = DATE_FMT_YMD_2;
            break;
        default:
            break;
    }

    absl::Time tm;
    std::string err;
    // The time-zone overload only applies DEFAULT_TZ when the format has no
    // %z / %Ez / %E*z specifier, i.e. for the three offset-less layouts.
    if (!absl::ParseTime(fmt, ref, DEFAULT_TZ, &tm, &err)) {
        return absl::InvalidArgumentError(err);
    }
    return tm;
}

void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null) {
if (date1 == nullptr || date2 == nullptr || date1->date_ <= 0 || date2->date_ <= 0) {
*is_null = true;
return;
Expand All @@ -838,36 +865,61 @@ void date_diff(Date *date1, Date *date2, int *diff, bool *is_null) {
*is_null = false;
}

void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null) {
Date d1;
string_to_date(date1, &d1, is_null);
if (*is_null) {
void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null) {
auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_));
if (!t1.ok()) {
*is_null = true;
return;
}
Date d2;
string_to_date(date2, &d2, is_null);
if (*is_null) {
auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_));
if (!t2.ok()) {
*is_null = true;
return;
}
date_diff(&d1, &d2, diff, is_null);

auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ);
auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ);

*diff = d1 - d2;
*is_null = false;
}

void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null) {
Date d1;
string_to_date(date1, &d1, is_null);
if (*is_null) {
void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null) {
auto t1 = string_to_time(absl::string_view(date1->data_, date1->size_));
if (!t1.ok()) {
*is_null = true;
return;
}
date_diff(&d1, date2, diff, is_null);
auto d1 = absl::ToCivilDay(t1.value(), DEFAULT_TZ);

int32_t year, month, day;
if (!Date::Decode(date2->date_, &year, &month, &day)) {
*is_null = true;
return;
}
auto d2 = absl::CivilDay(year, month, day);

*diff = d1 - d2;
*is_null = false;
}

void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null) {
Date d2;
string_to_date(date2, &d2, is_null);
if (*is_null) {
void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null) {
auto t2 = string_to_time(absl::string_view(date2->data_, date2->size_));
if (!t2.ok()) {
*is_null = true;
return;
}
date_diff(date1, &d2, diff, is_null);
auto d2 = absl::ToCivilDay(t2.value(), DEFAULT_TZ);

int32_t year, month, day;
if (!Date::Decode(date1->date_, &year, &month, &day)) {
*is_null = true;
return;
}
auto d1 = absl::CivilDay(year, month, day);

*diff = d1 - d2;
*is_null = false;
}

// cast string to timestamp with yyyy-mm-dd or YYYY-mm-dd HH:MM:SS
Expand Down
10 changes: 6 additions & 4 deletions hybridse/src/udf/udf.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,10 @@ void timestamp_to_date(Timestamp *timestamp, Date *output, bool *is_null);

void date_to_string(Date *date, StringRef *output);

void date_diff(Date *date1, Date *date2, int *diff, bool *is_null);
void date_diff(StringRef *date1, StringRef *date2, int *diff, bool *is_null);
void date_diff(StringRef *date1, Date *date2, int *diff, bool *is_null);
void date_diff(Date *date1, StringRef *date2, int *diff, bool *is_null);
void date_diff(Date *date1, Date *date2, int32_t *diff, bool *is_null);
void date_diff(StringRef *date1, StringRef *date2, int32_t *diff, bool *is_null);
void date_diff(StringRef *date1, Date *date2, int32_t *diff, bool *is_null);
void date_diff(Date *date1, StringRef *date2, int32_t *diff, bool *is_null);

void like(StringRef *name, StringRef *pattern,
StringRef *escape, bool *out, bool *is_null);
Expand All @@ -384,6 +384,8 @@ void regexp_like(StringRef *name, StringRef *pattern, bool *out, bool *is_null);

void date_to_timestamp(Date *date, Timestamp *output, bool *is_null);
void string_to_date(StringRef *str, Date *output, bool *is_null);
absl::StatusOr<absl::Time> string_to_time(absl::string_view str);

void string_to_timestamp(StringRef *str, Timestamp *output, bool *is_null);
void date_to_unix_timestamp(Date *date, int64_t *output, bool *is_null);
void string_to_unix_timestamp(StringRef *str, int64_t *output, bool *is_null);
Expand Down

0 comments on commit 116fbf5

Please sign in to comment.