From 13af384a8784a04f9574c585b4523c40495e515b Mon Sep 17 00:00:00 2001 From: Rollie Ma Date: Sun, 6 Jun 2021 00:45:12 -0700 Subject: [PATCH] dateparser: support more formats + parsing on a specific timezone (#13) - add more internal parsing functions for more formats - add another function to parse with a specific timezone instead of the default local timezone - create a new parse struct and move all the internal parsing functions into the struct's impl block as methods --- Cargo.lock | 4 +- Makefile | 7 + README.md | 145 +++- belt/Cargo.toml | 2 +- dateparser/Cargo.toml | 2 +- dateparser/README.md | 171 +++- dateparser/benches/parse.rs | 55 +- dateparser/src/datetime.rs | 1595 +++++++++++++++++++++++++++++++++++ dateparser/src/lib.rs | 1216 ++++++++++++-------------- dateparser/src/timezone.rs | 23 +- 10 files changed, 2421 insertions(+), 799 deletions(-) create mode 100644 dateparser/src/datetime.rs diff --git a/Cargo.lock b/Cargo.lock index d9f2927..5dfc53e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,7 +54,7 @@ checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" [[package]] name = "belt" -version = "0.1.3" +version = "0.1.4" dependencies = [ "anyhow", "chrono", @@ -331,7 +331,7 @@ dependencies = [ [[package]] name = "dateparser" -version = "0.1.3" +version = "0.1.4" dependencies = [ "anyhow", "chrono", diff --git a/Makefile b/Makefile index a501757..2aa2d46 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,13 @@ release: cross checksum: shasum -a 256 target/package/$(APP)-$(VERSION)-*.tar.gz > target/package/$(APP)-$(VERSION)-checksums.txt +version: + @grep -rn --color \ + --exclude-dir ./target \ + --exclude-dir ./.git \ + --exclude Cargo.lock \ + --fixed-strings '$(VERSION)' . 
+ .PHONY: publish publish: cargo publish --manifest-path dateparser/Cargo.toml diff --git a/README.md b/README.md index 257bb70..9ec7fd8 100644 --- a/README.md +++ b/README.md @@ -93,29 +93,124 @@ Date parsing in belt is powered by `dateparser` crate, which is [a part of this Date string in the following formats can be parsed by `belt`: -``` -1511648546 -1620021848429 -1620024872717915000 -2021-05-01T01:17:02.604456Z -2017-11-25T22:34:50Z -Wed, 02 Jun 2021 06:31:39 GMT -2019-11-29 08:08:05-08 -2021-05-02 23:31:36.0741-07 -2021-05-02 23:31:39.12689-07 -2019-11-29 08:15:47.624504-08 -2021-04-30 21:14:10 -2021-04-30 21:14:10.052282 -2017-11-25 13:31:15 PST -2017-11-25 13:31 PST -2021-02-21 -2021-02-21 PST -01:06:06 -4:00pm -6:00 AM -01:06:06 PST -4:00pm PST -6:00 AM PST -May 02, 2021 15:51:31 UTC -May 02, 2021 15:51 UTC +```rust +// unix timestamp +"1511648546", +"1620021848429", +"1620024872717915000", +// rfc3339 +"2021-05-01T01:17:02.604456Z", +"2017-11-25T22:34:50Z", +// rfc2822 +"Wed, 02 Jun 2021 06:31:39 GMT", +// postgres timestamp yyyy-mm-dd hh:mm:ss z +"2019-11-29 08:08-08", +"2019-11-29 08:08:05-08", +"2021-05-02 23:31:36.0741-07", +"2021-05-02 23:31:39.12689-07", +"2019-11-29 08:15:47.624504-08", +"2017-07-19 03:21:51+00:00", +// yyyy-mm-dd hh:mm:ss +"2014-04-26 05:24:37 PM", +"2021-04-30 21:14", +"2021-04-30 21:14:10", +"2021-04-30 21:14:10.052282", +"2014-04-26 17:24:37.123", +"2014-04-26 17:24:37.3186369", +"2012-08-03 18:31:59.257000000", +// yyyy-mm-dd hh:mm:ss z +"2017-11-25 13:31:15 PST", +"2017-11-25 13:31 PST", +"2014-12-16 06:20:00 UTC", +"2014-12-16 06:20:00 GMT", +"2014-04-26 13:13:43 +0800", +"2014-04-26 13:13:44 +09:00", +"2012-08-03 18:31:59.257000000 +0000", +"2015-09-30 18:48:56.35272715 UTC", +// yyyy-mm-dd +"2021-02-21", +// yyyy-mm-dd z +"2021-02-21 PST", +"2021-02-21 UTC", +"2020-07-20+08:00", +// hh:mm:ss +"01:06:06", +"4:00pm", +"6:00 AM", +// hh:mm:ss z +"01:06:06 PST", +"4:00pm PST", +"6:00 AM PST", +"6:00pm UTC", +// Mon dd 
hh:mm:ss +"May 6 at 9:24 PM", +"May 27 02:45:27", +// Mon dd, yyyy, hh:mm:ss +"May 8, 2009 5:57:51 PM", +"September 17, 2012 10:09am", +"September 17, 2012, 10:10:09", +// Mon dd, yyyy hh:mm:ss z +"May 02, 2021 15:51:31 UTC", +"May 02, 2021 15:51 UTC", +"May 26, 2021, 12:49 AM PDT", +"September 17, 2012 at 10:09am PST", +// yyyy-mon-dd +"2021-Feb-21", +// Mon dd, yyyy +"May 25, 2021", +"oct 7, 1970", +"oct 7, 70", +"oct. 7, 1970", +"oct. 7, 70", +"October 7, 1970", +// dd Mon yyyy hh:mm:ss +"12 Feb 2006, 19:17", +"12 Feb 2006 19:17", +"14 May 2019 19:11:40.164", +// dd Mon yyyy +"7 oct 70", +"7 oct 1970", +"03 February 2013", +"1 July 2013", +// mm/dd/yyyy hh:mm:ss +"4/8/2014 22:05", +"04/08/2014 22:05", +"4/8/14 22:05", +"04/2/2014 03:00:51", +"8/8/1965 12:00:00 AM", +"8/8/1965 01:00:01 PM", +"8/8/1965 01:00 PM", +"8/8/1965 1:00 PM", +"8/8/1965 12:00 AM", +"4/02/2014 03:00:51", +"03/19/2012 10:11:59", +"03/19/2012 10:11:59.3186369", +// mm/dd/yyyy +"3/31/2014", +"03/31/2014", +"08/21/71", +"8/1/71", +// yyyy/mm/dd hh:mm:ss +"2014/4/8 22:05", +"2014/04/08 22:05", +"2014/04/2 03:00:51", +"2014/4/02 03:00:51", +"2012/03/19 10:11:59", +"2012/03/19 10:11:59.3186369", +// yyyy/mm/dd +"2014/3/31", +"2014/03/31", +// mm.dd.yyyy +"3.31.2014", +"03.31.2014", +"08.21.71", +// yyyy.mm.dd +"2014.03.30", +"2014.03", +// yymmdd hh:mm:ss mysql log +"171113 14:14:20", +// chinese yyyy mm dd hh mm ss +"2014年04月08日11时25分18秒", +// chinese yyyy mm dd +"2014年04月08日", ``` diff --git a/belt/Cargo.toml b/belt/Cargo.toml index 97de66e..41bf6eb 100644 --- a/belt/Cargo.toml +++ b/belt/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "belt" -version = "0.1.3" +version = "0.1.4" authors = ["Rollie Ma "] edition = "2018" publish = false diff --git a/dateparser/Cargo.toml b/dateparser/Cargo.toml index 1c57dc3..65e86d7 100644 --- a/dateparser/Cargo.toml +++ b/dateparser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dateparser" -version = "0.1.3" +version = "0.1.4" authors = ["Rollie Ma "] description = 
"Parse dates in string formats that are commonly used" readme = "README.md" diff --git a/dateparser/README.md b/dateparser/README.md index 79913a6..79e34f7 100644 --- a/dateparser/README.md +++ b/dateparser/README.md @@ -23,7 +23,7 @@ Add to your `Cargo.toml`: ```toml [dependencies] -dateparser = "0.1.3" +dateparser = "0.1.4" ``` And then use `dateparser` in your code: @@ -57,7 +57,7 @@ Convert returned `DateTime` to pacific time zone datetime with `chrono-tz`: ```toml [dependencies] chrono-tz = "0.5.3" -dateparser = "0.1.3" +dateparser = "0.1.4" ``` ```rust @@ -72,31 +72,148 @@ fn main() -> Result<(), Box> { } ``` -## Accepted date formats +Parse using a custom timezone offset for a datetime string that doesn't come with a specific timezone: + +```rust +use dateparser::parse_with_timezone; +use chrono::offset::{Local, Utc}; +use chrono_tz::US::Pacific; +use std::error::Error; + +fn main() -> Result<(), Box<dyn Error>> { + let parsed_in_local = parse_with_timezone("6:15pm", &Local)?; + println!("{:#?}", parsed_in_local); + + let parsed_in_utc = parse_with_timezone("6:15pm", &Utc)?; + println!("{:#?}", parsed_in_utc); + let parsed_in_pacific = parse_with_timezone("6:15pm", &Pacific)?; + println!("{:#?}", parsed_in_pacific); + + Ok(()) +} ``` -1511648546 -1620021848429 -1620024872717915000 -2021-05-01T01:17:02.604456Z -2017-11-25T22:34:50Z -Wed, 02 Jun 2021 06:31:39 GMT -2019-11-29 08:08:05-08 -2021-05-02 23:31:36.0741-07 -2021-05-02 23:31:39.12689-07 -2019-11-29 08:15:47.624504-08 -2021-04-30 21:14:10 -2021-04-30 21:14:10.052282 -2017-11-25 13:31:15 PST -2017-11-25 13:31 PST -2021-02-21 -2021-02-21 PST -01:06:06 -4:00pm -6:00 AM -01:06:06 PST -4:00pm PST -6:00 AM PST -May 02, 2021 15:51:31 UTC -May 02, 2021 15:51 UTC + +## Accepted date formats + +```rust +// unix timestamp +"1511648546", +"1620021848429", +"1620024872717915000", +// rfc3339 +"2021-05-01T01:17:02.604456Z", +"2017-11-25T22:34:50Z", +// rfc2822 +"Wed, 02 Jun 2021 06:31:39 GMT", +// postgres timestamp yyyy-mm-dd 
hh:mm:ss z +"2019-11-29 08:08-08", +"2019-11-29 08:08:05-08", +"2021-05-02 23:31:36.0741-07", +"2021-05-02 23:31:39.12689-07", +"2019-11-29 08:15:47.624504-08", +"2017-07-19 03:21:51+00:00", +// yyyy-mm-dd hh:mm:ss +"2014-04-26 05:24:37 PM", +"2021-04-30 21:14", +"2021-04-30 21:14:10", +"2021-04-30 21:14:10.052282", +"2014-04-26 17:24:37.123", +"2014-04-26 17:24:37.3186369", +"2012-08-03 18:31:59.257000000", +// yyyy-mm-dd hh:mm:ss z +"2017-11-25 13:31:15 PST", +"2017-11-25 13:31 PST", +"2014-12-16 06:20:00 UTC", +"2014-12-16 06:20:00 GMT", +"2014-04-26 13:13:43 +0800", +"2014-04-26 13:13:44 +09:00", +"2012-08-03 18:31:59.257000000 +0000", +"2015-09-30 18:48:56.35272715 UTC", +// yyyy-mm-dd +"2021-02-21", +// yyyy-mm-dd z +"2021-02-21 PST", +"2021-02-21 UTC", +"2020-07-20+08:00", +// hh:mm:ss +"01:06:06", +"4:00pm", +"6:00 AM", +// hh:mm:ss z +"01:06:06 PST", +"4:00pm PST", +"6:00 AM PST", +"6:00pm UTC", +// Mon dd hh:mm:ss +"May 6 at 9:24 PM", +"May 27 02:45:27", +// Mon dd, yyyy, hh:mm:ss +"May 8, 2009 5:57:51 PM", +"September 17, 2012 10:09am", +"September 17, 2012, 10:10:09", +// Mon dd, yyyy hh:mm:ss z +"May 02, 2021 15:51:31 UTC", +"May 02, 2021 15:51 UTC", +"May 26, 2021, 12:49 AM PDT", +"September 17, 2012 at 10:09am PST", +// yyyy-mon-dd +"2021-Feb-21", +// Mon dd, yyyy +"May 25, 2021", +"oct 7, 1970", +"oct 7, 70", +"oct. 7, 1970", +"oct. 
7, 70", +"October 7, 1970", +// dd Mon yyyy hh:mm:ss +"12 Feb 2006, 19:17", +"12 Feb 2006 19:17", +"14 May 2019 19:11:40.164", +// dd Mon yyyy +"7 oct 70", +"7 oct 1970", +"03 February 2013", +"1 July 2013", +// mm/dd/yyyy hh:mm:ss +"4/8/2014 22:05", +"04/08/2014 22:05", +"4/8/14 22:05", +"04/2/2014 03:00:51", +"8/8/1965 12:00:00 AM", +"8/8/1965 01:00:01 PM", +"8/8/1965 01:00 PM", +"8/8/1965 1:00 PM", +"8/8/1965 12:00 AM", +"4/02/2014 03:00:51", +"03/19/2012 10:11:59", +"03/19/2012 10:11:59.3186369", +// mm/dd/yyyy +"3/31/2014", +"03/31/2014", +"08/21/71", +"8/1/71", +// yyyy/mm/dd hh:mm:ss +"2014/4/8 22:05", +"2014/04/08 22:05", +"2014/04/2 03:00:51", +"2014/4/02 03:00:51", +"2012/03/19 10:11:59", +"2012/03/19 10:11:59.3186369", +// yyyy/mm/dd +"2014/3/31", +"2014/03/31", +// mm.dd.yyyy +"3.31.2014", +"03.31.2014", +"08.21.71", +// yyyy.mm.dd +"2014.03.30", +"2014.03", +// yymmdd hh:mm:ss mysql log +"171113 14:14:20", +// chinese yyyy mm dd hh mm ss +"2014年04月08日11时25分18秒", +// chinese yyyy mm dd +"2014年04月08日", ``` diff --git a/dateparser/benches/parse.rs b/dateparser/benches/parse.rs index 0f151a6..0bce38e 100644 --- a/dateparser/benches/parse.rs +++ b/dateparser/benches/parse.rs @@ -3,38 +3,39 @@ use dateparser::parse; use lazy_static::lazy_static; lazy_static! 
{ - static ref ACCEPTED: Vec<&'static str> = vec![ - "1511648546", - "1620021848429", - "1620024872717915000", - "2021-05-01T01:17:02.604456Z", - "2017-11-25T22:34:50Z", - "Wed, 02 Jun 2021 06:31:39 GMT", - "2019-11-29 08:08:05-08", - "2021-05-02 23:31:36.0741-07", - "2021-05-02 23:31:39.12689-07", - "2019-11-29 08:15:47.624504-08", - "2021-04-30 21:14:10", - "2021-04-30 21:14:10.052282", - "2017-11-25 13:31:15 PST", - "2017-11-25 13:31 PST", - "2021-02-21", - "2021-02-21 PST", - "01:06:06", - "4:00pm", - "6:00 AM", - "01:06:06 PST", - "4:00pm PST", - "6:00 AM PST", - "May 02, 2021 15:51:31 UTC", - "May 02, 2021 15:51 UTC", + static ref SELECTED: Vec<&'static str> = vec![ + "1511648546", // unix_timestamp + "2017-11-25T22:34:50Z", // rfc3339 + "Wed, 02 Jun 2021 06:31:39 GMT", // rfc2822 + "2019-11-29 08:08:05-08", // postgres_timestamp + "2021-04-30 21:14:10", // ymd_hms + "2017-11-25 13:31:15 PST", // ymd_hms_z + "2021-02-21", // ymd + "2021-02-21 PST", // ymd_z + "4:00pm", // hms + "6:00 AM PST", // hms_z + "May 27 02:45:27", // month_md_hms + "May 8, 2009 5:57:51 PM", // month_mdy_hms + "May 02, 2021 15:51 UTC", // month_mdy_hms_z + "2021-Feb-21", // month_ymd + "May 25, 2021", // month_mdy + "14 May 2019 19:11:40.164", // month_dmy_hms + "1 July 2013", // month_dmy + "03/19/2012 10:11:59", // slash_mdy_hms + "08/21/71", // slash_mdy + "2012/03/19 10:11:59", // slash_ymd_hms + "2014/3/31", // slash_ymd + "2014.03.30", // dot_mdy_or_ymd + "171113 14:14:20", // mysql_log_timestamp + "2014年04月08日11时25分18秒", // chinese_ymd_hms + "2014年04月08日", // chinese_ymd ]; } fn bench_parse_all(c: &mut Criterion) { c.bench_with_input( BenchmarkId::new("parse_all", "accepted_formats"), - &ACCEPTED, + &SELECTED, |b, all| { b.iter(|| { for date_str in all.iter() { @@ -47,7 +48,7 @@ fn bench_parse_all(c: &mut Criterion) { fn bench_parse_each(c: &mut Criterion) { let mut group = c.benchmark_group("parse_each"); - for date_str in ACCEPTED.iter() { + for date_str in SELECTED.iter() { 
group.bench_with_input(*date_str, *date_str, |b, input| b.iter(|| parse(input))); } group.finish(); diff --git a/dateparser/src/datetime.rs b/dateparser/src/datetime.rs new file mode 100644 index 0000000..7fe17d1 --- /dev/null +++ b/dateparser/src/datetime.rs @@ -0,0 +1,1595 @@ +use crate::timezone; +use anyhow::{anyhow, Result}; +use chrono::prelude::*; +use lazy_static::lazy_static; +use regex::Regex; + +/// Parse struct whose methods implement parsers for the accepted formats. +pub struct Parse<'z, Tz2> { + tz: &'z Tz2, +} + +impl<'z, Tz2> Parse<'z, Tz2> +where + Tz2: TimeZone, +{ + /// Create a new instance of [`Parse`] with a custom parsing timezone that handles the + /// datetime string without time offset. + pub fn new(tz: &'z Tz2) -> Self { + Self { tz } + } + + /// This method tries to parse the input datetime string with a list of accepted formats. See + /// more examples from [`Parse`], [`crate::parse()`] and [`crate::parse_with_timezone()`]. + pub fn parse(&self, input: &str) -> Result> { + self.unix_timestamp(input) + .or_else(|| self.rfc2822(input)) + .or_else(|| self.ymd_family(input)) + .or_else(|| self.hms_family(input)) + .or_else(|| self.month_ymd(input)) + .or_else(|| self.month_mdy_family(input)) + .or_else(|| self.month_dmy_family(input)) + .or_else(|| self.slash_mdy_family(input)) + .or_else(|| self.slash_ymd_family(input)) + .or_else(|| self.dot_mdy_or_ymd(input)) + .or_else(|| self.mysql_log_timestamp(input)) + .or_else(|| self.chinese_ymd_family(input)) + .unwrap_or_else(|| Err(anyhow!("{} did not match any formats.", input))) + } + + fn ymd_family(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^[0-9]{4}-[0-9]{2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.rfc3339(input) + .or_else(|| self.postgres_timestamp(input)) + .or_else(|| self.ymd_hms(input)) + .or_else(|| self.ymd_hms_z(input)) + .or_else(|| self.ymd(input)) + .or_else(|| self.ymd_z(input)) + } + + fn hms_family(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{1,2}:[0-9]{2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.hms(input).or_else(|| self.hms_z(input)) + } + + fn month_mdy_family(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[a-zA-Z]{3,9}\.?\s+[0-9]{1,2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.month_md_hms(input) + .or_else(|| self.month_mdy_hms(input)) + .or_else(|| self.month_mdy_hms_z(input)) + .or_else(|| self.month_mdy(input)) + } + + fn month_dmy_family(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{1,2}\s+[a-zA-Z]{3,9}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.month_dmy_hms(input).or_else(|| self.month_dmy(input)) + } + + fn slash_mdy_family(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{1,2}/[0-9]{1,2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.slash_mdy_hms(input).or_else(|| self.slash_mdy(input)) + } + + fn slash_ymd_family(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{4}/[0-9]{1,2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.slash_ymd_hms(input).or_else(|| self.slash_ymd(input)) + } + + fn chinese_ymd_family(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^[0-9]{4}年[0-9]{2}月").unwrap(); + } + if !RE.is_match(input) { + return None; + } + self.chinese_ymd_hms(input) + .or_else(|| self.chinese_ymd(input)) + } + + // unix timestamp + // - 1511648546 + // - 1620021848429 + // - 1620024872717915000 + fn unix_timestamp(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{10,19}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + input + .parse::() + .ok() + .and_then(|timestamp| { + match input.len() { + 10 => Some(Utc.timestamp(timestamp, 0)), + 13 => Some(Utc.timestamp_millis(timestamp)), + 19 => Some(Utc.timestamp_nanos(timestamp)), + _ => None, + } + .map(|datetime| datetime.with_timezone(&Utc)) + }) + .map(Ok) + } + + // rfc3339 + // - 2021-05-01T01:17:02.604456Z + // - 2017-11-25T22:34:50Z + fn rfc3339(&self, input: &str) -> Option>> { + DateTime::parse_from_rfc3339(input) + .ok() + .map(|parsed| parsed.with_timezone(&Utc)) + .map(Ok) + } + + // rfc2822 + // - Wed, 02 Jun 2021 06:31:39 GMT + fn rfc2822(&self, input: &str) -> Option>> { + DateTime::parse_from_rfc2822(input) + .ok() + .map(|parsed| parsed.with_timezone(&Utc)) + .map(Ok) + } + + // postgres timestamp yyyy-mm-dd hh:mm:ss z + // - 2019-11-29 08:08-08 + // - 2019-11-29 08:08:05-08 + // - 2021-05-02 23:31:36.0741-07 + // - 2021-05-02 23:31:39.12689-07 + // - 2019-11-29 08:15:47.624504-08 + // - 2017-07-19 03:21:51+00:00 + fn postgres_timestamp(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{4}-[0-9]{2}-[0-9]{2}\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?[+-:0-9]{3,6}$", + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + DateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S%#z") + .or_else(|_| DateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S.%f%#z")) + .or_else(|_| DateTime::parse_from_str(input, "%Y-%m-%d %H:%M%#z")) + .ok() + .map(|parsed| parsed.with_timezone(&Utc)) + .map(Ok) + } + + // yyyy-mm-dd hh:mm:ss + // - 2014-04-26 05:24:37 PM + // - 2021-04-30 21:14 + // - 2021-04-30 21:14:10 + // - 2021-04-30 21:14:10.052282 + // - 2014-04-26 17:24:37.123 + // - 2014-04-26 17:24:37.3186369 + // - 2012-08-03 18:31:59.257000000 + fn ymd_hms(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new( + r"^[0-9]{4}-[0-9]{2}-[0-9]{2}\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?\s*(am|pm|AM|PM)?$", + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + self.tz + .datetime_from_str(input, "%Y-%m-%d %H:%M:%S") + .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %H:%M")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %H:%M:%S.%f")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %I:%M %P")) + .ok() + .map(|parsed| parsed.with_timezone(&Utc)) + .map(Ok) + } + + // yyyy-mm-dd hh:mm:ss z + // - 2017-11-25 13:31:15 PST + // - 2017-11-25 13:31 PST + // - 2014-12-16 06:20:00 UTC + // - 2014-12-16 06:20:00 GMT + // - 2014-04-26 13:13:43 +0800 + // - 2014-04-26 13:13:44 +09:00 + // - 2012-08-03 18:31:59.257000000 +0000 + // - 2015-09-30 18:48:56.35272715 UTC + fn ymd_hms_z(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{4}-[0-9]{2}-[0-9]{2}\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?(?P\s*[+-:a-zA-Z0-9]{3,6})$", + ).unwrap(); + } + + if !RE.is_match(input) { + return None; + } + if let Some(caps) = RE.captures(input) { + if let Some(matched_tz) = caps.name("tz") { + let parse_from_str = NaiveDateTime::parse_from_str; + return match timezone::parse(matched_tz.as_str().trim()) { + Ok(offset) => parse_from_str(input, "%Y-%m-%d %H:%M:%S %Z") + .or_else(|_| parse_from_str(input, "%Y-%m-%d %H:%M %Z")) + .or_else(|_| parse_from_str(input, "%Y-%m-%d %H:%M:%S.%f %Z")) + .ok() + .and_then(|parsed| offset.from_local_datetime(&parsed).single()) + .map(|datetime| datetime.with_timezone(&Utc)) + .map(Ok), + Err(err) => Some(Err(err)), + }; + } + } + None + } + + // yyyy-mm-dd + // - 2021-02-21 + fn ymd(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$").unwrap(); + } + + if !RE.is_match(input) { + return None; + } + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%Y-%m-%d") + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // yyyy-mm-dd z + // - 2021-02-21 PST + // - 2021-02-21 UTC + // - 2020-07-20+08:00 (yyyy-mm-dd-07:00) + fn ymd_z(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = + Regex::new(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}(?P\s*[+-:a-zA-Z0-9]{3,6})$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + if let Some(caps) = RE.captures(input) { + if let Some(matched_tz) = caps.name("tz") { + return match timezone::parse(matched_tz.as_str().trim()) { + Ok(offset) => { + let now = Utc::now().with_timezone(&offset); + NaiveDate::parse_from_str(input, "%Y-%m-%d %Z") + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| offset.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + Err(err) => Some(Err(err)), + }; + } + } + None + } + + // hh:mm:ss + // - 01:06:06 + // - 4:00pm + // - 6:00 AM + fn hms(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = + Regex::new(r"^[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?\s*(am|pm|AM|PM)?$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveTime::parse_from_str(input, "%H:%M:%S") + .or_else(|_| NaiveTime::parse_from_str(input, "%I:%M %P")) + .ok() + .and_then(|parsed| now.date().and_time(parsed)) + .map(|datetime| datetime.with_timezone(&Utc)) + .map(Ok) + } + + // hh:mm:ss z + // - 01:06:06 PST + // - 4:00pm PST + // - 6:00 AM PST + // - 6:00pm UTC + fn hms_z(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?\s*(am|pm|AM|PM)?(?P\s+[+-:a-zA-Z0-9]{3,6})$", + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + if let Some(caps) = RE.captures(input) { + if let Some(matched_tz) = caps.name("tz") { + return match timezone::parse(matched_tz.as_str().trim()) { + Ok(offset) => { + let now = Utc::now().with_timezone(&offset); + NaiveTime::parse_from_str(input, "%H:%M:%S %Z") + .or_else(|_| NaiveTime::parse_from_str(input, "%I:%M:%S %P %Z")) + .or_else(|_| NaiveTime::parse_from_str(input, "%I:%M %P %Z")) + .ok() + .map(|parsed| now.date().naive_local().and_time(parsed)) + .and_then(|datetime| offset.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + Err(err) => Some(Err(err)), + }; + } + } + None + } + + // yyyy-mon-dd + // - 2021-Feb-21 + fn month_ymd(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{4}-[a-zA-Z]{3,9}-[0-9]{2}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%Y-%m-%d") + .or_else(|_| NaiveDate::parse_from_str(input, "%Y-%b-%d")) + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // Mon dd hh:mm:ss + // - May 6 at 9:24 PM + // - May 27 02:45:27 + fn month_md_hms(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[a-zA-Z]{3}\s+[0-9]{1,2}\s*(at)?\s+[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?\s*(am|pm|AM|PM)?$", + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + let with_year = format!("{} {}", now.year(), input); + self.tz + .datetime_from_str(&with_year, "%Y %b %d at %I:%M %P") + .or_else(|_| self.tz.datetime_from_str(&with_year, "%Y %b %d %H:%M:%S")) + .ok() + .map(|parsed| parsed.with_timezone(&Utc)) + .map(Ok) + } + + // Mon dd, yyyy, hh:mm:ss + // - May 8, 2009 5:57:51 PM + // - September 17, 2012 10:09am + // - September 17, 2012, 10:10:09 + fn month_mdy_hms(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new( + r"^[a-zA-Z]{3,9}\.?\s+[0-9]{1,2},\s+[0-9]{2,4},?\s+[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?\s*(am|pm|AM|PM)?$", + ).unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let dt = input.replace(", ", " ").replace(". ", " "); + self.tz + .datetime_from_str(&dt, "%B %d %Y %H:%M:%S") + .or_else(|_| self.tz.datetime_from_str(&dt, "%B %d %Y %H:%M")) + .or_else(|_| self.tz.datetime_from_str(&dt, "%B %d %Y %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(&dt, "%B %d %Y %I:%M %P")) + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // Mon dd, yyyy hh:mm:ss z + // - May 02, 2021 15:51:31 UTC + // - May 02, 2021 15:51 UTC + // - May 26, 2021, 12:49 AM PDT + // - September 17, 2012 at 10:09am PST + fn month_mdy_hms_z(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[a-zA-Z]{3,9}\s+[0-9]{1,2},?\s+[0-9]{4}\s*,?(at)?\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?\s*(am|pm|AM|PM)?(?P\s+[+-:a-zA-Z0-9]{3,6})$", + ).unwrap(); + } + if !RE.is_match(input) { + return None; + } + + if let Some(caps) = RE.captures(input) { + if let Some(matched_tz) = caps.name("tz") { + let parse_from_str = NaiveDateTime::parse_from_str; + return match timezone::parse(matched_tz.as_str().trim()) { + Ok(offset) => { + let dt = input.replace(",", "").replace("at", ""); + parse_from_str(&dt, "%B %d %Y %H:%M:%S %Z") + .or_else(|_| parse_from_str(&dt, "%B %d %Y %H:%M %Z")) + .or_else(|_| parse_from_str(&dt, "%B %d %Y %I:%M:%S %P %Z")) + .or_else(|_| parse_from_str(&dt, "%B %d %Y %I:%M %P %Z")) + .ok() + .and_then(|parsed| offset.from_local_datetime(&parsed).single()) + .map(|datetime| datetime.with_timezone(&Utc)) + .map(Ok) + } + Err(err) => Some(Err(err)), + }; + } + } + None + } + + // Mon dd, yyyy + // - May 25, 2021 + // - oct 7, 1970 + // - oct 7, 70 + // - oct. 7, 1970 + // - oct. 7, 70 + // - October 7, 1970 + fn month_mdy(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = + Regex::new(r"^[a-zA-Z]{3,9}\.?\s+[0-9]{1,2},\s+[0-9]{2,4}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + let dt = input.replace(", ", " ").replace(". ", " "); + NaiveDate::parse_from_str(&dt, "%B %d %y") + .or_else(|_| NaiveDate::parse_from_str(&dt, "%B %d %Y")) + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // dd Mon yyyy hh:mm:ss + // - 12 Feb 2006, 19:17 + // - 12 Feb 2006 19:17 + // - 14 May 2019 19:11:40.164 + fn month_dmy_hms(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{1,2}\s+[a-zA-Z]{3,9}\s+[0-9]{2,4},?\s+[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?$", + ).unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let dt = input.replace(", ", " "); + self.tz + .datetime_from_str(&dt, "%d %B %Y %H:%M:%S") + .or_else(|_| self.tz.datetime_from_str(&dt, "%d %B %Y %H:%M")) + .or_else(|_| self.tz.datetime_from_str(&dt, "%d %B %Y %H:%M:%S.%f")) + .or_else(|_| self.tz.datetime_from_str(&dt, "%d %B %Y %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(&dt, "%d %B %Y %I:%M %P")) + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // dd Mon yyyy + // - 7 oct 70 + // - 7 oct 1970 + // - 03 February 2013 + // - 1 July 2013 + fn month_dmy(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = + Regex::new(r"^[0-9]{1,2}\s+[a-zA-Z]{3,9}\s+[0-9]{2,4}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%d %B %y") + .or_else(|_| NaiveDate::parse_from_str(input, "%d %B %Y")) + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // mm/dd/yyyy hh:mm:ss + // - 4/8/2014 22:05 + // - 04/08/2014 22:05 + // - 4/8/14 22:05 + // - 04/2/2014 03:00:51 + // - 8/8/1965 12:00:00 AM + // - 8/8/1965 01:00:01 PM + // - 8/8/1965 01:00 PM + // - 8/8/1965 1:00 PM + // - 8/8/1965 12:00 AM + // - 4/02/2014 03:00:51 + // - 03/19/2012 10:11:59 + // - 03/19/2012 10:11:59.3186369 + fn slash_mdy_hms(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2,4}\s+[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?\s*(am|pm|AM|PM)?$" + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + self.tz + .datetime_from_str(input, "%m/%d/%y %H:%M:%S") + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%y %H:%M")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%y %H:%M:%S.%f")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%y %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%y %I:%M %P")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%Y %H:%M:%S")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%Y %H:%M")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%Y %H:%M:%S.%f")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%Y %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(input, "%m/%d/%Y %I:%M %P")) + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // mm/dd/yyyy + // - 3/31/2014 + // - 03/31/2014 + // - 08/21/71 + // - 8/1/71 + fn slash_mdy(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2,4}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%m/%d/%y") + .or_else(|_| NaiveDate::parse_from_str(input, "%m/%d/%Y")) + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // yyyy/mm/dd hh:mm:ss + // - 2014/4/8 22:05 + // - 2014/04/08 22:05 + // - 2014/04/2 03:00:51 + // - 2014/4/02 03:00:51 + // - 2012/03/19 10:11:59 + // - 2012/03/19 10:11:59.3186369 + fn slash_ymd_hms(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new( + r"^[0-9]{4}/[0-9]{1,2}/[0-9]{1,2}\s+[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?(\.[0-9]{1,9})?\s*(am|pm|AM|PM)?$" + ) + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + self.tz + .datetime_from_str(input, "%Y/%m/%d %H:%M:%S") + .or_else(|_| self.tz.datetime_from_str(input, "%Y/%m/%d %H:%M")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y/%m/%d %H:%M:%S.%f")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y/%m/%d %I:%M:%S %P")) + .or_else(|_| self.tz.datetime_from_str(input, "%Y/%m/%d %I:%M %P")) + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // yyyy/mm/dd + // - 2014/3/31 + // - 2014/03/31 + fn slash_ymd(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[0-9]{4}/[0-9]{1,2}/[0-9]{1,2}$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%Y/%m/%d") + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // mm.dd.yyyy + // - 3.31.2014 + // - 03.31.2014 + // - 08.21.71 + // yyyy.mm.dd + // - 2014.03.30 + // - 2014.03 + fn dot_mdy_or_ymd(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"[0-9]{1,4}.[0-9]{1,4}[0-9]{1,4}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%m.%d.%y") + .or_else(|_| NaiveDate::parse_from_str(input, "%m.%d.%Y")) + .or_else(|_| NaiveDate::parse_from_str(input, "%Y.%m.%d")) + .or_else(|_| NaiveDate::parse_from_str(&format!("{}.{}", input, now.day()), "%Y.%m.%d")) + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // yymmdd hh:mm:ss mysql log + // - 171113 14:14:20 + fn mysql_log_timestamp(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = Regex::new(r"[0-9]{6}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + self.tz + .datetime_from_str(input, "%y%m%d %H:%M:%S") + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // chinese yyyy mm dd hh mm ss + // - 2014年04月08日11时25分18秒 + fn chinese_ymd_hms(&self, input: &str) -> Option>> { + lazy_static! { + static ref RE: Regex = + Regex::new(r"^[0-9]{4}年[0-9]{2}月[0-9]{2}日[0-9]{2}时[0-9]{2}分[0-9]{2}秒$") + .unwrap(); + } + if !RE.is_match(input) { + return None; + } + + self.tz + .datetime_from_str(input, "%Y年%m月%d日%H时%M分%S秒") + .ok() + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } + + // chinese yyyy mm dd + // - 2014年04月08日 + fn chinese_ymd(&self, input: &str) -> Option>> { + lazy_static! 
{ + static ref RE: Regex = Regex::new(r"^[0-9]{4}年[0-9]{2}月[0-9]{2}日$").unwrap(); + } + if !RE.is_match(input) { + return None; + } + + let now = Utc::now().with_timezone(self.tz); + NaiveDate::parse_from_str(input, "%Y年%m月%d日") + .ok() + .map(|parsed| parsed.and_time(now.time())) + .and_then(|datetime| self.tz.from_local_datetime(&datetime).single()) + .map(|at_tz| at_tz.with_timezone(&Utc)) + .map(Ok) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unix_timestamp() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ("0000000000", Utc.ymd(1970, 1, 1).and_hms(0, 0, 0)), + ("0000000000000", Utc.ymd(1970, 1, 1).and_hms(0, 0, 0)), + ("0000000000000000000", Utc.ymd(1970, 1, 1).and_hms(0, 0, 0)), + ("1511648546", Utc.ymd(2017, 11, 25).and_hms(22, 22, 26)), + ( + "1620021848429", + Utc.ymd(2021, 5, 3).and_hms_milli(6, 4, 8, 429), + ), + ( + "1620024872717915000", + Utc.ymd(2021, 5, 3).and_hms_nano(6, 54, 32, 717915000), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.unix_timestamp(input).unwrap().unwrap(), + want, + "unix_timestamp/{}", + input + ) + } + assert!(parse.unix_timestamp("15116").is_none()); + assert!(parse + .unix_timestamp("16200248727179150001620024872717915000") + .is_none()); + assert!(parse.unix_timestamp("not-a-ts").is_none()); + } + + #[test] + fn rfc3339() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "2021-05-01T01:17:02.604456Z", + Utc.ymd(2021, 5, 1).and_hms_nano(1, 17, 2, 604456000), + ), + ( + "2017-11-25T22:34:50Z", + Utc.ymd(2017, 11, 25).and_hms(22, 34, 50), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.rfc3339(input).unwrap().unwrap(), + want, + "rfc3339/{}", + input + ) + } + assert!(parse.rfc3339("2017-11-25 22:34:50").is_none()); + assert!(parse.rfc3339("not-date-time").is_none()); + } + + #[test] + fn rfc2822() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "Wed, 02 Jun 2021 06:31:39 GMT", + 
Utc.ymd(2021, 6, 2).and_hms(6, 31, 39), + ), + ( + "Wed, 02 Jun 2021 06:31:39 PDT", + Utc.ymd(2021, 6, 2).and_hms(13, 31, 39), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.rfc2822(input).unwrap().unwrap(), + want, + "rfc2822/{}", + input + ) + } + assert!(parse.rfc2822("02 Jun 2021 06:31:39").is_none()); + assert!(parse.rfc2822("not-date-time").is_none()); + } + + #[test] + fn postgres_timestamp() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "2019-11-29 08:08-08", + Utc.ymd(2019, 11, 29).and_hms(16, 8, 0), + ), + ( + "2019-11-29 08:08:05-08", + Utc.ymd(2019, 11, 29).and_hms(16, 8, 5), + ), + ( + "2021-05-02 23:31:36.0741-07", + Utc.ymd(2021, 5, 3).and_hms_nano(6, 31, 36, 741), + ), + ( + "2021-05-02 23:31:39.12689-07", + Utc.ymd(2021, 5, 3).and_hms_nano(6, 31, 39, 12689), + ), + ( + "2019-11-29 08:15:47.624504-08", + Utc.ymd(2019, 11, 29).and_hms_nano(16, 15, 47, 624504), + ), + ( + "2017-07-19 03:21:51+00:00", + Utc.ymd(2017, 7, 19).and_hms(3, 21, 51), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.postgres_timestamp(input).unwrap().unwrap(), + want, + "postgres_timestamp/{}", + input + ) + } + assert!(parse.postgres_timestamp("not-date-time").is_none()); + } + + #[test] + fn ymd_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ("2021-04-30 21:14", Utc.ymd(2021, 4, 30).and_hms(21, 14, 0)), + ( + "2021-04-30 21:14:10", + Utc.ymd(2021, 4, 30).and_hms(21, 14, 10), + ), + ( + "2021-04-30 21:14:10.052282", + Utc.ymd(2021, 4, 30).and_hms_nano(21, 14, 10, 52282), + ), + ( + "2014-04-26 05:24:37 PM", + Utc.ymd(2014, 4, 26).and_hms(17, 24, 37), + ), + ( + "2014-04-26 17:24:37.123", + Utc.ymd(2014, 4, 26).and_hms_nano(17, 24, 37, 123), + ), + ( + "2014-04-26 17:24:37.3186369", + Utc.ymd(2014, 4, 26).and_hms_nano(17, 24, 37, 3186369), + ), + ( + "2012-08-03 18:31:59.257000000", + Utc.ymd(2012, 8, 3).and_hms_nano(18, 31, 59, 257000000), + ), + ]; + + for &(input, want) in 
test_cases.iter() { + assert_eq!( + parse.ymd_hms(input).unwrap().unwrap(), + want, + "ymd_hms/{}", + input + ) + } + assert!(parse.ymd_hms("not-date-time").is_none()); + } + + #[test] + fn ymd_hms_z() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "2017-11-25 13:31:15 PST", + Utc.ymd(2017, 11, 25).and_hms(21, 31, 15), + ), + ( + "2017-11-25 13:31 PST", + Utc.ymd(2017, 11, 25).and_hms(21, 31, 0), + ), + ( + "2014-12-16 06:20:00 UTC", + Utc.ymd(2014, 12, 16).and_hms(6, 20, 0), + ), + ( + "2014-12-16 06:20:00 GMT", + Utc.ymd(2014, 12, 16).and_hms(6, 20, 0), + ), + ( + "2014-04-26 13:13:43 +0800", + Utc.ymd(2014, 4, 26).and_hms(5, 13, 43), + ), + ( + "2014-04-26 13:13:44 +09:00", + Utc.ymd(2014, 4, 26).and_hms(4, 13, 44), + ), + ( + "2012-08-03 18:31:59.257000000 +0000", + Utc.ymd(2012, 8, 3).and_hms_nano(18, 31, 59, 257000000), + ), + ( + "2015-09-30 18:48:56.35272715 UTC", + Utc.ymd(2015, 9, 30).and_hms_nano(18, 48, 56, 35272715), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.ymd_hms_z(input).unwrap().unwrap(), + want, + "ymd_hms_z/{}", + input + ) + } + assert!(parse.ymd_hms_z("not-date-time").is_none()); + } + + #[test] + fn ymd() { + let parse = Parse::new(&Utc); + + let test_cases = vec![( + "2021-02-21", + Utc.ymd(2021, 2, 21).and_time(Utc::now().time()), + )]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .ymd(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "ymd/{}", + input + ) + } + assert!(parse.ymd("not-date-time").is_none()); + } + + #[test] + fn ymd_z() { + let parse = Parse::new(&Utc); + let now_at_pst = Utc::now().with_timezone(&FixedOffset::west(8 * 3600)); + let now_at_cst = Utc::now().with_timezone(&FixedOffset::east(8 * 3600)); + + let test_cases = vec![ + ( + "2021-02-21 PST", + FixedOffset::west(8 * 3600) + .ymd(2021, 2, 21) + .and_time(now_at_pst.time()) + .map(|dt| 
dt.with_timezone(&Utc)), + ), + ( + "2021-02-21 UTC", + FixedOffset::west(0) + .ymd(2021, 2, 21) + .and_time(Utc::now().time()) + .map(|dt| dt.with_timezone(&Utc)), + ), + ( + "2020-07-20+08:00", + FixedOffset::east(8 * 3600) + .ymd(2020, 7, 20) + .and_time(now_at_cst.time()) + .map(|dt| dt.with_timezone(&Utc)), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .ymd_z(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "ymd_z/{}", + input + ) + } + assert!(parse.ymd_z("not-date-time").is_none()); + } + + #[test] + fn hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "01:06:06", + Utc::now().date().and_time(NaiveTime::from_hms(1, 6, 6)), + ), + ( + "4:00pm", + Utc::now().date().and_time(NaiveTime::from_hms(16, 0, 0)), + ), + ( + "6:00 AM", + Utc::now().date().and_time(NaiveTime::from_hms(6, 0, 0)), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.hms(input).unwrap().unwrap(), + want.unwrap(), + "hms/{}", + input + ) + } + assert!(parse.hms("not-date-time").is_none()); + } + + #[test] + fn hms_z() { + let parse = Parse::new(&Utc); + let now_at_pst = Utc::now().with_timezone(&FixedOffset::west(8 * 3600)); + + let test_cases = vec![ + ( + "01:06:06 PST", + FixedOffset::west(8 * 3600) + .from_local_date(&now_at_pst.date().naive_local()) + .and_time(NaiveTime::from_hms(1, 6, 6)) + .map(|dt| dt.with_timezone(&Utc)), + ), + ( + "4:00pm PST", + FixedOffset::west(8 * 3600) + .from_local_date(&now_at_pst.date().naive_local()) + .and_time(NaiveTime::from_hms(16, 0, 0)) + .map(|dt| dt.with_timezone(&Utc)), + ), + ( + "6:00 AM PST", + FixedOffset::west(8 * 3600) + .from_local_date(&now_at_pst.date().naive_local()) + .and_time(NaiveTime::from_hms(6, 0, 0)) + .map(|dt| dt.with_timezone(&Utc)), + ), + ( + "6:00pm UTC", + FixedOffset::west(0) + .from_local_date(&Utc::now().date().naive_local()) + 
.and_time(NaiveTime::from_hms(18, 0, 0)) + .map(|dt| dt.with_timezone(&Utc)), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.hms_z(input).unwrap().unwrap(), + want.unwrap(), + "hms_z/{}", + input + ) + } + assert!(parse.hms_z("not-date-time").is_none()); + } + + #[test] + fn month_ymd() { + let parse = Parse::new(&Utc); + + let test_cases = vec![( + "2021-Feb-21", + Utc.ymd(2021, 2, 21).and_time(Utc::now().time()), + )]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .month_ymd(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "month_ymd/{}", + input + ) + } + assert!(parse.month_ymd("not-date-time").is_none()); + } + + #[test] + fn month_md_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "May 6 at 9:24 PM", + Utc.ymd(Utc::now().year(), 5, 6).and_hms(21, 24, 0), + ), + ( + "May 27 02:45:27", + Utc.ymd(Utc::now().year(), 5, 27).and_hms(2, 45, 27), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.month_md_hms(input).unwrap().unwrap(), + want, + "month_md_hms/{}", + input + ) + } + assert!(parse.month_md_hms("not-date-time").is_none()); + } + + #[test] + fn month_mdy_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "May 8, 2009 5:57:51 PM", + Utc.ymd(2009, 5, 8).and_hms(17, 57, 51), + ), + ( + "September 17, 2012 10:09am", + Utc.ymd(2012, 9, 17).and_hms(10, 9, 0), + ), + ( + "September 17, 2012, 10:10:09", + Utc.ymd(2012, 9, 17).and_hms(10, 10, 9), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.month_mdy_hms(input).unwrap().unwrap(), + want, + "month_mdy_hms/{}", + input + ) + } + assert!(parse.month_mdy_hms("not-date-time").is_none()); + } + + #[test] + fn month_mdy_hms_z() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "May 02, 2021 15:51:31 UTC", + Utc.ymd(2021, 5, 2).and_hms(15, 51, 31), + ), + 
( + "May 02, 2021 15:51 UTC", + Utc.ymd(2021, 5, 2).and_hms(15, 51, 0), + ), + ( + "May 26, 2021, 12:49 AM PDT", + Utc.ymd(2021, 5, 26).and_hms(7, 49, 0), + ), + ( + "September 17, 2012 at 10:09am PST", + Utc.ymd(2012, 9, 17).and_hms(18, 9, 0), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.month_mdy_hms_z(input).unwrap().unwrap(), + want, + "month_mdy_hms_z/{}", + input + ) + } + assert!(parse.month_mdy_hms_z("not-date-time").is_none()); + } + + #[test] + fn month_mdy() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "May 25, 2021", + Utc.ymd(2021, 5, 25).and_time(Utc::now().time()), + ), + ( + "oct 7, 1970", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ( + "oct 7, 70", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ( + "oct. 7, 1970", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ( + "oct. 7, 70", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ( + "October 7, 1970", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .month_mdy(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "month_mdy/{}", + input + ) + } + assert!(parse.month_mdy("not-date-time").is_none()); + } + + #[test] + fn month_dmy_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "12 Feb 2006, 19:17", + Utc.ymd(2006, 2, 12).and_hms(19, 17, 0), + ), + ("12 Feb 2006 19:17", Utc.ymd(2006, 2, 12).and_hms(19, 17, 0)), + ( + "14 May 2019 19:11:40.164", + Utc.ymd(2019, 5, 14).and_hms_nano(19, 11, 40, 164), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.month_dmy_hms(input).unwrap().unwrap(), + want, + "month_dmy_hms/{}", + input + ) + } + assert!(parse.month_dmy_hms("not-date-time").is_none()); + } + + #[test] + fn month_dmy() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ 
+ ("7 oct 70", Utc.ymd(1970, 10, 7).and_time(Utc::now().time())), + ( + "7 oct 1970", + Utc.ymd(1970, 10, 7).and_time(Utc::now().time()), + ), + ( + "03 February 2013", + Utc.ymd(2013, 2, 3).and_time(Utc::now().time()), + ), + ( + "1 July 2013", + Utc.ymd(2013, 7, 1).and_time(Utc::now().time()), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .month_dmy(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "month_dmy/{}", + input + ) + } + assert!(parse.month_dmy("not-date-time").is_none()); + } + + #[test] + fn slash_mdy_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ("4/8/2014 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), + ("04/08/2014 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), + ("4/8/14 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), + ("04/2/2014 03:00:51", Utc.ymd(2014, 4, 2).and_hms(3, 0, 51)), + ("8/8/1965 12:00:00 AM", Utc.ymd(1965, 8, 8).and_hms(0, 0, 0)), + ( + "8/8/1965 01:00:01 PM", + Utc.ymd(1965, 8, 8).and_hms(13, 0, 1), + ), + ("8/8/1965 01:00 PM", Utc.ymd(1965, 8, 8).and_hms(13, 0, 0)), + ("8/8/1965 1:00 PM", Utc.ymd(1965, 8, 8).and_hms(13, 0, 0)), + ("8/8/1965 12:00 AM", Utc.ymd(1965, 8, 8).and_hms(0, 0, 0)), + ("4/02/2014 03:00:51", Utc.ymd(2014, 4, 2).and_hms(3, 0, 51)), + ( + "03/19/2012 10:11:59", + Utc.ymd(2012, 3, 19).and_hms(10, 11, 59), + ), + ( + "03/19/2012 10:11:59.3186369", + Utc.ymd(2012, 3, 19).and_hms_nano(10, 11, 59, 3186369), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.slash_mdy_hms(input).unwrap().unwrap(), + want, + "slash_mdy_hms/{}", + input + ) + } + assert!(parse.slash_mdy_hms("not-date-time").is_none()); + } + + #[test] + fn slash_mdy() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "3/31/2014", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()), + ), + ( + "03/31/2014", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()), + 
), + ("08/21/71", Utc.ymd(1971, 8, 21).and_time(Utc::now().time())), + ("8/1/71", Utc.ymd(1971, 8, 1).and_time(Utc::now().time())), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .slash_mdy(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "slash_mdy/{}", + input + ) + } + assert!(parse.slash_mdy("not-date-time").is_none()); + } + + #[test] + fn slash_ymd_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ("2014/4/8 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), + ("2014/04/08 22:05", Utc.ymd(2014, 4, 8).and_hms(22, 5, 0)), + ("2014/04/2 03:00:51", Utc.ymd(2014, 4, 2).and_hms(3, 0, 51)), + ("2014/4/02 03:00:51", Utc.ymd(2014, 4, 2).and_hms(3, 0, 51)), + ( + "2012/03/19 10:11:59", + Utc.ymd(2012, 3, 19).and_hms(10, 11, 59), + ), + ( + "2012/03/19 10:11:59.3186369", + Utc.ymd(2012, 3, 19).and_hms_nano(10, 11, 59, 3186369), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.slash_ymd_hms(input).unwrap().unwrap(), + want, + "slash_ymd_hms/{}", + input + ) + } + assert!(parse.slash_ymd_hms("not-date-time").is_none()); + } + + #[test] + fn slash_ymd() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + ( + "2014/3/31", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()), + ), + ( + "2014/03/31", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .slash_ymd(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "slash_ymd/{}", + input + ) + } + assert!(parse.slash_ymd("not-date-time").is_none()); + } + + #[test] + fn dot_mdy_or_ymd() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + // mm.dd.yyyy + ( + "3.31.2014", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()), + ), + ( + "03.31.2014", + Utc.ymd(2014, 3, 
31).and_time(Utc::now().time()), + ), + ("08.21.71", Utc.ymd(1971, 8, 21).and_time(Utc::now().time())), + // yyyy.mm.dd + ( + "2014.03.30", + Utc.ymd(2014, 3, 30).and_time(Utc::now().time()), + ), + ( + "2014.03", + Utc.ymd(2014, 3, Utc::now().day()) + .and_time(Utc::now().time()), + ), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .dot_mdy_or_ymd(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "dot_mdy_or_ymd/{}", + input + ) + } + assert!(parse.dot_mdy_or_ymd("not-date-time").is_none()); + } + + #[test] + fn mysql_log_timestamp() { + let parse = Parse::new(&Utc); + + let test_cases = vec![ + // yymmdd hh:mm:ss mysql log + ("171113 14:14:20", Utc.ymd(2017, 11, 13).and_hms(14, 14, 20)), + ]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.mysql_log_timestamp(input).unwrap().unwrap(), + want, + "mysql_log_timestamp/{}", + input + ) + } + assert!(parse.mysql_log_timestamp("not-date-time").is_none()); + } + + #[test] + fn chinese_ymd_hms() { + let parse = Parse::new(&Utc); + + let test_cases = vec![( + "2014年04月08日11时25分18秒", + Utc.ymd(2014, 4, 8).and_hms(11, 25, 18), + )]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse.chinese_ymd_hms(input).unwrap().unwrap(), + want, + "chinese_ymd_hms/{}", + input + ) + } + assert!(parse.chinese_ymd_hms("not-date-time").is_none()); + } + + #[test] + fn chinese_ymd() { + let parse = Parse::new(&Utc); + + let test_cases = vec![( + "2014年04月08日", + Utc.ymd(2014, 4, 8).and_time(Utc::now().time()), + )]; + + for &(input, want) in test_cases.iter() { + assert_eq!( + parse + .chinese_ymd(input) + .unwrap() + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), + "chinese_ymd/{}", + input + ) + } + assert!(parse.chinese_ymd("not-date-time").is_none()); + } +} diff --git a/dateparser/src/lib.rs 
b/dateparser/src/lib.rs index 58c8073..a75b93e 100644 --- a/dateparser/src/lib.rs +++ b/dateparser/src/lib.rs @@ -9,14 +9,12 @@ //! use std::error::Error; //! //! fn main() -> Result<(), Box> { -//! let parsed = parse("6:15pm UTC")?; -//! let now = Local::now(); -//! //! assert_eq!( -//! parsed.format("%Y-%m-%d %H:%M:%S %z").to_string(), -//! format!("{} 18:15:00 +0000", now.format("%Y-%m-%d")) +//! parse("6:15pm UTC")?, +//! Utc::now().date().and_time( +//! NaiveTime::from_hms(18, 15, 0), +//! ).unwrap(), //! ); -//! //! Ok(()) //! } //! ``` @@ -24,14 +22,33 @@ //! Use `str`'s `parse` method: //! //! ``` +//! use chrono::prelude::*; //! use dateparser::DateTimeUtc; //! use std::error::Error; //! //! fn main() -> Result<(), Box> { -//! let parsed = "2021-05-14 18:51 PDT".parse::()?.0; +//! assert_eq!( +//! "2021-05-14 18:51 PDT".parse::()?.0, +//! Utc.ymd(2021, 5, 15).and_hms(1, 51, 0), +//! ); +//! Ok(()) +//! } +//! ``` +//! +//! Parse using a custom timezone offset for a datetime string that doesn't come with a specific +//! timezone: //! -//! assert_eq!(parsed.format("%Y-%m-%d %H:%M:%S %z").to_string(), "2021-05-15 01:51:00 +0000"); +//! ``` +//! use dateparser::parse_with_timezone; +//! use chrono::offset::Utc; +//! use std::error::Error; //! +//! fn main() -> Result<(), Box> { +//! let parsed_in_utc = parse_with_timezone("6:15pm", &Utc)?; +//! assert_eq!( +//! parsed_in_utc, +//! Utc::now().date().and_hms(18, 15, 0), +//! ); //! Ok(()) //! } //! ``` @@ -42,30 +59,125 @@ //! use dateparser::DateTimeUtc; //! //! let accepted = vec![ +//! // unix timestamp //! "1511648546", //! "1620021848429", //! "1620024872717915000", +//! // rfc3339 //! "2021-05-01T01:17:02.604456Z", //! "2017-11-25T22:34:50Z", +//! // rfc2822 //! "Wed, 02 Jun 2021 06:31:39 GMT", +//! // postgres timestamp yyyy-mm-dd hh:mm:ss z +//! "2019-11-29 08:08-08", //! "2019-11-29 08:08:05-08", //! "2021-05-02 23:31:36.0741-07", //! "2021-05-02 23:31:39.12689-07", //! 
"2019-11-29 08:15:47.624504-08", +//! "2017-07-19 03:21:51+00:00", +//! // yyyy-mm-dd hh:mm:ss +//! "2014-04-26 05:24:37 PM", +//! "2021-04-30 21:14", //! "2021-04-30 21:14:10", //! "2021-04-30 21:14:10.052282", +//! "2014-04-26 17:24:37.123", +//! "2014-04-26 17:24:37.3186369", +//! "2012-08-03 18:31:59.257000000", +//! // yyyy-mm-dd hh:mm:ss z //! "2017-11-25 13:31:15 PST", //! "2017-11-25 13:31 PST", +//! "2014-12-16 06:20:00 UTC", +//! "2014-12-16 06:20:00 GMT", +//! "2014-04-26 13:13:43 +0800", +//! "2014-04-26 13:13:44 +09:00", +//! "2012-08-03 18:31:59.257000000 +0000", +//! "2015-09-30 18:48:56.35272715 UTC", +//! // yyyy-mm-dd //! "2021-02-21", +//! // yyyy-mm-dd z //! "2021-02-21 PST", +//! "2021-02-21 UTC", +//! "2020-07-20+08:00", +//! // hh:mm:ss //! "01:06:06", //! "4:00pm", //! "6:00 AM", +//! // hh:mm:ss z //! "01:06:06 PST", //! "4:00pm PST", //! "6:00 AM PST", +//! "6:00pm UTC", +//! // Mon dd hh:mm:ss +//! "May 6 at 9:24 PM", +//! "May 27 02:45:27", +//! // Mon dd, yyyy, hh:mm:ss +//! "May 8, 2009 5:57:51 PM", +//! "September 17, 2012 10:09am", +//! "September 17, 2012, 10:10:09", +//! // Mon dd, yyyy hh:mm:ss z //! "May 02, 2021 15:51:31 UTC", //! "May 02, 2021 15:51 UTC", +//! "May 26, 2021, 12:49 AM PDT", +//! "September 17, 2012 at 10:09am PST", +//! // yyyy-mon-dd +//! "2021-Feb-21", +//! // Mon dd, yyyy +//! "May 25, 2021", +//! "oct 7, 1970", +//! "oct 7, 70", +//! "oct. 7, 1970", +//! "oct. 7, 70", +//! "October 7, 1970", +//! // dd Mon yyyy hh:mm:ss +//! "12 Feb 2006, 19:17", +//! "12 Feb 2006 19:17", +//! "14 May 2019 19:11:40.164", +//! // dd Mon yyyy +//! "7 oct 70", +//! "7 oct 1970", +//! "03 February 2013", +//! "1 July 2013", +//! // mm/dd/yyyy hh:mm:ss +//! "4/8/2014 22:05", +//! "04/08/2014 22:05", +//! "4/8/14 22:05", +//! "04/2/2014 03:00:51", +//! "8/8/1965 12:00:00 AM", +//! "8/8/1965 01:00:01 PM", +//! "8/8/1965 01:00 PM", +//! "8/8/1965 1:00 PM", +//! "8/8/1965 12:00 AM", +//! "4/02/2014 03:00:51", +//! 
"03/19/2012 10:11:59", +//! "03/19/2012 10:11:59.3186369", +//! // mm/dd/yyyy +//! "3/31/2014", +//! "03/31/2014", +//! "08/21/71", +//! "8/1/71", +//! // yyyy/mm/dd hh:mm:ss +//! "2014/4/8 22:05", +//! "2014/04/08 22:05", +//! "2014/04/2 03:00:51", +//! "2014/4/02 03:00:51", +//! "2012/03/19 10:11:59", +//! "2012/03/19 10:11:59.3186369", +//! // yyyy/mm/dd +//! "2014/3/31", +//! "2014/03/31", +//! // mm.dd.yyyy +//! "3.31.2014", +//! "03.31.2014", +//! "08.21.71", +//! // yyyy.mm.dd +//! "2014.03.30", +//! "2014.03", +//! // yymmdd hh:mm:ss mysql log +//! "171113 14:14:20", +//! // chinese yyyy mm dd hh mm ss +//! "2014年04月08日11时25分18秒", +//! // chinese yyyy mm dd +//! "2014年04月08日", //! ]; //! //! for date_str in accepted { @@ -74,12 +186,54 @@ //! } //! ``` +/// Datetime string parser +/// +/// ``` +/// use chrono::prelude::*; +/// use dateparser::datetime::Parse; +/// use std::error::Error; +/// +/// fn main() -> Result<(), Box> { +/// let parse_with_local = Parse::new(&Local); +/// assert_eq!( +/// parse_with_local.parse("2021-06-05 06:19 PM")?, +/// Local.ymd(2021, 6, 5).and_hms(18, 19, 0).with_timezone(&Utc), +/// ); +/// +/// let parse_with_utc = Parse::new(&Utc); +/// assert_eq!( +/// parse_with_utc.parse("2021-06-05 06:19 PM")?, +/// Utc.ymd(2021, 6, 5).and_hms(18, 19, 0), +/// ); +/// +/// Ok(()) +/// } +/// ``` +pub mod datetime; + +/// Timezone offset string parser +/// +/// ``` +/// use chrono::prelude::*; +/// use dateparser::timezone::parse; +/// use std::error::Error; +/// +/// fn main() -> Result<(), Box> { +/// assert_eq!(parse("-0800")?, FixedOffset::west(8 * 3600)); +/// assert_eq!(parse("+10:00")?, FixedOffset::east(10 * 3600)); +/// assert_eq!(parse("PST")?, FixedOffset::west(8 * 3600)); +/// assert_eq!(parse("PDT")?, FixedOffset::west(7 * 3600)); +/// assert_eq!(parse("UTC")?, FixedOffset::west(0)); +/// assert_eq!(parse("GMT")?, FixedOffset::west(0)); +/// +/// Ok(()) +/// } +/// ``` pub mod timezone; -use anyhow::{anyhow, Error, Result}; 
+use crate::datetime::Parse; +use anyhow::{Error, Result}; use chrono::prelude::*; -use lazy_static::lazy_static; -use regex::Regex; /// DateTimeUtc is an alias for `chrono`'s `DateTime`. It implements `std::str::FromStr`'s /// `from_str` method, and it makes `str`'s `parse` method to understand the accepted date formats @@ -88,39 +242,10 @@ use regex::Regex; /// ``` /// use dateparser::DateTimeUtc; /// -/// let accepted = vec![ -/// "1511648546", -/// "1620021848429", -/// "1620024872717915000", -/// "2021-05-01T01:17:02.604456Z", -/// "2017-11-25T22:34:50Z", -/// "Wed, 02 Jun 2021 06:31:39 GMT", -/// "2019-11-29 08:08:05-08", -/// "2021-05-02 23:31:36.0741-07", -/// "2021-05-02 23:31:39.12689-07", -/// "2019-11-29 08:15:47.624504-08", -/// "2021-04-30 21:14:10", -/// "2021-04-30 21:14:10.052282", -/// "2017-11-25 13:31:15 PST", -/// "2017-11-25 13:31 PST", -/// "2021-02-21", -/// "2021-02-21 PST", -/// "01:06:06", -/// "4:00pm", -/// "6:00 AM", -/// "01:06:06 PST", -/// "4:00pm PST", -/// "6:00 AM PST", -/// "May 02, 2021 15:51:31 UTC", -/// "May 02, 2021 15:51 UTC", -/// ]; -/// -/// for date_str in accepted { -/// // parsed is DateTimeUTC and parsed.0 is chrono's DateTime -/// match date_str.parse::() { -/// Ok(parsed) => println!("PARSED {} into UTC datetime {:?}", date_str, parsed.0), -/// Err(err) => println!("ERROR from parsing {}: {}", date_str, err) -/// } +/// // parsed is DateTimeUTC and parsed.0 is chrono's DateTime +/// match "May 02, 2021 15:51:31 UTC".parse::() { +/// Ok(parsed) => println!("PARSED into UTC datetime {:?}", parsed.0), +/// Err(err) => println!("ERROR from parsing datetime string: {}", err) /// } /// ``` pub struct DateTimeUtc(pub DateTime); @@ -133,285 +258,61 @@ impl std::str::FromStr for DateTimeUtc { } } -/// parse tries to interpret the input date and/or time string with a list of parsing functions. -/// Each function can understand a specific date format. 
When all options are exhausted, parse will -/// return an error to let the caller know that no formats were matched. +/// This function tries to recognize the input datetime string with a list of accepted formats. +/// When timezone is not provided, this function assumes it's a [`chrono::Local`] datetime. For +/// custom timezone, use [`parse_with_timezone()`] instead.If all options are exhausted, +/// [`parse()`] will return an error to let the caller know that no formats were matched. /// /// ``` /// use dateparser::parse; +/// use chrono::offset::{Local, Utc}; /// use chrono_tz::US::Pacific; /// /// let parsed = parse("6:15pm").unwrap(); /// -/// // print out parsed datetime in UTC -/// println!("{:?}", parsed); +/// assert_eq!( +/// parsed, +/// Local::now().date().and_hms(18, 15, 0).with_timezone(&Utc), +/// ); /// -/// // print parsed datetime in pacific time -/// println!("{:?}", parsed.with_timezone(&Pacific)); +/// assert_eq!( +/// parsed.with_timezone(&Pacific), +/// Local::now().date().and_hms(18, 15, 0).with_timezone(&Utc).with_timezone(&Pacific), +/// ); /// ``` pub fn parse(input: &str) -> Result> { - parse_unix_timestamp(input) - .or_else(|| parse_unix_timestamp_millis(input)) - .or_else(|| parse_unix_timestamp_nanos(input)) - .or_else(|| parse_rfc3339(input)) - .or_else(|| parse_rfc2822(input)) - .or_else(|| parse_postgres_timestamp(input)) - .or_else(|| parse_postgres_timestamp_nanos(input)) - .or_else(|| parse_ymd_hms(input)) - .or_else(|| parse_ymd_hms_nanos(input)) - .or_else(|| parse_ymd_hms_z(input)) - .or_else(|| parse_ymd(input)) - .or_else(|| parse_ymd_z(input)) - .or_else(|| parse_hms_imp(input)) - .or_else(|| parse_hms_imp_z(input)) - .or_else(|| parse_bey_hms_z(input)) - .unwrap_or_else(|| Err(anyhow!("{} did not match any formats.", input))) -} - -// 1511648546 -fn parse_unix_timestamp(input: &str) -> Option>> { - lazy_static! 
{ - static ref RE: Regex = Regex::new(r"^[0-9]{10}$").unwrap(); - } - if RE.is_match(input) { - return input - .parse::() - .ok() - .map(|timestamp| Utc.timestamp(timestamp, 0).with_timezone(&Utc)) - .map(Ok); - } - None -} - -// 1620021848429 -fn parse_unix_timestamp_millis(input: &str) -> Option>> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[0-9]{13}$").unwrap(); - } - if RE.is_match(input) { - return input - .parse::() - .ok() - .map(|timestamp| Utc.timestamp_millis(timestamp).with_timezone(&Utc)) - .map(Ok); - } - None -} - -// 1620024872717915000 -fn parse_unix_timestamp_nanos(input: &str) -> Option>> { - lazy_static! { - static ref RE: Regex = Regex::new(r"^[0-9]{19}$").unwrap(); - } - if RE.is_match(input) { - return input - .parse::() - .ok() - .map(|timestamp| Utc.timestamp_nanos(timestamp).with_timezone(&Utc)) - .map(Ok); - } - None -} - -// 2021-05-01T01:17:02.604456Z -// 2017-11-25T22:34:50Z -fn parse_rfc3339(input: &str) -> Option>> { - DateTime::parse_from_rfc3339(input) - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) -} - -// Wed, 02 Jun 2021 06:31:39 GMT -fn parse_rfc2822(input: &str) -> Option>> { - DateTime::parse_from_rfc2822(input) - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) + Parse::new(&Local).parse(input) } -// 2019-11-29 08:08:05-08 -fn parse_postgres_timestamp(input: &str) -> Option>> { - DateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S%#z") - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) -} - -// 2021-05-02 23:31:36.0741-07 -// 2021-05-02 23:31:39.12689-07 -// 2019-11-29 08:15:47.624504-08 -fn parse_postgres_timestamp_nanos(input: &str) -> Option>> { - DateTime::parse_from_str(input, "%Y-%m-%d %H:%M:%S.%f%#z") - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) -} - -// 2021-04-30 21:14:10 -fn parse_ymd_hms(input: &str) -> Option>> { - Local - .datetime_from_str(input, "%Y-%m-%d %H:%M:%S") - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) -} - -// 
2021-04-30 21:14:10.052282 -fn parse_ymd_hms_nanos(input: &str) -> Option>> { - Local - .datetime_from_str(input, "%Y-%m-%d %H:%M:%S.%f") - .ok() - .map(|parsed| parsed.with_timezone(&Utc)) - .map(Ok) -} - -// 2017-11-25 13:31:15 PST -// 2017-11-25 13:31 PST -fn parse_ymd_hms_z(input: &str) -> Option>> { - lazy_static! { - static ref RE: Regex = Regex::new( - r"^(?P
[0-9]{4}-[0-9]{2}-[0-9]{2}\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?)\s+(?P[+-:a-zA-Z0-9]{3,6})$", - ).unwrap(); - } - if let Some(caps) = RE.captures(input) { - if let Some(matched_dt) = caps.name("dt") { - if let Some(matched_tz) = caps.name("tz") { - return match timezone::parse(matched_tz.as_str()) { - Ok(offset) => { - NaiveDateTime::parse_from_str(matched_dt.as_str(), "%Y-%m-%d %H:%M:%S") - .or_else(|_| { - NaiveDateTime::parse_from_str(matched_dt.as_str(), "%Y-%m-%d %H:%M") - }) - .ok() - .and_then(|parsed| offset.from_local_datetime(&parsed).single()) - .map(|datetime| datetime.with_timezone(&Utc)) - .map(Ok) - } - Err(err) => Some(Err(err)), - }; - } - } - } - None -} - -// 2021-02-21 -fn parse_ymd(input: &str) -> Option>> { - NaiveDate::parse_from_str(input, "%Y-%m-%d") - .ok() - .map(|parsed| parsed.and_time(Local::now().time())) - .and_then(|datetime| Local.from_local_datetime(&datetime).single()) - .map(|local| local.with_timezone(&Utc)) - .map(Ok) -} - -// 2021-02-21 PST -fn parse_ymd_z(input: &str) -> Option>> { - lazy_static! 
{ - static ref RE: Regex = - Regex::new(r"^(?P[0-9]{4}-[0-9]{2}-[0-9]{2})\s+(?P[+-:a-zA-Z0-9]{3,6})$") - .unwrap(); - } - if let Some(caps) = RE.captures(input) { - if let Some(matched_date) = caps.name("date") { - if let Some(matched_tz) = caps.name("tz") { - return match timezone::parse(matched_tz.as_str()) { - Ok(offset) => NaiveDate::parse_from_str(matched_date.as_str(), "%Y-%m-%d") - .ok() - .and_then(|parsed| { - offset - .from_local_datetime(&parsed.and_time(Local::now().time())) - .single() - }) - .map(|datetime| datetime.with_timezone(&Utc)) - .map(Ok), - Err(err) => Some(Err(err)), - }; - } - } - } - None -} - -// 01:06:06 -// 4:00pm -// 6:00 AM -fn parse_hms_imp(input: &str) -> Option>> { - NaiveTime::parse_from_str(input, "%H:%M:%S") - .or_else(|_| NaiveTime::parse_from_str(input, "%I:%M%P")) - .or_else(|_| NaiveTime::parse_from_str(input, "%I:%M %P")) - .ok() - .and_then(|parsed| Local::now().date().and_time(parsed)) - .map(|datetime| datetime.with_timezone(&Utc)) - .map(Ok) -} - -// 01:06:06 PST -// 4:00pm PST -// 6:00 AM PST -fn parse_hms_imp_z(input: &str) -> Option>> { - lazy_static! { - static ref RE: Regex = Regex::new( - r"^(?P
[a-zA-Z]{3}\s+[0-9]{1,2},\s+[0-9]{4}\s+[0-9]{2}:[0-9]{2}(:[0-9]{2})?)\s+(?P[+-:a-zA-Z0-9]{3,6})$", - ).unwrap(); - } - if let Some(caps) = RE.captures(input) { - if let Some(matched_dt) = caps.name("dt") { - if let Some(matched_tz) = caps.name("tz") { - return match timezone::parse(matched_tz.as_str()) { - Ok(offset) => { - NaiveDateTime::parse_from_str(matched_dt.as_str(), "%b %e, %Y %H:%M:%S") - .or_else(|_| { - NaiveDateTime::parse_from_str( - matched_dt.as_str(), - "%b %e, %Y %H:%M", - ) - }) - .ok() - .and_then(|parsed| offset.from_local_datetime(&parsed).single()) - .map(|datetime| datetime.with_timezone(&Utc)) - .map(Ok) - } - Err(err) => Some(Err(err)), - }; - } - } - } - None +/// Similar to [`parse()`], this function takes a datetime string and a custom [`chrono::TimeZone`], +/// and tries to parse the datetime string. When timezone is not given in the string, this function +/// will assume and parse the datetime by the custom timezone provided in this function's arguments. +/// +/// ``` +/// use dateparser::parse_with_timezone; +/// use chrono::offset::{Local, Utc}; +/// use chrono_tz::US::Pacific; +/// +/// let parsed_in_local = parse_with_timezone("6:15pm", &Local).unwrap(); +/// assert_eq!( +/// parsed_in_local, +/// Local::now().date().and_hms(18, 15, 0).with_timezone(&Utc), +/// ); +/// +/// let parsed_in_utc = parse_with_timezone("6:15pm", &Utc).unwrap(); +/// assert_eq!( +/// parsed_in_utc, +/// Utc::now().date().and_hms(18, 15, 0), +/// ); +/// +/// let parsed_in_pacific = parse_with_timezone("6:15pm", &Pacific).unwrap(); +/// assert_eq!( +/// parsed_in_pacific, +/// Utc::now().with_timezone(&Pacific).date().and_hms(18, 15, 0).with_timezone(&Utc), +/// ); +/// ``` +pub fn parse_with_timezone(input: &str, tz: &Tz2) -> Result> { + Parse::new(tz).parse(input) } #[cfg(test)] @@ -425,76 +326,52 @@ mod tests { } #[test] - fn test_parse() { + fn parse_in_local() { let test_cases = vec![ ( - "unix timestamp", + "unix_timestamp", "1511648546", - 
Utc::ymd(&Utc, 2017, 11, 25).and_hms(22, 22, 26), - Trunc::None, - ), - ( - "unix timestamp millis", - "1620021848429", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_milli(6, 4, 8, 429), - Trunc::None, - ), - ( - "unix_timestamp_nanos", - "1620024872717915000", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_nano(6, 54, 32, 717915000), + Utc.ymd(2017, 11, 25).and_hms(22, 22, 26), Trunc::None, ), ( "rfc3339", "2017-11-25T22:34:50Z", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(22, 34, 50), + Utc.ymd(2017, 11, 25).and_hms(22, 34, 50), Trunc::None, ), ( "rfc2822", "Wed, 02 Jun 2021 06:31:39 GMT", - Utc::ymd(&Utc, 2021, 6, 2).and_hms(6, 31, 39), + Utc.ymd(2021, 6, 2).and_hms(6, 31, 39), Trunc::None, ), ( "postgres_timestamp", "2019-11-29 08:08:05-08", - Utc::ymd(&Utc, 2019, 11, 29).and_hms(16, 8, 5), - Trunc::None, - ), - ( - "postgres_timestamp_nanos", - "2019-11-29 08:15:47.624504-08", - Utc::ymd(&Utc, 2019, 11, 29).and_hms_nano(16, 15, 47, 624504), + Utc.ymd(2019, 11, 29).and_hms(16, 8, 5), Trunc::None, ), ( "ymd_hms", "2021-04-30 21:14:10", - Local::ymd(&Local, 2021, 4, 30) + Local + .ymd(2021, 4, 30) .and_hms(21, 14, 10) .with_timezone(&Utc), Trunc::None, ), - ( - "ymd_hms_nanos", - "2021-04-30 21:14:10.052282", - Local::ymd(&Local, 2021, 4, 30) - .and_hms_nano(21, 14, 10, 52282) - .with_timezone(&Utc), - Trunc::None, - ), ( "ymd_hms_z", "2017-11-25 13:31:15 PST", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(21, 31, 15), + Utc.ymd(2017, 11, 25).and_hms(21, 31, 15), Trunc::None, ), ( "ymd", "2021-02-21", - Local::ymd(&Local, 2021, 2, 21) + Local + .ymd(2021, 2, 21) .and_time(Local::now().time()) .unwrap() .with_timezone(&Utc), @@ -505,13 +382,17 @@ mod tests { "2021-02-21 PST", FixedOffset::west(8 * 3600) .ymd(2021, 2, 21) - .and_time(Local::now().time()) + .and_time( + Utc::now() + .with_timezone(&FixedOffset::west(8 * 3600)) + .time(), + ) .unwrap() .with_timezone(&Utc), Trunc::Seconds, ), ( - "hms_imp", + "hms", "4:00pm", Local::now() .date() @@ -521,36 +402,176 @@ mod tests { Trunc::None, ), 
( - "hms_imp_z", + "hms_z", "6:00 AM PST", - FixedOffset::west(8 * 3600) - .from_local_date(&Local::now().date().naive_local()) + Utc::now() + .with_timezone(&FixedOffset::west(8 * 3600)) + .date() .and_time(NaiveTime::from_hms(6, 0, 0)) .unwrap() .with_timezone(&Utc), Trunc::None, ), ( - "bey_hms_z", + "month_ymd", + "2021-Feb-21", + Local + .ymd(2021, 2, 21) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "month_md_hms", + "May 27 02:45:27", + Local + .ymd(Local::now().year(), 5, 27) + .and_hms(2, 45, 27) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "month_mdy_hms", + "May 8, 2009 5:57:51 PM", + Local + .ymd(2009, 5, 8) + .and_hms(17, 57, 51) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "month_mdy_hms_z", "May 02, 2021 15:51 UTC", - Utc::ymd(&Utc, 2021, 5, 2).and_hms(15, 51, 0), + Utc.ymd(2021, 5, 2).and_hms(15, 51, 0), + Trunc::None, + ), + ( + "month_mdy", + "May 25, 2021", + Local + .ymd(2021, 5, 25) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "month_dmy_hms", + "14 May 2019 19:11:40.164", + Local + .ymd(2019, 5, 14) + .and_hms_nano(19, 11, 40, 164) + .with_timezone(&Utc), Trunc::None, ), + ( + "month_dmy", + "1 July 2013", + Local + .ymd(2013, 7, 1) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "slash_mdy_hms", + "03/19/2012 10:11:59", + Local + .ymd(2012, 3, 19) + .and_hms(10, 11, 59) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "slash_mdy", + "08/21/71", + Local + .ymd(1971, 8, 21) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "slash_ymd_hms", + "2012/03/19 10:11:59", + Local + .ymd(2012, 3, 19) + .and_hms(10, 11, 59) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "slash_ymd", + "2014/3/31", + Local + .ymd(2014, 3, 31) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "dot_mdy_or_ymd", + 
"2014.03.30", + Local + .ymd(2014, 3, 30) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), + ( + "mysql_log_timestamp", + "171113 14:14:20", + Local + .ymd(2017, 11, 13) + .and_hms(14, 14, 20) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "chinese_ymd_hms", + "2014年04月08日11时25分18秒", + Local + .ymd(2014, 4, 8) + .and_hms(11, 25, 18) + .with_timezone(&Utc), + Trunc::None, + ), + ( + "chinese_ymd", + "2014年04月08日", + Local + .ymd(2014, 4, 8) + .and_time(Local::now().time()) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, + ), ]; for &(test, input, want, trunc) in test_cases.iter() { match trunc { Trunc::None => { - assert_eq!(parse(input).unwrap(), want, "test_parse/{}/{}", test, input) + assert_eq!( + super::parse(input).unwrap(), + want, + "parse_in_local/{}/{}", + test, + input + ) } Trunc::Seconds => assert_eq!( - parse(input) + super::parse(input) .unwrap() .trunc_subsecs(0) .with_second(0) .unwrap(), want.trunc_subsecs(0).with_second(0).unwrap(), - "test_parse/{}/{}", + "parse_in_local/{}/{}", test, input ), @@ -559,400 +580,203 @@ mod tests { } #[test] - fn test_parse_unix_timestamp() { + fn parse_with_timezone_in_utc() { let test_cases = vec![ - ("0000000000", Utc::ymd(&Utc, 1970, 1, 1).and_hms(0, 0, 0)), ( + "unix_timestamp", "1511648546", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(22, 22, 26), + Utc.ymd(2017, 11, 25).and_hms(22, 22, 26), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_unix_timestamp(input).unwrap().unwrap(), - want, - "parse_unix_timestamp/{}", - input - ) - } - assert!(parse_unix_timestamp("15116485461").is_none()); - assert!(parse_unix_timestamp("not-a-ts").is_none()); - } - - #[test] - fn test_parse_unix_timestamp_millis() { - let test_cases = vec![ - ("0000000000000", Utc::ymd(&Utc, 1970, 1, 1).and_hms(0, 0, 0)), ( - "1620021848429", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_milli(6, 4, 8, 429), + "rfc3339", + "2017-11-25T22:34:50Z", + Utc.ymd(2017, 
11, 25).and_hms(22, 34, 50), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_unix_timestamp_millis(input).unwrap().unwrap(), - want, - "parse_unix_timestamp_millis/{}", - input - ) - } - assert!(parse_unix_timestamp_millis("16200218484291").is_none()); - assert!(parse_unix_timestamp_millis("not-a-ts").is_none()); - } - - #[test] - fn test_parse_unix_timestamp_nanos() { - let test_cases = vec![ ( - "0000000000000000000", - Utc::ymd(&Utc, 1970, 1, 1).and_hms(0, 0, 0), + "rfc2822", + "Wed, 02 Jun 2021 06:31:39 GMT", + Utc.ymd(2021, 6, 2).and_hms(6, 31, 39), + Trunc::None, ), ( - "1620024872717915000", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_nano(6, 54, 32, 717915000), + "postgres_timestamp", + "2019-11-29 08:08:05-08", + Utc.ymd(2019, 11, 29).and_hms(16, 8, 5), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_unix_timestamp_nanos(input).unwrap().unwrap(), - want, - "parse_unix_timestamp_nanos/{}", - input - ) - } - assert!(parse_unix_timestamp_nanos("16200248727179150001").is_none()); - assert!(parse_unix_timestamp_nanos("not-a-ts").is_none()); - } - - #[test] - fn test_parse_rfc3339() { - let test_cases = vec![ ( - "2021-05-01T01:17:02.604456Z", - Utc::ymd(&Utc, 2021, 5, 1).and_hms_nano(1, 17, 2, 604456000), + "ymd_hms", + "2021-04-30 21:14:10", + Utc.ymd(2021, 4, 30).and_hms(21, 14, 10), + Trunc::None, ), ( - "2017-11-25T22:34:50Z", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(22, 34, 50), + "ymd_hms_z", + "2017-11-25 13:31:15 PST", + Utc.ymd(2017, 11, 25).and_hms(21, 31, 15), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_rfc3339(input).unwrap().unwrap(), - want, - "parse_rfc3339/{}", - input - ) - } - assert!(parse_rfc3339("2017-11-25 22:34:50").is_none()); - assert!(parse_rfc3339("not-date-time").is_none()); - } - - #[test] - fn test_parse_rfc2822() { - let test_cases = vec![ ( - "Wed, 02 Jun 2021 06:31:39 GMT", - Utc::ymd(&Utc, 2021, 6, 
2).and_hms(6, 31, 39), + "ymd", + "2021-02-21", + Utc.ymd(2021, 2, 21).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ( - "Wed, 02 Jun 2021 06:31:39 PDT", - Utc::ymd(&Utc, 2021, 6, 2).and_hms(13, 31, 39), + "ymd_z", + "2021-02-21 PST", + FixedOffset::west(8 * 3600) + .ymd(2021, 2, 21) + .and_time( + Utc::now() + .with_timezone(&FixedOffset::west(8 * 3600)) + .time(), + ) + .unwrap() + .with_timezone(&Utc), + Trunc::Seconds, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_rfc2822(input).unwrap().unwrap(), - want, - "parse_rfc2822/{}", - input - ) - } - assert!(parse_rfc2822("02 Jun 2021 06:31:39").is_none()); - assert!(parse_rfc2822("not-date-time").is_none()); - } - - #[test] - fn test_parse_postgres_timestamp() { - let test_cases = vec![( - "2019-11-29 08:08:05-08", - Utc::ymd(&Utc, 2019, 11, 29).and_hms(16, 8, 5), - )]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_postgres_timestamp(input).unwrap().unwrap(), - want, - "parse_postgres_timestamp/{}", - input - ) - } - assert!(parse_postgres_timestamp("not-date-time").is_none()); - } - - #[test] - fn test_parse_postgres_timestamp_nanos() { - let test_cases = vec![ ( - "2021-05-02 23:31:36.0741-07", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_nano(6, 31, 36, 741), + "hms", + "4:00pm", + Utc::now() + .date() + .and_time(NaiveTime::from_hms(16, 0, 0)) + .unwrap(), + Trunc::None, ), ( - "2021-05-02 23:31:39.12689-07", - Utc::ymd(&Utc, 2021, 5, 3).and_hms_nano(6, 31, 39, 12689), + "hms_z", + "6:00 AM PST", + FixedOffset::west(8 * 3600) + .from_local_date( + &Utc::now() + .with_timezone(&FixedOffset::west(8 * 3600)) + .date() + .naive_local(), + ) + .and_time(NaiveTime::from_hms(6, 0, 0)) + .unwrap() + .with_timezone(&Utc), + Trunc::None, ), ( - "2019-11-29 08:15:47.624504-08", - Utc::ymd(&Utc, 2019, 11, 29).and_hms_nano(16, 15, 47, 624504), + "month_ymd", + "2021-Feb-21", + Utc.ymd(2021, 2, 21).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), - ]; - - for 
&(input, want) in test_cases.iter() { - assert_eq!( - parse_postgres_timestamp_nanos(input).unwrap().unwrap(), - want, - "parse_postgres_timestamp_nanos/{}", - input - ) - } - assert!(parse_postgres_timestamp_nanos("not-date-time").is_none()); - } - - #[test] - fn test_parse_ymd_hms() { - let test_cases = vec![( - "2021-04-30 21:14:10", - Local::ymd(&Local, 2021, 4, 30) - .and_hms(21, 14, 10) - .with_timezone(&Utc), - )]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_ymd_hms(input).unwrap().unwrap(), - want, - "parse_ymd_hms/{}", - input - ) - } - assert!(parse_ymd_hms("not-date-time").is_none()); - } - - #[test] - fn test_parse_ymd_hms_nanos() { - let test_cases = vec![( - "2021-04-30 21:14:10.052282", - Local::ymd(&Local, 2021, 4, 30) - .and_hms_nano(21, 14, 10, 52282) - .with_timezone(&Utc), - )]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_ymd_hms_nanos(input).unwrap().unwrap(), - want, - "parse_ymd_hms_nanos/{}", - input - ) - } - assert!(parse_ymd_hms_nanos("not-date-time").is_none()); - } - - #[test] - fn test_parse_ymd_hms_z() { - let test_cases = vec![ ( - "2017-11-25 13:31:15 PST", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(21, 31, 15), + "month_md_hms", + "May 27 02:45:27", + Utc.ymd(Utc::now().year(), 5, 27).and_hms(2, 45, 27), + Trunc::None, ), ( - "2017-11-25 13:31 PST", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(21, 31, 0), + "month_mdy_hms", + "May 8, 2009 5:57:51 PM", + Utc.ymd(2009, 5, 8).and_hms(17, 57, 51), + Trunc::None, ), ( - "2017-11-25 13:31:15 UTC", - Utc::ymd(&Utc, 2017, 11, 25).and_hms(13, 31, 15), + "month_mdy_hms_z", + "May 02, 2021 15:51 UTC", + Utc.ymd(2021, 5, 2).and_hms(15, 51, 0), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_ymd_hms_z(input).unwrap().unwrap(), - want, - "parse_ymd_hms_z/{}", - input - ) - } - assert!(parse_ymd_hms_z("not-date-time").is_none()); - } - - #[test] - fn test_parse_ymd() { - let test_cases = vec![( - "2021-02-21", - 
Local::ymd(&Local, 2021, 2, 21) - .and_time(Local::now().time()) - .map(|dt| dt.with_timezone(&Utc)), - )]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_ymd(input) - .unwrap() - .unwrap() - .trunc_subsecs(0) - .with_second(0) - .unwrap(), - want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), - "parse_ymd/{}", - input - ) - } - assert!(parse_ymd("not-date-time").is_none()); - } - - #[test] - fn test_parse_ymd_z() { - let test_cases = vec![ ( - "2021-02-21 PST", - FixedOffset::west(8 * 3600) - .ymd(2021, 2, 21) - .and_time(Local::now().time()) - .map(|dt| dt.with_timezone(&Utc)), + "month_mdy", + "May 25, 2021", + Utc.ymd(2021, 5, 25).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ( - "2021-02-21 UTC", - FixedOffset::west(0) - .ymd(2021, 2, 21) - .and_time(Local::now().time()) - .map(|dt| dt.with_timezone(&Utc)), + "month_dmy_hms", + "14 May 2019 19:11:40.164", + Utc.ymd(2019, 5, 14).and_hms_nano(19, 11, 40, 164), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_ymd_z(input) - .unwrap() - .unwrap() - .trunc_subsecs(0) - .with_second(0) - .unwrap(), - want.unwrap().trunc_subsecs(0).with_second(0).unwrap(), - "parse_ymd_z/{}", - input - ) - } - assert!(parse_ymd_z("not-date-time").is_none()); - } - - #[test] - fn test_parse_hms_imp() { - let test_cases = vec![ ( - "01:06:06", - Local::now() - .date() - .and_time(NaiveTime::from_hms(1, 6, 6)) - .map(|dt| dt.with_timezone(&Utc)), + "month_dmy", + "1 July 2013", + Utc.ymd(2013, 7, 1).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ( - "4:00pm", - Local::now() - .date() - .and_time(NaiveTime::from_hms(16, 0, 0)) - .map(|dt| dt.with_timezone(&Utc)), + "slash_mdy_hms", + "03/19/2012 10:11:59", + Utc.ymd(2012, 3, 19).and_hms(10, 11, 59), + Trunc::None, ), ( - "6:00 AM", - Local::now() - .date() - .and_time(NaiveTime::from_hms(6, 0, 0)) - .map(|dt| dt.with_timezone(&Utc)), + "slash_mdy", + "08/21/71", + Utc.ymd(1971, 8, 
21).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_hms_imp(input).unwrap().unwrap(), - want.unwrap(), - "parse_hms_imp/{}", - input - ) - } - assert!(parse_hms_imp("not-date-time").is_none()); - } - - #[test] - fn test_parse_hms_imp_z() { - let test_cases = vec![ ( - "01:06:06 PST", - FixedOffset::west(8 * 3600) - .from_local_date(&Local::now().date().naive_local()) - .and_time(NaiveTime::from_hms(1, 6, 6)) - .map(|dt| dt.with_timezone(&Utc)), + "slash_ymd_hms", + "2012/03/19 10:11:59", + Utc.ymd(2012, 3, 19).and_hms(10, 11, 59), + Trunc::None, ), ( - "4:00pm PST", - FixedOffset::west(8 * 3600) - .from_local_date(&Local::now().date().naive_local()) - .and_time(NaiveTime::from_hms(16, 0, 0)) - .map(|dt| dt.with_timezone(&Utc)), + "slash_ymd", + "2014/3/31", + Utc.ymd(2014, 3, 31).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ( - "6:00 AM PST", - FixedOffset::west(8 * 3600) - .from_local_date(&Local::now().date().naive_local()) - .and_time(NaiveTime::from_hms(6, 0, 0)) - .map(|dt| dt.with_timezone(&Utc)), + "dot_mdy_or_ymd", + "2014.03.30", + Utc.ymd(2014, 3, 30).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ( - "6:00pm UTC", - FixedOffset::west(0) - .from_local_date(&Local::now().date().naive_local()) - .and_time(NaiveTime::from_hms(18, 0, 0)) - .map(|dt| dt.with_timezone(&Utc)), + "mysql_log_timestamp", + "171113 14:14:20", + Utc.ymd(2017, 11, 13).and_hms(14, 14, 20), + Trunc::None, ), - ]; - - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_hms_imp_z(input).unwrap().unwrap(), - want.unwrap(), - "parse_hms_imp_z/{}", - input - ) - } - assert!(parse_hms_imp_z("not-date-time").is_none()); - } - - #[test] - fn test_parse_bey_hms_z() { - let test_cases = vec![ ( - "May 02, 2021 15:51:31 UTC", - Utc::ymd(&Utc, 2021, 5, 2).and_hms(15, 51, 31), + "chinese_ymd_hms", + "2014年04月08日11时25分18秒", + Utc.ymd(2014, 4, 8).and_hms(11, 25, 18), + Trunc::None, 
), ( - "May 02, 2021 15:51 UTC", - Utc::ymd(&Utc, 2021, 5, 2).and_hms(15, 51, 0), + "chinese_ymd", + "2014年04月08日", + Utc.ymd(2014, 4, 8).and_time(Utc::now().time()).unwrap(), + Trunc::Seconds, ), ]; - for &(input, want) in test_cases.iter() { - assert_eq!( - parse_bey_hms_z(input).unwrap().unwrap(), - want, - "parse_bey_hms_z/{}", - input - ) + for &(test, input, want, trunc) in test_cases.iter() { + match trunc { + Trunc::None => { + assert_eq!( + super::parse_with_timezone(input, &Utc).unwrap(), + want, + "parse_with_timezone_in_utc/{}/{}", + test, + input + ) + } + Trunc::Seconds => assert_eq!( + super::parse_with_timezone(input, &Utc) + .unwrap() + .trunc_subsecs(0) + .with_second(0) + .unwrap(), + want.trunc_subsecs(0).with_second(0).unwrap(), + "parse_with_timezone_in_utc/{}/{}", + test, + input + ), + }; } - assert!(parse_bey_hms_z("not-date-time").is_none()); } } diff --git a/dateparser/src/timezone.rs b/dateparser/src/timezone.rs index 85038bb..ca9121e 100644 --- a/dateparser/src/timezone.rs +++ b/dateparser/src/timezone.rs @@ -6,23 +6,6 @@ use chrono::offset::FixedOffset; /// /// The additional `colon` may be used to parse a mandatory or optional `:` between hours and minutes, /// and should return a valid FixedOffset or `Err` when parsing fails. -/// -/// ``` -/// use chrono::prelude::*; -/// use dateparser::timezone::parse; -/// use std::error::Error; -/// -/// fn main() -> Result<(), Box> { -/// assert_eq!(parse("-0800")?, FixedOffset::west(8 * 3600)); -/// assert_eq!(parse("+10:00")?, FixedOffset::east(10 * 3600)); -/// assert_eq!(parse("PST")?, FixedOffset::west(8 * 3600)); -/// assert_eq!(parse("PDT")?, FixedOffset::west(7 * 3600)); -/// assert_eq!(parse("UTC")?, FixedOffset::west(0)); -/// assert_eq!(parse("GMT")?, FixedOffset::west(0)); -/// -/// Ok(()) -/// } -/// ``` pub fn parse(s: &str) -> Result { let offset = if s.contains(':') { parse_offset_internal(s, colon_or_space, false)? 
@@ -138,7 +121,7 @@ fn equals(s: &str, pattern: &str) -> bool { } /// Consumes any number (including zero) of colon or spaces. -pub fn colon_or_space(s: &str) -> Result<&str> { +fn colon_or_space(s: &str) -> Result<&str> { Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace())) } @@ -147,7 +130,7 @@ mod tests { use super::*; #[test] - fn test_parse() { + fn parse() { let test_cases = vec![ ("-0800", FixedOffset::west(8 * 3600)), ("+10:00", FixedOffset::east(10 * 3600)), @@ -158,7 +141,7 @@ mod tests { ]; for &(input, want) in test_cases.iter() { - assert_eq!(parse(input).unwrap(), want, "parse/{}", input) + assert_eq!(super::parse(input).unwrap(), want, "parse/{}", input) } } }