From 3fa72a357b4242ec3f252df77a90387fcc528d9d Mon Sep 17 00:00:00 2001 From: Guillaume Girol Date: Fri, 22 Nov 2024 12:00:00 +0000 Subject: [PATCH] fix parsing dates with months abbreviated in some locales Current code first attempts to detect english abbreviations for %b and only if the string does not start with one of the months, tries the locale specific names. This is incorrect for some locales. For example in fr_FR %b for november is `nov.`. Parsing `nov. 29` as `%b %d` would fail because we wrongly assume that the month is just `nov` and not `nov.`. Then we attempt to parse `. 29` as ` %d`. The correct solution would be to try english and if later the string does not match to backtrack, but this does not match the existing flow of the code. Instead this restricts the fast path to matching full words to the english locale, not only prefixes. Fixes: https://github.com/tstack/lnav/issues/1086 --- src/ptimec.hh | 12 +++++++++++- test/test_date_time_scanner.cc | 4 ++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ptimec.hh b/src/ptimec.hh index e7aaf6ef5d..00e8821805 100644 --- a/src/ptimec.hh +++ b/src/ptimec.hh @@ -95,7 +95,17 @@ bool ptime_b_slow(struct exttm* dst, inline bool ptime_b(struct exttm* dst, const char* str, off_t& off_inout, ssize_t len) { - if (off_inout + 3 < len) { + // fast path to detect english abbreviated months + // + // only detect english abbreviated months if they end at a word boundary. + // if the abbreviated month in the current locale is longer than 3 letters, + // and starts with the same letters as an english locale month abbreviation, + // then the computation of off_inout is incorrect. + // + // Ex: in fr_FR november is `nov.`. Parsing `nov. 29` as `%b %d` fails if + // this fast path is taken as later we will attempt to parse `. 29` as + // ` %d`.
+ if (off_inout + 3 < len && isspace(static_cast<unsigned char>(str[off_inout + 3]))) { auto month_start = (unsigned char*) &str[off_inout]; uint32_t month_int = ABR_TO_INT(month_start[0] & ~0x20UL, month_start[1] & ~0x20UL, diff --git a/test/test_date_time_scanner.cc b/test/test_date_time_scanner.cc index a868222c92..06e9f73ef1 100644 --- a/test/test_date_time_scanner.cc +++ b/test/test_date_time_scanner.cc @@ -203,6 +203,7 @@ TEST_CASE("date_time_scanner") { const char* en_date = "Jan 1 12:00:00"; const char* fr_date = "août 19 11:08:37"; + const char* fr_date2 = "nov. 29 20:23:37"; struct timeval en_tv, fr_tv; struct exttm en_tm, fr_tm; date_time_scanner dts; @@ -213,6 +214,9 @@ TEST_CASE("date_time_scanner") dts.clear(); assert(dts.scan(fr_date, strlen(fr_date), nullptr, &fr_tm, fr_tv) != nullptr); + dts.clear(); + assert(dts.scan(fr_date2, strlen(fr_date2), nullptr, &fr_tm, fr_tv) + != nullptr); } }