From 3fa72a357b4242ec3f252df77a90387fcc528d9d Mon Sep 17 00:00:00 2001
From: Guillaume Girol <symphorien+git@xlumurb.eu>
Date: Fri, 22 Nov 2024 12:00:00 +0000
Subject: [PATCH] fix parsing dates with months abbreviated in some locales

The current code first attempts to detect English abbreviations for %b
and, only if the string does not start with one of those month names,
tries the locale-specific names.

This is incorrect for some locales.
For example in fr_FR %b for november is `nov.`. Parsing `nov. 29` as `%b
%d` would fail because we wrongly assume that the month is just `nov`
and not `nov.`. Then we attempt to parse `. 29` as ` %d`.

The correct solution would be to try english and if later the string
does not match to backtrack, but this does not match the existing flow
of the code.

Instead, this restricts the English-locale fast path to matching full
words, not only prefixes.

Fixes: https://github.com/tstack/lnav/issues/1086
---
 src/ptimec.hh                  | 12 +++++++++++-
 test/test_date_time_scanner.cc |  4 ++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/ptimec.hh b/src/ptimec.hh
index e7aaf6ef5d..00e8821805 100644
--- a/src/ptimec.hh
+++ b/src/ptimec.hh
@@ -95,7 +95,17 @@ bool ptime_b_slow(struct exttm* dst,
 inline bool
 ptime_b(struct exttm* dst, const char* str, off_t& off_inout, ssize_t len)
 {
-    if (off_inout + 3 < len) {
+    // fast path to detect english abbreviated months
+    //
+    // only detect english abbreviated months if they end at a word boundary.
+    // if the abbreviated month in the current locale is longer than 3 letters,
+    // and starts with the same letters as an english locale month abbreviation,
+    // then the computation of off_inout is incorrect.
+    //
+    // Ex: in fr_FR november is `nov.`. Parsing `nov. 29` as `%b %d` fails if
+    // this fast path is taken as later we will attempt to parse `. 29` as
+    // ` %d`. The cast keeps isspace() well-defined for bytes >= 0x80 (UTF-8).
+    if (off_inout + 3 < len && isspace((unsigned char) str[off_inout + 3])) {
         auto month_start = (unsigned char*) &str[off_inout];
         uint32_t month_int = ABR_TO_INT(month_start[0] & ~0x20UL,
                                         month_start[1] & ~0x20UL,
diff --git a/test/test_date_time_scanner.cc b/test/test_date_time_scanner.cc
index a868222c92..06e9f73ef1 100644
--- a/test/test_date_time_scanner.cc
+++ b/test/test_date_time_scanner.cc
@@ -203,6 +203,7 @@ TEST_CASE("date_time_scanner")
     {
         const char* en_date = "Jan  1 12:00:00";
         const char* fr_date = "août 19 11:08:37";
+        const char* fr_date2 = "nov. 29 20:23:37";
         struct timeval en_tv, fr_tv;
         struct exttm en_tm, fr_tm;
         date_time_scanner dts;
@@ -213,6 +214,9 @@ TEST_CASE("date_time_scanner")
             dts.clear();
             assert(dts.scan(fr_date, strlen(fr_date), nullptr, &fr_tm, fr_tv)
                    != nullptr);
+            dts.clear();
+            assert(dts.scan(fr_date2, strlen(fr_date2), nullptr, &fr_tm, fr_tv)
+                   != nullptr);
         }
     }