From 55de927708bc3ce019da330aa9a26a9043b1b748 Mon Sep 17 00:00:00 2001 From: Jon Stovell Date: Mon, 30 Dec 2024 14:18:38 -0700 Subject: [PATCH 1/2] Clamps timestamps to accepted range in SMF\Time methods Signed-off-by: Jon Stovell --- Sources/Time.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/Time.php b/Sources/Time.php index 0b028ec262..19cc9012c2 100644 --- a/Sources/Time.php +++ b/Sources/Time.php @@ -196,6 +196,10 @@ public function __construct(string $datetime = 'now', \DateTimeZone|string|null $datetime = self::sanitize($datetime); + if (str_starts_with($datetime, '@')) { + $datetime = '@' . min(max((int) ltrim($datetime, '@'), PHP_INT_MIN), PHP_INT_MAX); + } + if ( // If $datetime was a Unix timestamp, set the time zone to the one // we were told to use. Honestly, it's a mystery why the \DateTime From 8b1a6ddc29dba730e8ee5cf2a77a395614efb8e1 Mon Sep 17 00:00:00 2001 From: Jon Stovell Date: Mon, 9 Dec 2024 14:33:11 -0700 Subject: [PATCH 2/2] Improves handling of the [time] BBCode Signed-off-by: Jon Stovell --- Sources/Msg.php | 26 ++- Sources/Parsers/BBCodeParser.php | 351 ++++++++++++++++++++++++++++++- 2 files changed, 369 insertions(+), 8 deletions(-) diff --git a/Sources/Msg.php b/Sources/Msg.php index 5dcf71f3e0..29ef811f62 100644 --- a/Sources/Msg.php +++ b/Sources/Msg.php @@ -758,9 +758,23 @@ function ($m) { // Let's look at the time tags... $message = preg_replace_callback( - '~\[time(?:=(absolute))*\](.+?)\[/time\]~i', - function ($m) { - return '[time]' . (is_numeric("{$m[2]}") || @strtotime("{$m[2]}") == 0 ? "{$m[2]}" : strtotime("{$m[2]}") - ("{$m[1]}" == 'absolute' ? 0 : ((Config::$modSettings['time_offset'] + User::$me->time_offset) * 3600))) . '[/time]'; + '~\[time(?:=([^\\]]*))?\](.+?)\[/time\]~i', + function ($matches) { + return preg_replace( + [ + '~^]*\bdatetime="([^"]+)"[^>]*>(.*)$~', + '~^]*>.*$~', + ], + [ + // If it parsed successfully, insert the resolved datetime value. + // This ensures that "[time]today[/time]" ends up resolving to + // the date the post was written, not the date it is being read. + '[time=$1]$2[/time]', + // If it didn't parse successfully, remove the BBC entirely. + $matches[2], + ], + Parser::transform($matches[0], Parser::INPUT_BBC), + ); }, $message, ); @@ -943,10 +957,14 @@ function ($matches) { } // Attempt to un-parse the time to something less awful. + // This form will never be created by Msg::preparsecode() in SMF 3.0+ + // but it might be present in old data. $message = preg_replace_callback( '~\[time\](\d{0,10})\[/time\]~i', function ($matches) { - return '[time]' . Time::create('@' . $matches[1])->setTimezone(new \DateTimeZone(User::getTimezone()))->format(Time::getDateFormat()) . '[/time]'; + $time = Time::create('@' . $matches[1]); + + return '[time=' . $time->format('Y-m-d\TH:i:sP') . ']' . $time->format(null, false) . '[/time]'; }, $message, ); diff --git a/Sources/Parsers/BBCodeParser.php b/Sources/Parsers/BBCodeParser.php index 4dc7096bd2..c43f5f6919 100644 --- a/Sources/Parsers/BBCodeParser.php +++ b/Sources/Parsers/BBCodeParser.php @@ -25,6 +25,7 @@ use SMF\Sapi; use SMF\Theme; use SMF\Time; +use SMF\TimeInterval; use SMF\Url; use SMF\Utils; @@ -718,10 +719,16 @@ class BBCodeParser extends Parser 'disabled_before' => '', 'disabled_after' => '', ], + [ + 'tag' => 'time', + 'type' => 'unparsed_equals_content', + 'content' => '', + 'validate' => __CLASS__ . '::timeValidate', + ], [ 'tag' => 'time', 'type' => 'unparsed_content', - 'content' => '$1', + 'content' => '', 'validate' => __CLASS__ . '::timeValidate', ], [ @@ -2104,11 +2111,347 @@ public static function sizeValidate(array &$tag, array|string &$data, array $dis */ public static function timeValidate(array &$tag, array|string &$data, array $disabled, array $params): void { - if (is_numeric($data)) { - $data = Time::create('@' . $data)->format(); + // Data may be an array or a string, depending on the version of the BBC. + if (is_array($data)) { + $text = $data[0]; + $date = $data[1]; + } else { + $text = $data; + $date = $data; + } + + // Get sanitized versions of the data. + $sanitized_text = ltrim(Time::sanitize(Time::convertToEnglish($text)), '@'); + $sanitized_date = ltrim(Time::sanitize(Time::convertToEnglish($date)), '@'); + + // Special case if $date is actually a duration string. + if (preg_match('/^P((\d+D)(T(\d+[HMS])+)?|T(\d+[HMS])+)$/', $date)) { + if ($tag['type'] === 'unparsed_content') { + $tag['content'] = ''; + } else { + $data = [$text, $date]; + } + + return; + } + + if (preg_match('/^\d+\.?\d*\s+(year|month|week|day|hour|minute|second)s?$/', $sanitized_date)) { + try { + $duration = TimeInterval::createFromDateInterval(\DateInterval::createFromDateString($sanitized_date)); + + if ($tag['type'] === 'unparsed_content') { + $tag['content'] = ''; + } else { + $data = [$text, (string) $duration]; + } + + return; + } catch (\Throwable $e) { + } + } + + // Special handling for [time=absolute]...[/time] + if ($date === 'absolute') { + if ($sanitized_text === '') { + $tag['content'] = '$1'; + + return; + } + + $time = Time::create((is_numeric($sanitized_text) ? '@' : '') . $sanitized_text); + + $when = !empty((int) $time->format('u')) ? $time->format('Y-m-d\TH:i:s.uP') : $time->format('Y-m-d\TH:i:sP'); + + if (is_numeric(ltrim($text, '@'))) { + $text = Utils::normalizeSpaces(Utils::entityDecode($time->format(null, false)), true, true, ['collapse_hspace' => true]); + } + } + + // If there is a valid date value already, use it. + if (!isset($when) && (is_numeric($sanitized_date) || (!empty($sanitized_date) && strtotime($sanitized_date) !== false))) { + $when = $sanitized_date; + } + + // Special handling for month and day with no year. + if (!isset($when) && preg_match('/^(0?\d|1[0-2])-(0?\d|[12]\d|3[01])$/', $date, $matches)) { + $when = sprintf('%1$02d-%2$02d', $matches[1], $matches[2]); + + if ($tag['type'] === 'unparsed_content') { + $tag['content'] = ''; + } else { + $data = [$text, $when]; + } + + return; + } + + // If we have a text value, try that. + if (!isset($when) && (is_numeric($sanitized_text) || (!empty($sanitized_text) && strtotime($sanitized_text) !== false))) { + $when = $sanitized_text; + } + + // PHP's date parser gets confused by AM/PM combined with fractional seconds. + if ( + !isset($when) + && preg_match('/[AaPp]\.?[Mm]\.?/', $sanitized_text, $matches) + && preg_match('/\d\.\d+/', $sanitized_text) + ) { + $parsed = date_parse($sanitized_text); + + if (is_int($parsed['hour']) && is_int($parsed['minute'])) { + if ( + $parsed['hour'] === 12 + && str_starts_with(strtolower($matches[0]), 'a') + ) { + $sanitized_text = preg_replace( + '~(? 0 + && str_starts_with(strtolower($matches[0]), 'p') + ) { + $sanitized_text = preg_replace( + '~(?$1'; + // Found nothing parsable. + if (!isset($when)) { + $tag['content'] = '$1'; + + return; + } + + // We have a (probably) parsable value, so try to work with it. + try { + if (is_numeric($when)) { + $time = Time::create('@' . $when); + + // Get the formatted value for the datetime attribute. + $when = !empty((int) $time->format('u')) ? $time->format('Y-m-d\TH:i:s.uP') : $time->format('Y-m-d\TH:i:sP'); + + // Replace the raw Unix timestamp with something more pleasant. + if ($tag['type'] === 'unparsed_content') { + $text = Utils::normalizeSpaces(Utils::entityDecode($time->format(null, false)), true, true, ['collapse_hspace' => true]); + } + } else { + // Parse the date. + $parsed = date_parse($when); + + // Is this time implied to be a 12 hour post-meridian time? + // E.g.: "today at 2:30" with no AM or PM usually means 2:30 PM + // in locales that use 12 hour time. + if ( + // Time is between 1:00 and 7:59. + is_int($parsed['hour']) + && $parsed['hour'] > 0 + && $parsed['hour'] < 8 + // No explicit AM or PM was given. + && !preg_match('/[AaPp]\.?[Mm]\.?/', $when) + // The hour was given with a single digit (no preceeding zero). + && !preg_match('/(?format('Y'); + + if ($word !== 'year') { + $parsed['month'] = (int) $time->format('m'); + + if ($word !== 'month') { + $parsed['day'] = (int) $time->format('d'); + } + } + + if ( + $parsed['hour'] === 0 + && $parsed['minute'] === 0 + && $parsed['second'] === 0 + && $parsed['fraction'] === 0.0 + ) { + $parsed['hour'] = false; + $parsed['minute'] = false; + $parsed['second'] = false; + $parsed['fraction'] = false; + } + + if ($word !== 'year' && $word !== 'month') { + break; + } + } + } + } + + // If any time parts are populated, populate any missing ones. + if ( + !is_bool($parsed['hour']) + || !is_bool($parsed['minute']) + || !is_bool($parsed['second']) + || !is_bool($parsed['fraction']) + || $parsed['is_localtime'] + ) { + $parsed['hour'] = (int) $parsed['hour']; + $parsed['minute'] = (int) $parsed['minute']; + $parsed['second'] = (int) $parsed['second']; + + // If we have time parts and at least some date parts, + // populate all the date parts. + if ( + $parsed['year'] !== false + || $parsed['month'] !== false + || $parsed['day'] !== false + ) { + $temp = $time; + + if ($time < date_create('now')) { + if ($parsed['year'] === false) { + $temp->modify('+1 year'); + } elseif ($parsed['month'] === false) { + $temp->modify('+1 month'); + } elseif ($parsed['day'] === false) { + $temp->modify('+1 day'); + } + } + + $parsed['year'] = (int) ($parsed['year'] || $temp->format('Y')); + $parsed['month'] = (int) ($parsed['month'] || $temp->format('m')); + $parsed['day'] = (int) ($parsed['day'] || $temp->format('d')); + } + } + + // Special case for a year and month with no day of the month. + if ( + $parsed['year'] !== false + && $parsed['month'] !== false + && $parsed['day'] === 1 + && $parsed['hour'] === false + && $parsed['minute'] === false + && $parsed['second'] === false + && $parsed['fraction'] === false + && $parsed['is_localtime'] === false + && !preg_match('/sun|mon|tue|wed|thu|fri|sat/i', $when) + ) { + $month_names = [ + 1 => 'jan(uary)?', + 2 => 'feb(ruary)?', + 3 => 'mar(ch)?', + 4 => 'apr(il)?', + 5 => 'may', + 6 => 'jun(e)?', + 7 => 'jul(y)?', + 8 => 'aug(ust)?', + 9 => 'sep(t(ember)?)?', + 10 => 'oct(ober)?', + 11 => 'nov(ember)?', + 12 => 'dec(ember)?', + ]; + + $temp = str_replace((string) $parsed['year'], '', $when); + + if (preg_match('/' . $month_names[$parsed['month']] . '/', $temp)) { + $temp = preg_replace('/' . $month_names[$parsed['month']] . '/', '', $temp); + } else { + $temp = preg_replace('/0?' . $parsed['month'] . '/', '', $temp, 1); + } + + if (!str_contains((string) $parsed['day'], $when)) { + $parsed['day'] = false; + } + } + + // Now figure out the appropriate format for the datetime attribute. + $key_chars = [ + 'year' => 'Y', + 'month' => 'm', + 'day' => 'd', + 'hour' => 'H', + 'minute' => 'i', + 'second' => 's', + 'fraction' => 'u', + 'is_localtime' => 'P', + ]; + + foreach ($key_chars as $key => $char) { + $fmt[$char] = $parsed[$key] === false ? false : $char; + } + + $fmt_string = implode('-', array_filter( + array_slice($fmt, 0, 3), + fn($arg) => $arg !== false, + )); + + if ($fmt_string !== '' && $fmt['H'] !== false) { + $fmt_string .= '\\T'; + } + + $fmt_string .= implode(':', array_filter( + array_slice($fmt, 3, 3), + fn($arg) => $arg !== false, + )); + + if ($fmt['u'] !== false && !empty((int) $time->format('u'))) { + $fmt_string .= (empty($fmt_string) ? '0' : '') . '.u'; + } + + if ($fmt_string !== '' && $fmt['P'] !== false) { + $fmt_string .= 'P'; + } + + if ($fmt_string === '') { + $fmt_string = 'Y-m-d\TH:i:sP'; + } + + // Get the formatted value for the datetime attribute. + $when = $time->format($fmt_string); + } + + if ($tag['type'] === 'unparsed_content') { + $tag['content'] = ''; + $data = $text; + } else { + $data = [$text, $when]; + } + } catch (\Throwable $e) { + $tag['content'] = '$1'; + } } /*******************