diff --git a/src/ValueParsers/YearMonthTimeParser.php b/src/ValueParsers/YearMonthTimeParser.php index 3021cdc..0f712e6 100644 --- a/src/ValueParsers/YearMonthTimeParser.php +++ b/src/ValueParsers/YearMonthTimeParser.php @@ -15,8 +15,6 @@ * @license GPL-2.0-or-later * @author Addshore * @author Thiemo Kreuz - * - * @todo match BCE dates in here */ class YearMonthTimeParser extends StringValueParser { @@ -32,21 +30,29 @@ class YearMonthTimeParser extends StringValueParser { */ private $isoTimestampParser; + /** + * @var EraParser + */ + private $eraParser; + /** * @see StringValueParser::__construct * * @param MonthNameProvider $monthNameProvider * @param ParserOptions|null $options + * @param EraParser|null $eraParser */ public function __construct( MonthNameProvider $monthNameProvider, - ParserOptions $options = null + ParserOptions $options = null, + EraParser $eraParser = null ) { parent::__construct( $options ); $languageCode = $this->getOption( ValueParser::OPT_LANG ); $this->monthNumbers = $monthNameProvider->getMonthNumbers( $languageCode ); $this->isoTimestampParser = new IsoTimestampParser( null, $this->options ); + $this->eraParser = $eraParser ?: new EraParser(); } /** @@ -58,40 +64,72 @@ public function __construct( * @return TimeValue */ protected function stringParse( $value ) { + list( $newValue, $sign ) = $this->splitBySignAndEra( $value ); + // Matches year and month separated by a separator. // \p{L} matches letters outside the ASCII range. $regex = '/^(-?[\d\p{L}]+)\s*?[\/\-\s.,]\s*(-?[\d\p{L}]+)$/u'; - if ( !preg_match( $regex, trim( $value ), $matches ) ) { + if ( !preg_match( $regex, $newValue, $matches ) ) { throw new ParseException( 'Failed to parse year and month', $value, self::FORMAT_NAME ); } list( , $a, $b ) = $matches; - $aIsInt = preg_match( '/^-?\d+$/', $a ); - $bIsInt = preg_match( '/^-?\d+$/', $b ); + // non-empty sign indicates the era (e.g. "BCE") was specified + // don't accept a negative number as the year + $intRegex = $sign !== '' ? '/^\d+$/' : '/^-?\d+$/'; + $aIsInt = preg_match( $intRegex, $a ); + $bIsInt = preg_match( $intRegex, $b ); if ( $aIsInt && $bIsInt ) { - if ( $this->canBeMonth( $a ) ) { - return $this->getTimeFromYearMonth( $b, $a ); - } elseif ( $this->canBeMonth( $b ) ) { - return $this->getTimeFromYearMonth( $a, $b ); + // stuff like "1 234 BCE" can be interpreted as "1234 BCE" + // this is for YearTimeParser, don't interfere with it + if ( $sign !== '-' ) { + if ( $this->canBeMonth( $a ) ) { + return $this->getTimeFromYearMonth( $sign . $b, $a ); + } elseif ( $this->canBeMonth( $b ) ) { + return $this->getTimeFromYearMonth( $sign . $a, $b ); + } } } elseif ( $aIsInt ) { $month = $this->parseMonth( $b ); if ( $month ) { - return $this->getTimeFromYearMonth( $a, $month ); + return $this->getTimeFromYearMonth( $sign . $a, $month ); } } elseif ( $bIsInt ) { $month = $this->parseMonth( $a ); if ( $month ) { - return $this->getTimeFromYearMonth( $b, $month ); + return $this->getTimeFromYearMonth( $sign . $b, $month ); } } throw new ParseException( 'Failed to parse year and month', $value, self::FORMAT_NAME ); } + /** + * @param string $value + * + * @return array( string $newValue, string $sign ) + */ + private function splitBySignAndEra( $value ) { + $trimmedValue = trim( $value ); + $init = substr( $trimmedValue, 0, 1 ); + // we want to handle signs at the beginning ourselves + if ( $init === '+' || $init === '-' ) { + $newValue = $trimmedValue; + $sign = ''; + } else { + list( $sign, $newValue ) = $this->eraParser->parse( $trimmedValue ); + if ( $newValue === $trimmedValue ) { + // EraParser defaults to "+" but we need to indicate "unspecified era" + $sign = ''; + } + } + + return [ $newValue, $sign ]; + } + /** * @param string $month * @@ -114,7 +152,7 @@ private function parseMonth( $month ) { * @return TimeValue */ private function getTimeFromYearMonth( $year, $month ) { - if ( $year[0] !== '-' ) { + if ( $year[0] !== '-' && $year[0] !== '+' ) { $year = '+' . $year; } diff --git a/tests/ValueParsers/YearMonthTimeParserTest.php b/tests/ValueParsers/YearMonthTimeParserTest.php index 03d6c0b..b1c3460 100644 --- a/tests/ValueParsers/YearMonthTimeParserTest.php +++ b/tests/ValueParsers/YearMonthTimeParserTest.php @@ -59,6 +59,10 @@ public function validInputProvider() { array( '+2016-01-00T00:00:00Z' ), ' January 2016 ' => array( '+2016-01-00T00:00:00Z' ), + ' January 2016 CE ' => + array( '+2016-01-00T00:00:00Z' ), + ' January 2016 BCE ' => + array( '-2016-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), // leading zeros '1 00001999' => @@ -85,8 +89,16 @@ public function validInputProvider() { array( '+0001-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), '1999 January' => array( '+1999-01-00T00:00:00Z' ), + '1999 January CE' => + array( '+1999-01-00T00:00:00Z' ), + '1999 January BCE' => + array( '-1999-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), 'January 1999' => array( '+1999-01-00T00:00:00Z' ), + 'January 1999 CE' => + array( '+1999-01-00T00:00:00Z' ), + 'January 1999 BCE' => + array( '-1999-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), 'January-1' => array( '+0001-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), 'JanuARY-1' => @@ -100,7 +112,9 @@ public function validInputProvider() { // Unicode 'Březen 1999' => array( '+1999-03-00T00:00:00Z' ), + 'Březen 1999 BCE' => array( '-1999-03-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), 'březen 1999' => array( '+1999-03-00T00:00:00Z' ), + 'březen 1999 BCE' => array( '-1999-03-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ), // use different date separators '1-1999' => @@ -170,6 +184,11 @@ public function invalidInputProvider() { $argLists = parent::NON_VALID_CASES; $invalid = array( + '', + ' ', + '+', + '-', + // These are just wrong 'June June June', 'June June', @@ -190,7 +209,19 @@ public function invalidInputProvider() { '00001 1999', '000000001 100001999', - // Dont parse stuff with separators in the year + // Possible years BCE with digit groups + '1 2 BC', + '1 23 BC', + '12 3 BC', + '12 30 BC', + '1 000 BC', + '1,000 BC', + '1 234 BCE', + '1.234 BCE', + '12 345 BCE', + '12,345 BCE', + + // Don't parse stuff with separators in the year 'june 200,000,000', 'june 200.000.000', @@ -198,11 +229,13 @@ public function invalidInputProvider() { '1 June 20000', '20000', '-1998', + '1998 BCE', - // BCE is not supported yet - 'April 1998 BCE', - '1998 April BCE', - '1998 BCE April', + // era in conjunction with sign + 'April -1998 BCE', + 'April -1998 CE', + '-1998 April BCE', + '-1998 April CE', ); foreach ( $invalid as $value ) {