Skip to content

Commit

Permalink
Merge pull request #148 from matejsuchanek/era
Browse files Browse the repository at this point in the history
Equip YearMonthTimeParser with EraParser
  • Loading branch information
lucaswerkmeister authored Jul 15, 2021
2 parents 2e69a43 + 14dd75e commit 771b632
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 18 deletions.
64 changes: 51 additions & 13 deletions src/ValueParsers/YearMonthTimeParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
* @license GPL-2.0-or-later
* @author Addshore
* @author Thiemo Kreuz
*
* @todo match BCE dates in here
*/
class YearMonthTimeParser extends StringValueParser {

Expand All @@ -32,21 +30,29 @@ class YearMonthTimeParser extends StringValueParser {
*/
private $isoTimestampParser;

/**
* @var EraParser
*/
private $eraParser;

/**
 * @see StringValueParser::__construct
 *
 * @param MonthNameProvider $monthNameProvider Provides the month numbers for the language
 *  configured via ValueParser::OPT_LANG.
 * @param ParserOptions|null $options
 * @param EraParser|null $eraParser Parser used to split an era (e.g. "BCE") off the input.
 *  A default EraParser is created when this is omitted.
 */
public function __construct(
	MonthNameProvider $monthNameProvider,
	ParserOptions $options = null,
	EraParser $eraParser = null
) {
	parent::__construct( $options );

	// The month lookup is resolved once, for the language this parser was configured with.
	$languageCode = $this->getOption( ValueParser::OPT_LANG );
	$this->monthNumbers = $monthNameProvider->getMonthNumbers( $languageCode );
	$this->isoTimestampParser = new IsoTimestampParser( null, $this->options );
	$this->eraParser = $eraParser ?: new EraParser();
}

/**
Expand All @@ -58,40 +64,72 @@ public function __construct(
* @return TimeValue
*/
protected function stringParse( $value ) {
	// Split off an era (e.g. "BCE") first. $sign is '' when no era was specified,
	// and $newValue is the trimmed input without the era.
	list( $newValue, $sign ) = $this->splitBySignAndEra( $value );

	// Matches year and month separated by a separator.
	// \p{L} matches letters outside the ASCII range.
	$regex = '/^(-?[\d\p{L}]+)\s*?[\/\-\s.,]\s*(-?[\d\p{L}]+)$/u';
	if ( !preg_match( $regex, $newValue, $matches ) ) {
		throw new ParseException( 'Failed to parse year and month', $value, self::FORMAT_NAME );
	}
	list( , $a, $b ) = $matches;

	// non-empty sign indicates the era (e.g. "BCE") was specified
	// don't accept a negative number as the year
	$intRegex = $sign !== '' ? '/^\d+$/' : '/^-?\d+$/';
	$aIsInt = preg_match( $intRegex, $a );
	$bIsInt = preg_match( $intRegex, $b );

	if ( $aIsInt && $bIsInt ) {
		// stuff like "1 234 BCE" can be interpreted as "1234 BCE"
		// this is for YearTimeParser, don't interfere with it
		if ( $sign !== '-' ) {
			// Two numbers: whichever one can be a month is the month, the other is the year.
			if ( $this->canBeMonth( $a ) ) {
				return $this->getTimeFromYearMonth( $sign . $b, $a );
			} elseif ( $this->canBeMonth( $b ) ) {
				return $this->getTimeFromYearMonth( $sign . $a, $b );
			}
		}
	} elseif ( $aIsInt ) {
		// Number followed by a word: the word must be a known month name.
		$month = $this->parseMonth( $b );

		if ( $month ) {
			return $this->getTimeFromYearMonth( $sign . $a, $month );
		}
	} elseif ( $bIsInt ) {
		// Word followed by a number: the word must be a known month name.
		$month = $this->parseMonth( $a );

		if ( $month ) {
			return $this->getTimeFromYearMonth( $sign . $b, $month );
		}
	}

	// Nothing above produced a result, so the input is not a year/month pair.
	throw new ParseException( 'Failed to parse year and month', $value, self::FORMAT_NAME );
}

/**
 * Separates an era (e.g. "BCE") from the trimmed input and reports the sign it implies.
 *
 * Input that starts with "+" or "-" is not handed to the EraParser at all; it is returned
 * trimmed, with an empty sign.
 *
 * @param string $value
 *
 * @return string[] [ $newValue, $sign ], where $sign is "" when no era was specified
 */
private function splitBySignAndEra( $value ) {
	$input = trim( $value );
	$firstChar = substr( $input, 0, 1 );

	// Leading signs are handled by this parser itself, so the EraParser is bypassed.
	if ( $firstChar === '+' || $firstChar === '-' ) {
		return [ $input, '' ];
	}

	list( $eraSign, $remainder ) = $this->eraParser->parse( $input );

	// EraParser defaults to "+", so an unchanged string is the only way to tell
	// that no era was specified; report that case as an empty sign.
	return [ $remainder, $remainder === $input ? '' : $eraSign ];
}

/**
* @param string $month
*
Expand All @@ -114,7 +152,7 @@ private function parseMonth( $month ) {
* @return TimeValue
*/
private function getTimeFromYearMonth( $year, $month ) {
if ( $year[0] !== '-' ) {
if ( $year[0] !== '-' && $year[0] !== '+' ) {
$year = '+' . $year;
}

Expand Down
43 changes: 38 additions & 5 deletions tests/ValueParsers/YearMonthTimeParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ public function validInputProvider() {
array( '+2016-01-00T00:00:00Z' ),
' January 2016 ' =>
array( '+2016-01-00T00:00:00Z' ),
' January 2016 CE ' =>
array( '+2016-01-00T00:00:00Z' ),
' January 2016 BCE ' =>
array( '-2016-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),

// leading zeros
'1 00001999' =>
Expand All @@ -85,8 +89,16 @@ public function validInputProvider() {
array( '+0001-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),
'1999 January' =>
array( '+1999-01-00T00:00:00Z' ),
'1999 January CE' =>
array( '+1999-01-00T00:00:00Z' ),
'1999 January BCE' =>
array( '-1999-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),
'January 1999' =>
array( '+1999-01-00T00:00:00Z' ),
'January 1999 CE' =>
array( '+1999-01-00T00:00:00Z' ),
'January 1999 BCE' =>
array( '-1999-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),
'January-1' =>
array( '+0001-01-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),
'JanuARY-1' =>
Expand All @@ -100,7 +112,9 @@ public function validInputProvider() {

// Unicode
'Březen 1999' => array( '+1999-03-00T00:00:00Z' ),
'Březen 1999 BCE' => array( '-1999-03-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),
'březen 1999' => array( '+1999-03-00T00:00:00Z' ),
'březen 1999 BCE' => array( '-1999-03-00T00:00:00Z', TimeValue::PRECISION_MONTH, $julian ),

// use different date separators
'1-1999' =>
Expand Down Expand Up @@ -170,6 +184,11 @@ public function invalidInputProvider() {
$argLists = parent::NON_VALID_CASES;

$invalid = array(
'',
' ',
'+',
'-',

// These are just wrong
'June June June',
'June June',
Expand All @@ -190,19 +209,33 @@ public function invalidInputProvider() {
'00001 1999',
'000000001 100001999',

// Dont parse stuff with separators in the year
// Possible years BCE with digit groups
'1 2 BC',
'1 23 BC',
'12 3 BC',
'12 30 BC',
'1 000 BC',
'1,000 BC',
'1 234 BCE',
'1.234 BCE',
'12 345 BCE',
'12,345 BCE',

// Don't parse stuff with separators in the year
'june 200,000,000',
'june 200.000.000',

// Not within the scope of this parser
'1 June 20000',
'20000',
'-1998',
'1998 BCE',

// BCE is not supported yet
'April 1998 BCE',
'1998 April BCE',
'1998 BCE April',
// era in conjunction with sign
'April -1998 BCE',
'April -1998 CE',
'-1998 April BCE',
'-1998 April CE',
);

foreach ( $invalid as $value ) {
Expand Down

0 comments on commit 771b632

Please sign in to comment.