-
-
Notifications
You must be signed in to change notification settings - Fork 60
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tokenizer/PHP: add tests for heredoc/nowdoc tokenization
The PHP tokenizer contains logic to:
* Retokenize the start/end tokens for nowdocs from `T_(START|END)_HEREDOC` to `T_(START|END)_NOWDOC`;
* Retokenize the _contents_ of a heredoc/nowdoc to `T_HEREDOC`/`T_NOWDOC` tokens;
* Retokenize the start token from `T_START_(HERE|NOW)DOC` to `T_STRING` if the heredoc/nowdoc is unclosed;
* Ensure that each line in the contents has its own token.

This commit adds tests safeguarding and documenting this part of the tokenizer.
- Loading branch information
Showing
4 changed files
with
304 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
<?php | ||
// Test cases for the heredoc/nowdoc tokenization tests. | ||
// Each case is located via its `/* test... */` marker comment, which the tests build from the test method name, so keep the two in sync. | ||
// The tests assert the exact content of every heredoc/nowdoc token: changing a body below requires updating the expectations. | ||
|
||
/* testHeredocSingleLine */ | ||
echo <<<EOD | ||
Some $var text | ||
EOD; | ||
|
||
/* testNowdocSingleLine */ | ||
echo <<<'MARKER' | ||
Some text | ||
MARKER; | ||
|
||
/* testHeredocMultiLine */ | ||
echo <<<"😬" | ||
Lorum ipsum | ||
Some $var text | ||
dolor sit amet | ||
😬; | ||
|
||
/* testNowdocMultiLine */ | ||
echo <<<'multi_line' | ||
Lorum ipsum | ||
Some text | ||
dolor sit amet | ||
multi_line; | ||
|
||
/* testHeredocEndsOnBlankLine */ | ||
echo <<<EOD | ||
Lorum ipsum | ||
dolor sit amet | ||
EOD; | ||
|
||
/* testNowdocEndsOnBlankLine */ | ||
echo <<<'EOD' | ||
Lorum ipsum | ||
dolor sit amet | ||
|
||
EOD; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
<?php | ||
/** | ||
* Tests the tokenization for heredoc/nowdoc constructs. | ||
* | ||
* @author Juliette Reinders Folmer <[email protected]> | ||
* @copyright 2024 PHPCSStandards and contributors | ||
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence | ||
*/ | ||
|
||
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP; | ||
|
||
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase; | ||
use PHP_CodeSniffer\Util\Tokens; | ||
|
||
/** | ||
* Tests the tokenization for heredoc/nowdoc constructs. | ||
* | ||
* Verifies that: | ||
* - Nowdoc openers/closers are retokenized from `T_[START_|END_]HEREDOC` to `T_[START_|END_]NOWDOC`. | ||
* - The contents of the heredoc/nowdoc are tokenized as `T_HEREDOC`/`T_NOWDOC`. | ||
* - Each line of the contents has its own token, which includes the new line char. | ||
* | ||
* @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize | ||
*/ | ||
final class HeredocNowdocTest extends AbstractTokenizerTestCase | ||
{ | ||
|
||
|
||
/** | ||
* Verify tokenization of a single-line heredoc construct. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testHeredocSingleLine() | ||
{ | ||
// The opener and each content line are expected to include their trailing new line char. | ||
$expectedSequence = [ | ||
[T_START_HEREDOC => '<<<EOD'."\n"], | ||
[T_HEREDOC => 'Some $var text'."\n"], | ||
[T_END_HEREDOC => 'EOD'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testHeredocSingleLine() | ||
|
||
|
||
/** | ||
* Verify tokenization of a single-line nowdoc construct. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testNowdocSingleLine() | ||
{ | ||
$expectedSequence = [ | ||
[T_START_NOWDOC => "<<<'MARKER'\n"], | ||
[T_NOWDOC => 'Some text'."\n"], | ||
[T_END_NOWDOC => 'MARKER'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testNowdocSingleLine() | ||
|
||
|
||
/** | ||
* Verify tokenization of a multi-line heredoc construct. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testHeredocMultiLine() | ||
{ | ||
// The identifier is double quoted and non-ASCII: the quotes are expected in the opener, but not in the closer. | ||
$expectedSequence = [ | ||
[T_START_HEREDOC => '<<<"😬"'."\n"], | ||
[T_HEREDOC => 'Lorum ipsum'."\n"], | ||
[T_HEREDOC => 'Some $var text'."\n"], | ||
[T_HEREDOC => 'dolor sit amet'."\n"], | ||
[T_END_HEREDOC => '😬'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testHeredocMultiLine() | ||
|
||
|
||
/** | ||
* Verify tokenization of a multi-line nowdoc construct. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testNowdocMultiLine() | ||
{ | ||
$expectedSequence = [ | ||
[T_START_NOWDOC => "<<<'multi_line'\n"], | ||
[T_NOWDOC => 'Lorum ipsum'."\n"], | ||
[T_NOWDOC => 'Some text'."\n"], | ||
[T_NOWDOC => 'dolor sit amet'."\n"], | ||
[T_END_NOWDOC => 'multi_line'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testNowdocMultiLine() | ||
|
||
|
||
/** | ||
* Verify tokenization of a multi-line heredoc construct of which the last content line is blank. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testHeredocEndsOnBlankLine() | ||
{ | ||
// The blank line before the closer is expected as its own token, containing only the new line char. | ||
$expectedSequence = [ | ||
[T_START_HEREDOC => '<<<EOD'."\n"], | ||
[T_HEREDOC => 'Lorum ipsum'."\n"], | ||
[T_HEREDOC => 'dolor sit amet'."\n"], | ||
[T_HEREDOC => "\n"], | ||
[T_END_HEREDOC => 'EOD'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_HEREDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testHeredocEndsOnBlankLine() | ||
|
||
|
||
/** | ||
* Verify tokenization of a multi-line nowdoc construct of which the last content line is blank. | ||
* | ||
* @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. | ||
* | ||
* @return void | ||
*/ | ||
public function testNowdocEndsOnBlankLine() | ||
{ | ||
// The blank line before the closer is expected as its own token, containing only the new line char. | ||
$expectedSequence = [ | ||
[T_START_NOWDOC => "<<<'EOD'\n"], | ||
[T_NOWDOC => 'Lorum ipsum'."\n"], | ||
[T_NOWDOC => 'dolor sit amet'."\n"], | ||
[T_NOWDOC => "\n"], | ||
[T_END_NOWDOC => 'EOD'], | ||
]; | ||
|
||
$target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_START_NOWDOC); | ||
|
||
$this->checkTokenSequence($target, $expectedSequence); | ||
|
||
}//end testNowdocEndsOnBlankLine() | ||
|
||
|
||
/** | ||
* Test helper. Check a token sequence complies with an expected token sequence. | ||
* | ||
* Starting at `$startPtr`, each consecutive token is compared against the corresponding entry in | ||
* `$expectedSequence` on token code, token type and token content. | ||
* | ||
* @param int $startPtr The position in the file to start checking from. | ||
* @param array<array<int|string, string>> $expectedSequence The consecutive token constants and their contents to expect. | ||
* Each entry is a single-item array: token constant => content. | ||
* | ||
* @return void | ||
*/ | ||
private function checkTokenSequence($startPtr, array $expectedSequence) | ||
{ | ||
$tokens = $this->phpcsFile->getTokens(); | ||
|
||
$sequenceKey = 0; | ||
$sequenceCount = count($expectedSequence); | ||
|
||
// Walk the token stream and the expected sequence in lockstep. | ||
for ($i = $startPtr; $sequenceKey < $sequenceCount; $i++, $sequenceKey++) { | ||
$currentItem = $expectedSequence[$sequenceKey]; | ||
$expectedCode = key($currentItem); | ||
$expectedType = Tokens::tokenName($expectedCode); | ||
$expectedContent = current($currentItem); | ||
$errorMsgSuffix = PHP_EOL.'(StackPtr: '.$i.' | Position in sequence: '.$sequenceKey.' | Expected: '.$expectedType.')'; | ||
|
||
// Fail with a readable message, rather than an "undefined offset" error, when the expected sequence runs past the end of the token stream. | ||
$this->assertArrayHasKey( | ||
$i, | ||
$tokens, | ||
'Token stream ended before the expected sequence was complete'.$errorMsgSuffix | ||
); | ||
|
||
$this->assertSame( | ||
$expectedCode, | ||
$tokens[$i]['code'], | ||
'Token tokenized as '.Tokens::tokenName($tokens[$i]['code']).', not '.$expectedType.' (code)'.$errorMsgSuffix | ||
); | ||
|
||
$this->assertSame( | ||
$expectedType, | ||
$tokens[$i]['type'], | ||
'Token tokenized as '.$tokens[$i]['type'].', not '.$expectedType.' (type)'.$errorMsgSuffix | ||
); | ||
|
||
$this->assertSame( | ||
$expectedContent, | ||
$tokens[$i]['content'], | ||
'Token content did not match expectations'.$errorMsgSuffix | ||
); | ||
}//end for | ||
|
||
}//end checkTokenSequence() | ||
|
||
|
||
}//end class |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?php | ||
|
||
// This is an intentional parse error. This test should be the only test in the file! | ||
// NOTE: this is NOT a _real_ merge conflict, but a valid test. | ||
// The test targets the `<<< HEAD` part of the conflict marker below, which reads as the start of a heredoc which is never closed. | ||
|
||
/* testUnclosedHeredoc */ | ||
<<<<<<< HEAD | ||
$a = 10; | ||
======= | ||
$a = 20; | ||
>>>>>>> master |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
<?php | ||
/** | ||
* Tests the tokenization for an unclosed heredoc construct. | ||
* | ||
* @author Juliette Reinders Folmer <[email protected]> | ||
* @copyright 2024 PHPCSStandards and contributors | ||
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence | ||
*/ | ||
|
||
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP; | ||
|
||
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase; | ||
|
||
/** | ||
* Tests the tokenization for an unclosed heredoc construct. | ||
* | ||
* @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize | ||
*/ | ||
final class HeredocParseErrorTest extends AbstractTokenizerTestCase | ||
{ | ||
|
||
|
||
/** | ||
* Verify that a heredoc (and nowdoc) start token is retokenized to T_STRING if no closer is found. | ||
* | ||
* The test case file contains a merge conflict marker, of which the `<<< HEAD` part is the token under test. | ||
* | ||
* @return void | ||
*/ | ||
public function testMergeConflict() | ||
{ | ||
$tokens = $this->phpcsFile->getTokens(); | ||
|
||
// Both token types are passed to the search, so the target should be found whether or not | ||
// the retokenization happened; the assertions below then verify the outcome. | ||
$token = $this->getTargetToken('/* testUnclosedHeredoc */', [T_START_HEREDOC, T_STRING], '<<< HEAD'."\n"); | ||
$tokenArray = $tokens[$token]; | ||
|
||
// The failure messages name the expected token type (T_STRING), not the type which is being guarded against. | ||
$this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)'); | ||
$this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)'); | ||
|
||
}//end testMergeConflict() | ||
|
||
|
||
}//end class |