Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tokenizers/Comment: add tests + fix two edge case bugs #494

Merged
merged 4 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions src/Tokenizers/Comment.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class Comment
* @param string $eolChar The EOL character to use for splitting strings.
* @param int $stackPtr The position of the first token in the file.
*
* @return array
* @return array<int, array<string, string|int|array<int>>>
*/
public function tokenizeString($string, $eolChar, $stackPtr)
{
Expand All @@ -41,9 +41,16 @@ public function tokenizeString($string, $eolChar, $stackPtr)
extra star when they are used for function and class comments.
*/

$char = ($numChars - strlen(ltrim($string, '/*')));
$openTag = substr($string, 0, $char);
$string = ltrim($string, '/*');
$char = ($numChars - strlen(ltrim($string, '/*')));
$lastChars = substr($string, -2);
if ($char === $numChars && $lastChars === '*/') {
// Edge case: docblock without whitespace or contents.
$openTag = substr($string, 0, -2);
$string = $lastChars;
} else {
$openTag = substr($string, 0, $char);
$string = ltrim($string, '/*');
}

$tokens[$stackPtr] = [
'content' => $openTag,
Expand Down Expand Up @@ -74,6 +81,7 @@ public function tokenizeString($string, $eolChar, $stackPtr)
];

if ($closeTag['content'] === false) {
// In PHP < 8.0 substr() can return `false` instead of always returning a string.
$closeTag['content'] = '';
}

Expand Down Expand Up @@ -171,7 +179,7 @@ public function tokenizeString($string, $eolChar, $stackPtr)
* @param int $start The position in the string to start processing.
* @param int $end The position in the string to end processing.
*
* @return array
* @return array<int, array<string, string|int>>
*/
private function processLine($string, $eolChar, $start, $end)
{
Expand Down Expand Up @@ -246,7 +254,7 @@ private function processLine($string, $eolChar, $start, $end)
* @param int $start The position in the string to start processing.
* @param int $end The position in the string to end processing.
*
* @return array|null
* @return array<string, string|int>|null
*/
private function collectWhitespace($string, $start, $end)
{
Expand All @@ -263,14 +271,12 @@ private function collectWhitespace($string, $start, $end)
return null;
}

$token = [
return [
'content' => $space,
'code' => T_DOC_COMMENT_WHITESPACE,
'type' => 'T_DOC_COMMENT_WHITESPACE',
];

return $token;

}//end collectWhitespace()


Expand Down
2 changes: 1 addition & 1 deletion src/Tokenizers/PHP.php
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,7 @@ protected function tokenize($string)

if ($tokenIsArray === true
&& ($token[0] === T_DOC_COMMENT
|| ($token[0] === T_COMMENT && strpos($token[1], '/**') === 0))
|| ($token[0] === T_COMMENT && strpos($token[1], '/**') === 0 && $token[1] !== '/**/'))
) {
$commentTokens = $commentTokenizer->tokenizeString($token[1], $this->eolChar, $newStackPtr);
foreach ($commentTokens as $commentToken) {
Expand Down
117 changes: 117 additions & 0 deletions tests/Core/Tokenizer/Comment/CommentTestCase.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?php
/**
* Base class for testing DocBlock comment tokenization.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\Comment;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
use PHP_CodeSniffer\Util\Tokens;

/**
 * Base class for testing DocBlock comment tokenization.
 *
 * Offers a shared opener/closer test, fed by a data provider which each concrete
 * test class has to implement, plus a helper to compare a run of consecutive
 * tokens against an expected sequence.
 *
 * @covers PHP_CodeSniffer\Tokenizers\Comment
 */
abstract class CommentTestCase extends AbstractTokenizerTestCase
{


    /**
     * Test whether the docblock opener and closer have the expected extra keys.
     *
     * @param string     $marker       The comment prefacing the target token.
     * @param int        $closerOffset The offset of the closer from the opener.
     * @param array<int> $expectedTags The expected tags offsets array.
     *
     * @dataProvider dataDocblockOpenerCloser
     *
     * @return void
     */
    public function testDocblockOpenerCloser($marker, $closerOffset, $expectedTags)
    {
        $tokens = $this->phpcsFile->getTokens();
        $target = $this->getTargetToken($marker, [T_DOC_COMMENT_OPEN_TAG]);

        $opener = $tokens[$target];

        $this->assertArrayHasKey('comment_closer', $opener, 'Comment opener: comment_closer index is not set');
        $this->assertArrayHasKey('comment_tags', $opener, 'Comment opener: comment_tags index is not set');

        $expectedCloser = ($target + $closerOffset);
        $this->assertSame($expectedCloser, $opener['comment_closer'], 'Comment opener: comment_closer not set to the expected stack pointer');

        // The data provider supplies offsets relative to the opener; turn them into stack pointers.
        foreach (array_keys($expectedTags) as $i) {
            $expectedTags[$i] += $target;
        }

        $this->assertSame($expectedTags, $opener['comment_tags'], 'Comment opener: recorded tags do not match expected tags');

        $closer = $tokens[$opener['comment_closer']];

        $this->assertArrayHasKey('comment_opener', $closer, 'Comment closer: comment_opener index is not set');
        $this->assertSame($target, $closer['comment_opener'], 'Comment closer: comment_opener not set to the expected stack pointer');

    }//end testDocblockOpenerCloser()


    /**
     * Data provider.
     *
     * @see testDocblockOpenerCloser()
     *
     * @return array<string, array<string, string|int|array<int>>>
     */
    abstract public static function dataDocblockOpenerCloser();


    /**
     * Test helper. Check a token sequence complies with an expected token sequence.
     *
     * Each entry in the expected sequence is a single-item array in which the key is
     * the token constant and the value is the expected token content. The entries are
     * compared one-by-one against consecutive tokens, starting at `$startPtr`.
     *
     * @param int                              $startPtr         The position in the file to start checking from.
     * @param array<array<int|string, string>> $expectedSequence The consecutive token constants and their contents to expect.
     *
     * @return void
     */
    protected function checkTokenSequence($startPtr, array $expectedSequence)
    {
        $tokens = $this->phpcsFile->getTokens();

        $sequenceKey   = 0;
        $sequenceCount = count($expectedSequence);

        for ($i = $startPtr; $sequenceKey < $sequenceCount; $i++, $sequenceKey++) {
            $currentItem     = $expectedSequence[$sequenceKey];
            $expectedCode    = key($currentItem);
            $expectedType    = Tokens::tokenName($expectedCode);
            $expectedContent = current($currentItem);
            $errorMsgSuffix  = PHP_EOL.'(StackPtr: '.$i.' | Position in sequence: '.$sequenceKey.' | Expected: '.$expectedType.')';

            // Guard against the token stream being shorter than the expected sequence,
            // so the test fails with a readable message instead of an undefined offset error.
            $this->assertArrayHasKey(
                $i,
                $tokens,
                'Token stream ended before the expected sequence was complete'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedCode,
                $tokens[$i]['code'],
                'Token tokenized as '.Tokens::tokenName($tokens[$i]['code']).', not '.$expectedType.' (code)'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedType,
                $tokens[$i]['type'],
                'Token tokenized as '.$tokens[$i]['type'].', not '.$expectedType.' (type)'.$errorMsgSuffix
            );

            $this->assertSame(
                $expectedContent,
                $tokens[$i]['content'],
                'Token content did not match expectations'.$errorMsgSuffix
            );
        }//end for

    }//end checkTokenSequence()


}//end class
6 changes: 6 additions & 0 deletions tests/Core/Tokenizer/Comment/LiveCoding1Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?php

/* testLiveCoding */
/**
* Unclosed docblock, live coding.... with no blank line at end of file.
*
69 changes: 69 additions & 0 deletions tests/Core/Tokenizer/Comment/LiveCoding1Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php
/**
* Tests that unclosed docblocks during live coding are handled correctly.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\Comment;

/**
 * Checks the tokenization of a docblock which is still unclosed when the file ends (live coding).
 *
 * Scenario covered here: no blank line at the end of the file.
 *
 * @covers PHP_CodeSniffer\Tokenizers\Comment
 */
final class LiveCoding1Test extends CommentTestCase
{


    /**
     * Data provider.
     *
     * @see testDocblockOpenerCloser()
     *
     * @return array<string, array<string, string|int|array<int>>>
     */
    public static function dataDocblockOpenerCloser()
    {
        // Single data set: the marker comment, the closer offset and the (empty) list of tag offsets.
        $dataSet = [
            'marker'       => '/* testLiveCoding */',
            'closerOffset' => 8,
            'expectedTags' => [],
        ];

        return ['live coding: unclosed docblock, no blank line at end of file' => $dataSet];

    }//end dataDocblockOpenerCloser()


    /**
     * Verify tokenization of the DocBlock.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testLiveCoding()
    {
        // The marker comment in the test case file carries the name of this test method.
        $marker    = '/* '.__FUNCTION__.' */';
        $openerPtr = $this->getTargetToken($marker, T_DOC_COMMENT_OPEN_TAG);

        // The final entry expects the star on the last line to be tokenized as the close tag.
        $expectedTokens = [
            [T_DOC_COMMENT_OPEN_TAG   => '/**'],
            [T_DOC_COMMENT_WHITESPACE => "\n"],
            [T_DOC_COMMENT_WHITESPACE => ' '],
            [T_DOC_COMMENT_STAR       => '*'],
            [T_DOC_COMMENT_WHITESPACE => ' '],
            [T_DOC_COMMENT_STRING     => 'Unclosed docblock, live coding.... with no blank line at end of file.'],
            [T_DOC_COMMENT_WHITESPACE => "\n"],
            [T_DOC_COMMENT_WHITESPACE => ' '],
            [T_DOC_COMMENT_CLOSE_TAG  => '*'],
        ];

        $this->checkTokenSequence($openerPtr, $expectedTokens);

    }//end testLiveCoding()


}//end class
5 changes: 5 additions & 0 deletions tests/Core/Tokenizer/Comment/LiveCoding2Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?php

/* testLiveCoding */
/**
* Unclosed docblock, live coding.... with a blank line at end of file.
68 changes: 68 additions & 0 deletions tests/Core/Tokenizer/Comment/LiveCoding2Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php
/**
* Tests that unclosed docblocks during live coding are handled correctly.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\Comment;

/**
 * Checks the tokenization of a docblock which is still unclosed when the file ends (live coding).
 *
 * Scenario covered here: a blank line at the end of the file.
 *
 * @covers PHP_CodeSniffer\Tokenizers\Comment
 */
final class LiveCoding2Test extends CommentTestCase
{


    /**
     * Data provider.
     *
     * @see testDocblockOpenerCloser()
     *
     * @return array<string, array<string, string|int|array<int>>>
     */
    public static function dataDocblockOpenerCloser()
    {
        // Single data set: the marker comment, the closer offset and the (empty) list of tag offsets.
        $dataSet = [
            'marker'       => '/* testLiveCoding */',
            'closerOffset' => 7,
            'expectedTags' => [],
        ];

        return ['live coding: unclosed docblock with blank line at end of file' => $dataSet];

    }//end dataDocblockOpenerCloser()


    /**
     * Verify tokenization of the DocBlock.
     *
     * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment.
     *
     * @return void
     */
    public function testLiveCoding()
    {
        // The marker comment in the test case file carries the name of this test method.
        $marker    = '/* '.__FUNCTION__.' */';
        $openerPtr = $this->getTargetToken($marker, T_DOC_COMMENT_OPEN_TAG);

        // The final entry expects a close tag token with empty content.
        $expectedTokens = [
            [T_DOC_COMMENT_OPEN_TAG   => '/**'],
            [T_DOC_COMMENT_WHITESPACE => "\n"],
            [T_DOC_COMMENT_WHITESPACE => ' '],
            [T_DOC_COMMENT_STAR       => '*'],
            [T_DOC_COMMENT_WHITESPACE => ' '],
            [T_DOC_COMMENT_STRING     => 'Unclosed docblock, live coding.... with a blank line at end of file.'],
            [T_DOC_COMMENT_WHITESPACE => "\n"],
            [T_DOC_COMMENT_CLOSE_TAG  => ''],
        ];

        $this->checkTokenSequence($openerPtr, $expectedTokens);

    }//end testLiveCoding()


}//end class
4 changes: 4 additions & 0 deletions tests/Core/Tokenizer/Comment/LiveCoding3Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?php

/* testLiveCoding */
/**
Loading