From 9840f032815c0f427ae729a02d038cd5b0d2c919 Mon Sep 17 00:00:00 2001
From: Carlos Bravo
Date: Wed, 27 Nov 2024 09:55:53 +0000
Subject: [PATCH] HTML API: Recognize all uppercase tag names in tag processor.

Fixes a missing "D" in the character list used by strspn to find tag openers, causing tags starting with D to be skipped by the tag processor in some circumstances.

Follow-up to [58613].

Props jonsurrell, santosguillamot, wongjn, cbravobernal.
Fixes #62522.

git-svn-id: https://develop.svn.wordpress.org/trunk@59464 602fd350-edb4-49c9-b593-d223f7449a82
---
 .../html-api/class-wp-html-tag-processor.php  |  2 +-
 .../tests/html-api/wpHtmlTagProcessor.php     | 62 +++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index e2632c80f6da5..39390621e86a6 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1668,7 +1668,7 @@ private function parse_next_tag(): bool {
 			 *
 			 * @see https://html.spec.whatwg.org/#tag-open-state
 			 */
-			if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) {
+			if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) {
 				++$at;
 				continue;
 			}
diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
index 393fd2cda06db..cd8faee4ed6a4 100644
--- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
@@ -2984,4 +2984,66 @@ public function test_doctype_doc_name() {
 		$this->assertNull( $doctype->public_identifier );
 		$this->assertNull( $doctype->system_identifier );
 	}
+
+	/**
+	 * @ticket 62522
+	 *
+	 * @dataProvider data_alphabet_by_characters_lowercase
+	 */
+	public function test_recognizes_lowercase_tag_name( string $char ) {
+		/*
+		 * The spacing in the HTML string is important to the problematic
+		 * codepath in ticket #62522.
+		 */
+		$html      = " <{$char}> </{$char}>";
+		$processor = new WP_HTML_Tag_Processor( $html );
+		$this->assertTrue( $processor->next_tag(), "Failed to find open tag in '{$html}'." );
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_closers' => 'visit' ) ),
+			"Failed to find close tag in '{$html}'."
+		);
+	}
+
+	/**
+	 * @ticket 62522
+	 *
+	 * @dataProvider data_alphabet_by_characters_uppercase
+	 */
+	public function test_recognizes_uppercase_tag_name( string $char ) {
+		/*
+		 * The spacing in the HTML string is important to the problematic
+		 * codepath in ticket #62522.
+		 */
+		$html      = " <{$char}> </{$char}>";
+		$processor = new WP_HTML_Tag_Processor( $html );
+		$this->assertTrue( $processor->next_tag(), "Failed to find open tag in '{$html}'." );
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_closers' => 'visit' ) ),
+			"Failed to find close tag in '{$html}'."
+		);
+	}
+
+	/**
+	 * Data provider: yields each lowercase ASCII letter 'a'..'z', keyed by the letter itself.
+	 *
+	 * @return Generator
+	 */
+	public static function data_alphabet_by_characters_lowercase() {
+		$char = 'a';
+		while ( $char <= 'z' ) {
+			yield $char => array( $char );
+			$char = chr( ord( $char ) + 1 );
+		}
+	}
+
+	/**
+	 * Data provider: yields the uppercased form of every letter from the lowercase provider.
+	 *
+	 * @return Generator
+	 */
+	public static function data_alphabet_by_characters_uppercase() {
+		foreach ( self::data_alphabet_by_characters_lowercase() as $data ) {
+			yield strtoupper( $data[0] ) => array( strtoupper( $data[0] ) );
+		}
+	}
 }