diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 7aea3d40cda72..818dca6a0f875 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -626,6 +626,94 @@ public function next_tag( $query = null ) {
 	}
 
+	/**
+	 * Generator for a foreach loop to step through each class name for the matched tag.
+	 *
+	 * Yields each unique class name once, ASCII-lowercased, in order of first appearance.
+	 *
+	 * Example:
+	 *
+	 *     $p = new WP_HTML_Tag_Processor( "<div class='free lang-en'>" );
+	 *     $p->next_tag();
+	 *     foreach ( $p->class_list() as $class_name ) {
+	 *         echo "{$class_name} ";
+	 *     }
+	 *     // Outputs: "free lang-en "
+	 *
+	 * @since 6.4.0
+	 *
+	 * @return Generator Yields class names as strings; nothing if `class` is not a string value.
+	 */
+	public function class_list() {
+		/** @var string $class contains the string value of the class attribute, with character references decoded. */
+		$class = $this->get_attribute( 'class' );
+
+		if ( ! is_string( $class ) ) {
+			return;
+		}
+
+		$seen = array();
+		$end  = strlen( $class );
+		$at   = 0;
+		while ( $at < $end ) {
+			// Skip past any initial boundary characters.
+			$at += strspn( $class, " \t\f\r\n", $at );
+			if ( $at >= $end ) {
+				return;
+			}
+
+			// Find the byte length until the next boundary; never zero, as $at rests on a non-boundary byte.
+			$length = strcspn( $class, " \t\f\r\n", $at );
+
+			/*
+			 * CSS class names are case-insensitive in the ASCII range only; strtolower() is
+			 * locale-dependent before PHP 8.2, so fold A-Z with a fixed byte map instead.
+			 * @see https://www.w3.org/TR/CSS2/syndata.html#x1
+			 */
+			$name = strtr( substr( $class, $at, $length ), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
+			$at  += $length;
+
+			/*
+			 * It's expected that the number of class names for a given tag is relatively small.
+			 * Given this, it is probably faster overall to scan an array for a value rather
+			 * than to use the class name as a key and check if it's a key of $seen.
+			 */
+			if ( in_array( $name, $seen, true ) ) {
+				continue;
+			}
+
+			$seen[] = $name;
+			yield $name;
+		}
+	}
+
+
+	/**
+	 * Returns if a matched tag contains the given ASCII case-insensitive class name.
+	 *
+	 * @since 6.4.0
+	 *
+	 * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
+	 * @return bool|null Whether the matched tag contains the given class name, or null if not matched.
+	 */
+	public function has_class( $wanted_class ) {
+		if ( ! $this->tag_name_starts_at ) {
+			return null;
+		}
+
+		// Fold A-Z exactly as class_list() does; strtolower() is locale-dependent before PHP 8.2.
+		$wanted_class = strtr( $wanted_class, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
+
+		foreach ( $this->class_list() as $class_name ) {
+			if ( $class_name === $wanted_class ) {
+				return true;
+			}
+		}
+
+		return false;
+	}
+
+
 	/**
 	 * Sets a bookmark in the HTML document.
* @@ -2347,64 +2435,7 @@ private function matches() { } } - $needs_class_name = null !== $this->sought_class_name; - - if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) { - return false; - } - - /* - * Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name. - * - * This will overlook certain classes that exist in other lexical variations - * than was supplied to the search query, but requires more complicated searching. - */ - if ( $needs_class_name ) { - $class_start = $this->attributes['class']->value_starts_at; - $class_end = $class_start + $this->attributes['class']->value_length; - $class_at = $class_start; - - /* - * Ensure that boundaries surround the class name to avoid matching on - * substrings of a longer name. For example, the sequence "not-odd" - * should not match for the class "odd" even though "odd" is found - * within the class attribute text. - * - * See https://html.spec.whatwg.org/#attributes-3 - * See https://html.spec.whatwg.org/#space-separated-tokens - */ - while ( - // phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition - false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) && - $class_at < $class_end - ) { - /* - * Verify this class starts at a boundary. - */ - if ( $class_at > $class_start ) { - $character = $this->html[ $class_at - 1 ]; - - if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { - $class_at += strlen( $this->sought_class_name ); - continue; - } - } - - /* - * Verify this class ends at a boundary as well. 
- */ - if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) { - $character = $this->html[ $class_at + strlen( $this->sought_class_name ) ]; - - if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) { - $class_at += strlen( $this->sought_class_name ); - continue; - } - } - - return true; - } - + if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) { return false; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 7111571b1f113..4469f90c4f276 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -498,6 +498,17 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() { $this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' ); } + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::next_tag + */ + public function test_next_tag_matches_decoded_class_names() { + $p = new WP_HTML_Tag_Processor( '
' ); + + $this->assertTrue( $p->next_tag( array( 'class_name' => '' ) ), 'Failed to find tag with HTML-encoded class name.' ); + } + /** * @ticket 56299 * @ticket 57852 @@ -1957,6 +1968,150 @@ public function data_next_tag_ignores_contents_of_rawtext_tags() { ); } + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_missing_class() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_class_is_boolean() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_empty_when_class_is_empty() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = false; + foreach ( $p->class_list() as $class ) { + $found_classes = true; + } + + $this->assertFalse( $found_classes, 'Found classes when none exist.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_visits_each_class_in_order() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( 'one', 'two', 'three' ), $found_classes, 'Failed to visit the class names in their original order.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_decodes_class_names() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( '∉-class', '', "\u{ff03}" ), $found_classes, 'Failed to report class names in their decoded form.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::class_list + */ + public function test_class_list_visits_unique_class_names_only_once() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + + $found_classes = array(); + foreach ( $p->class_list() as $class ) { + $found_classes[] = $class; + } + + $this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' ); + } + + /** + * @ticket 59209 + * + * @covers WP_HTML_Tag_Processor::has_class + * + * @dataProvider data_html_with_variations_of_class_values_and_sought_class_names + * + * @param string $html Contains a tag optionally containing a `class` attribute. + * @param string $sought_class Name of class to find in the input tag's `class`. + * @param bool $has_class Whether the sought class exists in the given HTML. + */ + public function test_has_class_handles_expected_class_name_variations( $html, $sought_class, $has_class ) { + $p = new WP_HTML_Tag_Processor( $html ); + $p->next_tag(); + + if ( $has_class ) { + $this->assertTrue( $p->has_class( $sought_class ), "Failed to find expected class {$sought_class}." ); + } else { + $this->assertFalse( $p->has_class( $sought_class ), "Found class {$sought_class} when it doesn't exist." ); + } + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_html_with_variations_of_class_values_and_sought_class_names() { + return array( + 'Tag without any classes' => array( '
', 'foo', false ), + 'Tag with boolean class' => array( '', 'foo', false ), + 'Tag with empty class' => array( '

', 'foo', false ), + 'Tag with exact match' => array( '