From 0e8c4fb381ef4d41ceb948fc408f4b52ab5f545d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 19 Nov 2024 16:30:04 +0100 Subject: [PATCH 001/129] WIP class skeleton --- .../html-api/class-wp-css-selector.php | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/wp-includes/html-api/class-wp-css-selector.php diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php new file mode 100644 index 0000000000000..7ec6b5a69ced2 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -0,0 +1,31 @@ + Date: Wed, 20 Nov 2024 16:57:19 +0100 Subject: [PATCH 002/129] Document class --- .../html-api/class-wp-css-selector.php | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php index 7ec6b5a69ced2..1684aefef2024 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -12,20 +12,47 @@ * * This class is designed for internal use by the HTML processor. * + * This class is instantiated via the `WP_CSS_Selector::from_selector( string $selector )` method. + * It accepts a CSS selector string and returns an instance of itself or `null` if the selector + * is invalid or unsupported. + * + * A subset of the CSS selector grammar is supported. The grammar is defined in the CSS Syntax + * specification, which is available at https://www.w3.org/TR/css-syntax-3/. + * + * Supported selector syntax: + * - Type selectors (tag names, e.g. `div`) + * - Class selectors (e.g. `.class-name`) + * - ID selectors (e.g. `#unique-id`) + * - Attribute selectors (e.g. `[attribute-name]` or `[attribute-name="value"]`) + * - The following combinators: + * - descendant (e.g. `.parent .descendant`) + * - child (`.parent > .child`) + * - Comma-separated selector lists (e.g. 
`.selector-1, .selector-2`) + * + * Unsupported selector syntax: + * - The following combinators: + * - Next sibling (`.sibling + .sibling`) + * - Subsequent sibling (`.sibling ~ .sibling`) + * - Pseudo-element selectors (e.g. `::before`) + * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) + * * @since TBD * * @access private * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-token * @see https://www.w3.org/tr/selectors/#parse-selector + * */ class WP_CSS_Selector { private function __construct() {} /** - * @return static + * @return static|null */ public static function from_selector( string $selector ) { $res = new static(); return $res; } + } From 40222d30200afdf998586cb127c35c880bfe7df8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 11:57:28 +0100 Subject: [PATCH 003/129] Do not support namespaced selectors --- src/wp-includes/html-api/class-wp-css-selector.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php index 1684aefef2024..fb8934bec06f4 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -35,6 +35,7 @@ * - Subsequent sibling (`.sibling ~ .sibling`) * - Pseudo-element selectors (e.g. `::before`) * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) + * - Namespace prefixes that need to be resolved (e.g. 
`svg|title` or `[xlink|href]`) * * @since TBD * From 60926421295e58229637891c853ab50f0920ae23 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 16:04:42 +0100 Subject: [PATCH 004/129] Flesh out stuff --- .../html-api/class-wp-css-selector.php | 59 ----- .../html-api/class-wp-css-selectors.php | 248 ++++++++++++++++++ 2 files changed, 248 insertions(+), 59 deletions(-) delete mode 100644 src/wp-includes/html-api/class-wp-css-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-selectors.php diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php deleted file mode 100644 index fb8934bec06f4..0000000000000 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ /dev/null @@ -1,59 +0,0 @@ - .child`) - * - Comma-separated selector lists (e.g. `.selector-1, .selector-2`) - * - * Unsupported selector syntax: - * - The following combinators: - * - Next sibling (`.sibling + .sibling`) - * - Subsequent sibling (`.sibling ~ .sibling`) - * - Pseudo-element selectors (e.g. `::before`) - * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) - * - Namespace prefixes that need to be resolved (e.g. `svg|title` or `[xlink|href]`) - * - * @since TBD - * - * @access private - * - * @see https://www.w3.org/TR/css-syntax-3/#consume-a-token - * @see https://www.w3.org/tr/selectors/#parse-selector - * - */ -class WP_CSS_Selector { - private function __construct() {} - - /** - * @return static|null - */ - public static function from_selector( string $selector ) { - $res = new static(); - return $res; - } - -} diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php new file mode 100644 index 0000000000000..acc5db02752c3 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -0,0 +1,248 @@ + .child`) + * + * Unsupported selector syntax: + * - Pseudo-element selectors (e.g. 
`::before`) + * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) + * - Namespace prefixes (e.g. `svg|title` or `[xlink|href]`) + * - The following combinators: + * - Next sibling (`.sibling + .sibling`) + * - Subsequent sibling (`.sibling ~ .sibling`) + * + * @since TBD + * + * @access private + * + * @see https://www.w3.org/TR/css-syntax-3/#consume-a-token + * @see https://www.w3.org/tr/selectors/#parse-selector + * @see https://www.w3.org/TR/selectors-api2/ + * @see https://www.w3.org/TR/selectors-4/ + * + */ +class WP_CSS_Selectors { + + /** + * Takes a CSS selectors string and returns an instance of itself or `null` if the selector + * is invalid or unsupported. + * + * @since TBD + * + * @param string $selectors CSS selectors string. + * @return static|null + */ + public static function from_selectors( string $selectors ) { + $res = new static(); + return $res; + } + + /** + * Returns a list of selectors. + * + * @since TBD + * + * @return WP_CSS_Selector[] + */ + private static function parse( string $input ) { + // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace and matches the dom_selectors_group production. + $input = trim( $input, " \t\r\n\r" ); + + if ( '' === $input ) { + null; + } + + /* + * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. + * > + * > To filter code points from a stream of (unfiltered) code points input: + * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). 
+ * + * https://www.w3.org/TR/css-syntax-3/#input-preprocessing + */ + $input = str_replace( array( "\r\n" ), "\n", $input ); + $input = str_replace( array( "\r", "\f" ), "\n", $input ); + $input = str_replace( "\0", "\u{FFFD}", $input ); + + $at = 0; + $length = strlen( $input ); + $selectors = array(); + + $at = strspn( $input, "\n\t ", $at ); + while ( $at < $length ) { + } + } +} + +interface IWP_CSS_Selector_Parser { + public static function parse( string $input, string $offset, ?int $consumed_bytes = null ): ?self; +} + +abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser { + public static function parse_whitespace( string $input, string &$offset ): bool { + $length = strspn( $input, " \t\r\n\f", $offset ); + $advanced = $length > 0; + $offset += $length; + return $advanced; + } + + /* + * Utiltities + * ========== + * + * The following functions do not consume any input. + */ + + /** + * > 4.3.8. Check if two code points are a valid escape + * > This section describes how to check if two code points are a valid escape. The algorithm described here can be called explicitly with two code points, or can be called with the input stream itself. In the latter case, the two code points in question are the current input code point and the next input code point, in that order. + * > + * > Note: This algorithm will not consume any additional code point. + * > + * > If the first code point is not U+005C REVERSE SOLIDUS (\), return false. + * > + * > Otherwise, if the second code point is a newline, return false. + * > + * > Otherwise, return true. + * + * https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape + * + * @todo this does not check whether the second codepoint is valid. 
+ */ + public static function next_two_are_valid_escape( string $input, string $offset ): bool { + if ( $offset + 1 >= strlen( $input ) ) { + return false; + } + return '\\' === $input[ $offset ] && "\n" !== $input[ $offset + 1 ]; + } + + /** + * > ident-start code point + * > A letter, a non-ASCII code point, or U+005F LOW LINE (_). + * > uppercase letter + * > A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z) inclusive. + * > lowercase letter + * > A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z) inclusive. + * > letter + * > An uppercase letter or a lowercase letter. + * > non-ASCII code point + * > A code point with a value equal to or greater than U+0080 . + */ + public static function is_ident_start_codepoint( string $input, string $offset ): bool { + if ( $offset >= strlen( $input ) ) { + return false; + } + + return ( + '_' === $input[ $offset ] || + ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || + ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'Z' ) || + $input[ $offset ] <= '\x7F' + ); + } + + /** + * > ident code point + * > An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-). + * > digit + * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. + */ + public static function is_ident_codepoint( string $input, string $offset ): bool { + return '-' === $input[ $offset ] || + ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || + self::is_ident_start_codepoint( $input, $offset ); + } + + /** + * > 4.3.9. Check if three code points would start an ident sequence + * > This section describes how to check if three code points would start an ident sequence. The algorithm described here can be called explicitly with three code points, or can be called with the input stream itself. In the latter case, the three code points in question are the current input code point and the next two input code points, in that order. 
+ * > + * > Note: This algorithm will not consume any additional code points. + * > + * > Look at the first code point: + * > + * > U+002D HYPHEN-MINUS + * > If the second code point is an ident-start code point or a U+002D HYPHEN-MINUS, or the second and third code points are a valid escape, return true. Otherwise, return false. + * > ident-start code point + * > Return true. + * > U+005C REVERSE SOLIDUS (\) + * > If the first and second code points are a valid escape, return true. Otherwise, return false. + * > anything else + * > Return false. + * + * https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier + */ + public static function check_if_three_code_points_would_start_an_ident_sequence( string $input, string $offset ): bool { + if ( $offset >= strlen( $input ) ) { + return false; + } + + // > U+005C REVERSE SOLIDUS (\) + if ( '\\' === $input[ $offset ] ) { + return self::next_two_are_valid_escape( $input, $offset ); + } + + // > U+002D HYPHEN-MINUS + if ( '-' === $input[ $offset ] ) { + $after_initial_hyphen_minus_offset = $offset + 1; + if ( $offset >= strlen( $input ) ) { + return false; + } + + // > If the second code point is… U+002D HYPHEN-MINUS… return true + if ( '-' === $input[ $after_initial_hyphen_minus_offset ] ) { + return true; + } + + // > If the second and third code points are a valid escape, return true. + if ( self::next_two_are_valid_escape( $input, $after_initial_hyphen_minus_offset ) ) { + return true; + } + + // > If the second code point is an ident-start code point… return true. + if ( self::is_ident_start_codepoint( $input, $after_initial_hyphen_minus_offset ) ) { + return true; + } + + // > Otherwise, return false. + return false; + } + + // > ident-start code point + // > Return true. + // > anything else + // > Return false. 
+ return self::is_ident_start_codepoint( $input, $offset ); + } +} From 3e3b2b200696d9e5f51c29f86f8ec48a20df1bf4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 17:06:20 +0100 Subject: [PATCH 005/129] Starting to actually parse --- .../html-api/class-wp-css-selectors.php | 213 ++++++++++++++++-- 1 file changed, 199 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index acc5db02752c3..53417a0f1967c 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -52,6 +52,11 @@ * */ class WP_CSS_Selectors { + private $selectors; + + private function __construct( array $selectors ) { + $this->selectors = $selectors; + } /** * Takes a CSS selectors string and returns an instance of itself or `null` if the selector @@ -60,11 +65,10 @@ class WP_CSS_Selectors { * @since TBD * * @param string $selectors CSS selectors string. - * @return static|null + * @return self|null */ - public static function from_selectors( string $selectors ) { - $res = new static(); - return $res; + public static function from_selectors( string $selectors ): ?self { + return self::parse( $selectors ); } /** @@ -72,7 +76,7 @@ public static function from_selectors( string $selectors ) { * * @since TBD * - * @return WP_CSS_Selector[] + * @return WP_CSS_Selectors|null */ private static function parse( string $input ) { // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace and matches the dom_selectors_group production. 
@@ -95,28 +99,209 @@ private static function parse( string $input ) { $input = str_replace( array( "\r", "\f" ), "\n", $input ); $input = str_replace( "\0", "\u{FFFD}", $input ); - $at = 0; $length = strlen( $input ); $selectors = array(); - $at = strspn( $input, "\n\t ", $at ); - while ( $at < $length ) { + $offset = 0; + + while ( $offset < $length ) { + $sel = WP_CSS_ID_Selector::parse( $input, $offset ); + if ( $sel ) { + $selectors[] = $sel; + } + } + if ( count( $selectors ) ) { + return new WP_CSS_Selectors( $selectors ); + } + return null; + } +} + +final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser { + /** @var string */ + public $ident; + + private function __construct( string $ident ) { + $this->ident = $ident; + } + + public static function parse( string $input, string &$offset ): ?self { + $ident = self::parse_hash_token( $input, $offset ); + if ( null === $ident ) { + return null; } + return new self( $ident ); } } interface IWP_CSS_Selector_Parser { - public static function parse( string $input, string $offset, ?int $consumed_bytes = null ): ?self; + /** + * @return static|null + */ + public static function parse( string $input, string &$offset ); } abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser { - public static function parse_whitespace( string $input, string &$offset ): bool { + const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; + + protected static function parse_whitespace( string $input, string &$offset ): bool { $length = strspn( $input, " \t\r\n\f", $offset ); $advanced = $length > 0; $offset += $length; return $advanced; } + /** + * Tokenization of hash tokens + * + * > U+0023 NUMBER SIGN (#) + * > If the next input code point is an ident code point or the next two input code points are a valid escape, then: + * > 1. Create a . + * > 2. If the next 3 input code points would start an ident sequence, set the + * > ’s type flag to "id". + * > 3. 
Consume an ident sequence, and set the ’s value to the + * > returned string. + * > 4. Return the . + * > Otherwise, return a with its value set to the current input code point. + * + * This implementation is not interested in the , a '#' delim token is not relevant for selectors. + */ + protected static function parse_hash_token( string $input, string &$offset ): ?string { + if ( $offset + 1 >= strlen( $input ) || '#' !== $input[ $offset ] ) { + return null; + } + + $offset_after_hash = $offset + 1; + if ( self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset_after_hash ) ) { + $offset = $offset_after_hash; + return self::parse_ident( $input, $offset ); + } + return null; + } + + /** + * Parse an ident token + * + * CAUTION: This method is _not_ for parsing and ID selector! + * + * > 4.3.11. Consume an ident sequence + * > This section describes how to consume an ident sequence from a stream of code points. It returns a string containing the largest name that can be formed from adjacent code points in the stream, starting from the first. + * > + * > Note: This algorithm does not do the verification of the first few code points that are necessary to ensure the returned code points would constitute an . If that is the intended use, ensure that the stream starts with an ident sequence before calling this algorithm. + * > + * > Let result initially be an empty string. + * > + * > Repeatedly consume the next input code point from the stream: + * > + * > ident code point + * > Append the code point to result. + * > the stream starts with a valid escape + * > Consume an escaped code point. Append the returned code point to result. + * > anything else + * > Reconsume the current input code point. Return result. + * + * https://www.w3.org/TR/css-syntax-3/#consume-name + */ + protected static function parse_ident( string $input, string &$offset ): ?string { + if ( ! 
self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { + return null; + } + + $ident = ''; + + while ( $offset < strlen( $input ) ) { + if ( self::next_two_are_valid_escape( $input, $offset ) ) { + $ident .= self::consume_escaped_codepoint( $input, $offset ); + continue; + } elseif ( self::is_ident_codepoint( $input, $offset ) ) { + // @todo this should append and advance the correct number of bytes. + $ident .= $input[ $offset ]; + $offset += 1; + continue; + } + break; + } + + return $ident; + } + + /** + * Consume an escaped code point. + * + * > 4.3.7. Consume an escaped code point + * > This section describes how to consume an escaped code point. It assumes that the U+005C + * > REVERSE SOLIDUS (\) has already been consumed and that the next input code point has + * > already been verified to be part of a valid escape. It will return a code point. + * > + * > Consume the next input code point. + * > + * > hex digit + * > Consume as many hex digits as possible, but no more than 5. Note that this means 1-6 + * > hex digits have been consumed in total. If the next input code point is whitespace, + * > consume it as well. Interpret the hex digits as a hexadecimal number. If this number is + * > zero, or is for a surrogate, or is greater than the maximum allowed code point, return + * > U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the code point with that value. + * > EOF + * > This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�). + * > anything else + * > Return the current input code point. 
+ */ + protected static function consume_escaped_codepoint( $input, &$offset ): ?string { + $char = $input[ $offset ]; + if ( + ( '0' <= $char && $char <= '9' ) || + ( 'a' <= $char && $char <= 'f' ) || + ( 'A' <= $char && $char <= 'F' ) + ) { + $hex_end_offset = $offset + 1; + while ( + strlen( $input ) > $hex_end_offset && + $hex_end_offset - $offset < 6 && + ( + ( '0' <= $char && $char <= '9' ) || + ( 'a' <= $char && $char <= 'f' ) || + ( 'A' <= $char && $char <= 'F' ) + ) + ) { + $hex_end_offset += 1; + } + + $codepoint_value = hexdec( substr( $input, $offset, $hex_end_offset - $offset ) ); + + // > A surrogate is a leading surrogate or a trailing surrogate. + // > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. + // > A trailing surrogate is a code point that is in the range U+DC00 to U+DFFF, inclusive. + // The surrogate ranges are adjacent, so the complete range is 0xD800..=0xDFFF, + // inclusive. + $codepoint_char = ( + 0 === $codepoint_value || + $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || + ( 0xD800 <= $codepoint_value || $codepoint_value <= 0xDFFF ) + ) ? + "\u{FFFD}" : + mb_chr( $codepoint_value, 'UTF-8' ); + + $offset = $hex_end_offset; + + // If the next input code point is whitespace, consume it as well. + if ( + strlen( $input ) > $offset && + ( + "\n" === $input[ $offset ] || + "\t" === $input[ $offset ] || + ' ' === $input[ $offset ] + ) + ) { + ++$offset; + } + return $codepoint_char; + } + + $codepoint_char = mb_substr( $input, $offset, 1, 'UTF-8' ); + $offset += strlen( $codepoint_char ); + return $codepoint_char; + } + /* * Utiltities * ========== @@ -140,7 +325,7 @@ public static function parse_whitespace( string $input, string &$offset ): bool * * @todo this does not check whether the second codepoint is valid. 
*/ - public static function next_two_are_valid_escape( string $input, string $offset ): bool { + protected static function next_two_are_valid_escape( string $input, string $offset ): bool { if ( $offset + 1 >= strlen( $input ) ) { return false; } @@ -159,7 +344,7 @@ public static function next_two_are_valid_escape( string $input, string $offset * > non-ASCII code point * > A code point with a value equal to or greater than U+0080 . */ - public static function is_ident_start_codepoint( string $input, string $offset ): bool { + protected static function is_ident_start_codepoint( string $input, string $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } @@ -178,7 +363,7 @@ public static function is_ident_start_codepoint( string $input, string $offset ) * > digit * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. */ - public static function is_ident_codepoint( string $input, string $offset ): bool { + protected static function is_ident_codepoint( string $input, string $offset ): bool { return '-' === $input[ $offset ] || ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || self::is_ident_start_codepoint( $input, $offset ); @@ -203,7 +388,7 @@ public static function is_ident_codepoint( string $input, string $offset ): bool * * https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier */ - public static function check_if_three_code_points_would_start_an_ident_sequence( string $input, string $offset ): bool { + protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, string $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } From 967557fb01f0e016d63fa2b391d351aec90090bc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 17:41:16 +0100 Subject: [PATCH 006/129] Add ident tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 
tests/phpunit/tests/html-api/wpCssSelectors.php diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php new file mode 100644 index 0000000000000..2857603360e79 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -0,0 +1,50 @@ +assertSame( $ident, $result ); + $this->assertSame( substr( $input, $offset ), $rest ); + } +} From 2ec1db32af13f1248935ee5e8bb2d634430afc31 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 17:41:42 +0100 Subject: [PATCH 007/129] Fix ident non-ascii bug --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 53417a0f1967c..547a51293bb11 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -353,7 +353,7 @@ protected static function is_ident_start_codepoint( string $input, string $offse '_' === $input[ $offset ] || ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'Z' ) || - $input[ $offset ] <= '\x7F' + $input[ $offset ] > '\x7F' ); } From ee2c7cefa987ef4cb208447aad489a700ab7f91f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 17:42:12 +0100 Subject: [PATCH 008/129] Use class after defined --- .../html-api/class-wp-css-selectors.php | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 547a51293bb11..55396c8851294 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -117,23 +117,6 @@ private static function parse( string $input ) { } } -final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser { - /** @var 
string */ - public $ident; - - private function __construct( string $ident ) { - $this->ident = $ident; - } - - public static function parse( string $input, string &$offset ): ?self { - $ident = self::parse_hash_token( $input, $offset ); - if ( null === $ident ) { - return null; - } - return new self( $ident ); - } -} - interface IWP_CSS_Selector_Parser { /** * @return static|null @@ -431,3 +414,20 @@ protected static function check_if_three_code_points_would_start_an_ident_sequen return self::is_ident_start_codepoint( $input, $offset ); } } + +final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser { + /** @var string */ + public $ident; + + private function __construct( string $ident ) { + $this->ident = $ident; + } + + public static function parse( string $input, string &$offset ): ?self { + $ident = self::parse_hash_token( $input, $offset ); + if ( null === $ident ) { + return null; + } + return new self( $ident ); + } +} From 0f708ba4892a50249d0c2267640acf2a256beb21 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 18:01:07 +0100 Subject: [PATCH 009/129] Fix some char stuff --- .../html-api/class-wp-css-selectors.php | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 55396c8851294..408d25395febb 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -194,6 +194,8 @@ protected static function parse_ident( string $input, string &$offset ): ?string while ( $offset < strlen( $input ) ) { if ( self::next_two_are_valid_escape( $input, $offset ) ) { + // Move past the `\` character. 
+ ++$offset; $ident .= self::consume_escaped_codepoint( $input, $offset ); continue; } elseif ( self::is_ident_codepoint( $input, $offset ) ) { @@ -230,20 +232,19 @@ protected static function parse_ident( string $input, string &$offset ): ?string * > Return the current input code point. */ protected static function consume_escaped_codepoint( $input, &$offset ): ?string { - $char = $input[ $offset ]; if ( - ( '0' <= $char && $char <= '9' ) || - ( 'a' <= $char && $char <= 'f' ) || - ( 'A' <= $char && $char <= 'F' ) + ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || + ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'f' ) || + ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'F' ) ) { $hex_end_offset = $offset + 1; while ( strlen( $input ) > $hex_end_offset && $hex_end_offset - $offset < 6 && ( - ( '0' <= $char && $char <= '9' ) || - ( 'a' <= $char && $char <= 'f' ) || - ( 'A' <= $char && $char <= 'F' ) + ( '0' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= '9' ) || + ( 'a' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= 'f' ) || + ( 'A' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= 'F' ) ) ) { $hex_end_offset += 1; @@ -259,7 +260,7 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string $codepoint_char = ( 0 === $codepoint_value || $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || - ( 0xD800 <= $codepoint_value || $codepoint_value <= 0xDFFF ) + ( 0xD800 <= $codepoint_value && $codepoint_value <= 0xDFFF ) ) ? 
"\u{FFFD}" : mb_chr( $codepoint_value, 'UTF-8' ); From 3cb455d41f7923d4b4be9fec3b7cf3f72686dfdc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 18:01:17 +0100 Subject: [PATCH 010/129] Improve tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 2857603360e79..a55463ec7122e 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -15,19 +15,20 @@ class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { public static function data_valid_idents() { return array( - array( '_-foo123#xyz', '_-foo123', '#xyz' ), - array( '😍foo123.xyz', '😍foo123', '.xyz' ), - array( '\\xyz', 'xyz', '' ), - array( '\\ x', ' x', '' ), - array( '\\😍', '😍', '' ), - array( '\\abcd', 'ꯍ', '' ), + 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), + 'trailing .' 
=> array( '😍foo123.xyz', '😍foo123', '.xyz' ), + 'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ), + 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), + 'escaped space' => array( '\\ x', ' x', '' ), + 'escaped emoji' => array( '\\😍', '😍', '' ), + 'hex unicode codepoint' => array( '\\abcd', 'ꯍ', '' ), - array( "\\31\t23", '123', '' ), - array( "\\31\n23", '123', '' ), - array( "\\31 23", '123', '' ), - array( '\\9', "\t", '' ), - array( '\\61 bc', 'abc', '' ), - array( '\\000061bc', 'abc', '' ), + 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), + 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), + 'hex space-suffixed 1' => array( "\\31 23", '123', '' ), + 'hex tab' => array( '\\9', "\t", '' ), + 'hex a' => array( '\\61 bc', 'abc', '' ), + 'hex a max escape length' => array( '\\000061bc', 'abc', '' ), ); } @@ -44,7 +45,7 @@ public static function test( string $input, &$offset ) { $offset = 0; $ident = $c::test( $input, $offset ); - $this->assertSame( $ident, $result ); - $this->assertSame( substr( $input, $offset ), $rest ); + $this->assertSame( $ident, $result, 'Ident did not match.' ); + $this->assertSame( substr( $input, $offset ), $rest, 'Offset was not updated correctly.' 
); } } From 5609e509ef589afbe23654fe629ce85fc06ad7ec Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 19:53:10 +0100 Subject: [PATCH 011/129] Housekeeping --- src/wp-includes/html-api/class-wp-css-selectors.php | 4 +--- tests/phpunit/tests/html-api/wpCssSelectors.php | 7 ++++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 408d25395febb..f9c85f9b48a3c 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -1,14 +1,12 @@ array( '_-foo123#xyz', '_-foo123', '#xyz' ), @@ -33,6 +36,8 @@ public static function data_valid_idents() { } /** + * @ticket TBD + * * @dataProvider data_valid_idents */ public function test_valid_idents( string $input, string $result, string $rest ) { From 4f25bc21f907369c899ea2c8c07e7461bdb731e3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 19:56:30 +0100 Subject: [PATCH 012/129] Require new file in WP --- src/wp-settings.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/wp-settings.php b/src/wp-settings.php index 635f6de248dd5..6c799d5c95140 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -265,6 +265,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; +require ABSPATH . WPINC . '/html-api/class-wp-css-selectors.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; require ABSPATH . WPINC . 
'/class-wp-http-curl.php'; From 943293f2f840988546c84d17d59dfe4d37e05448 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:14:21 +0100 Subject: [PATCH 013/129] Fix offset type --- .../html-api/class-wp-css-selectors.php | 18 +++++++++--------- .../phpunit/tests/html-api/wpCssSelectors.php | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index f9c85f9b48a3c..897cf4b59d752 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -119,13 +119,13 @@ interface IWP_CSS_Selector_Parser { /** * @return static|null */ - public static function parse( string $input, string &$offset ); + public static function parse( string $input, int &$offset ); } abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; - protected static function parse_whitespace( string $input, string &$offset ): bool { + protected static function parse_whitespace( string $input, int &$offset ): bool { $length = strspn( $input, " \t\r\n\f", $offset ); $advanced = $length > 0; $offset += $length; @@ -147,7 +147,7 @@ protected static function parse_whitespace( string $input, string &$offset ): bo * * This implementation is not interested in the , a '#' delim token is not relevant for selectors. 
*/ - protected static function parse_hash_token( string $input, string &$offset ): ?string { + protected static function parse_hash_token( string $input, int &$offset ): ?string { if ( $offset + 1 >= strlen( $input ) || '#' !== $input[ $offset ] ) { return null; } @@ -183,7 +183,7 @@ protected static function parse_hash_token( string $input, string &$offset ): ?s * * https://www.w3.org/TR/css-syntax-3/#consume-name */ - protected static function parse_ident( string $input, string &$offset ): ?string { + protected static function parse_ident( string $input, int &$offset ): ?string { if ( ! self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { return null; } @@ -307,7 +307,7 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string * * @todo this does not check whether the second codepoint is valid. */ - protected static function next_two_are_valid_escape( string $input, string $offset ): bool { + protected static function next_two_are_valid_escape( string $input, int $offset ): bool { if ( $offset + 1 >= strlen( $input ) ) { return false; } @@ -326,7 +326,7 @@ protected static function next_two_are_valid_escape( string $input, string $offs * > non-ASCII code point * > A code point with a value equal to or greater than U+0080 . */ - protected static function is_ident_start_codepoint( string $input, string $offset ): bool { + protected static function is_ident_start_codepoint( string $input, int $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } @@ -345,7 +345,7 @@ protected static function is_ident_start_codepoint( string $input, string $offse * > digit * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. 
*/ - protected static function is_ident_codepoint( string $input, string $offset ): bool { + protected static function is_ident_codepoint( string $input, int $offset ): bool { return '-' === $input[ $offset ] || ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || self::is_ident_start_codepoint( $input, $offset ); @@ -370,7 +370,7 @@ protected static function is_ident_codepoint( string $input, string $offset ): b * * https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier */ - protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, string $offset ): bool { + protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } @@ -422,7 +422,7 @@ private function __construct( string $ident ) { $this->ident = $ident; } - public static function parse( string $input, string &$offset ): ?self { + public static function parse( string $input, int &$offset ): ?self { $ident = self::parse_hash_token( $input, $offset ); if ( null === $ident ) { return null; diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 39d68efcd8f4a..e0dd09c929d09 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -42,7 +42,7 @@ public static function data_valid_idents() { */ public function test_valid_idents( string $input, string $result, string $rest ) { $c = new class() extends WP_CSS_Selector_Parser { - public static function parse( string $input, string &$offset ) {} + public static function parse( string $input, int &$offset ) {} public static function test( string $input, &$offset ) { return self::parse_ident( $input, $offset ); } From 24c9744657023179a33f786a6a7b4d0242534783 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:14:48 +0100 Subject: [PATCH 014/129] Add more tests and invalid 
tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 66 +++++++++++++++---- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index e0dd09c929d09..d12fcc42c8e60 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -18,20 +18,41 @@ class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { */ public static function data_valid_idents() { return array( - 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), - 'trailing .' => array( '😍foo123.xyz', '😍foo123', '.xyz' ), - 'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ), - 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), - 'escaped space' => array( '\\ x', ' x', '' ), - 'escaped emoji' => array( '\\😍', '😍', '' ), - 'hex unicode codepoint' => array( '\\abcd', 'ꯍ', '' ), + 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), + 'trailing .' 
=> array( '😍foo123.xyz', '😍foo123', '.xyz' ), + 'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ), + 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), + 'escaped space' => array( '\\ x', ' x', '' ), + 'escaped emoji' => array( '\\😍', '😍', '' ), + 'hex unicode codepoint' => array( '\\abcd', 'ꯍ', '' ), - 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), - 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), - 'hex space-suffixed 1' => array( "\\31 23", '123', '' ), - 'hex tab' => array( '\\9', "\t", '' ), - 'hex a' => array( '\\61 bc', 'abc', '' ), - 'hex a max escape length' => array( '\\000061bc', 'abc', '' ), + 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), + 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), + 'hex space-suffixed 1' => array( "\\31 23", '123', '' ), + 'hex tab' => array( '\\9', "\t", '' ), + 'hex a' => array( '\\61 bc', 'abc', '' ), + 'hex a max escape length' => array( '\\000061bc', 'abc', '' ), + + 'out of range replacement min' => array( '\\110000 ', "\u{fffd}", '' ), + 'out of range replacement max' => array( '\\ffffff ', "\u{fffd}", '' ), + 'leading surrogate min replacement' => array( '\\d800 ', "\u{fffd}", '' ), + 'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ), + 'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ), + 'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ), + ); + } + + /** + * Data provider. + */ + public static function data_invalid_idents() { + return array( + 'bad start >' => array( '>' ), + 'bad start [' => array( '[' ), + 'bad start #' => array( '#' ), + 'bad start " "' => array( ' ' ), + 'bad start -' => array( '-' ), + 'bad start 1' => array( '-' ), ); } @@ -53,4 +74,23 @@ public static function test( string $input, &$offset ) { $this->assertSame( $ident, $result, 'Ident did not match.' ); $this->assertSame( substr( $input, $offset ), $rest, 'Offset was not updated correctly.' 
); } + + /** + * @ticket TBD + * + * @dataProvider data_invalid_idents + */ + public function test_invalid_idents( string $input ) { + $c = new class() extends WP_CSS_Selector_Parser { + public static function parse( string $input, int &$offset ) {} + public static function test( string $input, int &$offset ) { + return self::parse_ident( $input, $offset ); + } + }; + + $offset = 0; + $result = $c::test( $input, $offset ); + $this->assertNull( $result, 'Ident did not match.' ); + $this->assertSame( 0, $offset, 'Offset was incorrectly adjusted.' ); + } } From a7c10b9e12aeed9263a69b46eeb011e59092ed07 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:15:03 +0100 Subject: [PATCH 015/129] Fix wrong offset var usage --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 897cf4b59d752..8afb3928e07de 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -383,7 +383,7 @@ protected static function check_if_three_code_points_would_start_an_ident_sequen // > U+002D HYPHEN-MINUS if ( '-' === $input[ $offset ] ) { $after_initial_hyphen_minus_offset = $offset + 1; - if ( $offset >= strlen( $input ) ) { + if ( $after_initial_hyphen_minus_offset >= strlen( $input ) ) { return false; } From dd718b7093dfa3510d6b7476b39510013f759797 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:17:15 +0100 Subject: [PATCH 016/129] comment tweak --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 8afb3928e07de..64020bcc0c607 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ 
b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -392,7 +392,7 @@ protected static function check_if_three_code_points_would_start_an_ident_sequen return true; } - // > If the second and third code points are a valid escape, return true. + // > If the second and third code points are a valid escape… return true. if ( self::next_two_are_valid_escape( $input, $after_initial_hyphen_minus_offset ) ) { return true; } From 5884aca6e807002d6474c37e291b3dde5c59778d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:53:50 +0100 Subject: [PATCH 017/129] Implement codepoint escape with strspn --- .../html-api/class-wp-css-selectors.php | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 64020bcc0c607..56c31911d95b8 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -230,25 +230,9 @@ protected static function parse_ident( string $input, int &$offset ): ?string { * > Return the current input code point. 
*/ protected static function consume_escaped_codepoint( $input, &$offset ): ?string { - if ( - ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || - ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'f' ) || - ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'F' ) - ) { - $hex_end_offset = $offset + 1; - while ( - strlen( $input ) > $hex_end_offset && - $hex_end_offset - $offset < 6 && - ( - ( '0' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= '9' ) || - ( 'a' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= 'f' ) || - ( 'A' <= $input[ $hex_end_offset ] && $input[ $hex_end_offset ] <= 'F' ) - ) - ) { - $hex_end_offset += 1; - } - - $codepoint_value = hexdec( substr( $input, $offset, $hex_end_offset - $offset ) ); + $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); + if ( $hex_length > 0 ) { + $codepoint_value = hexdec( substr( $input, $offset, $hex_length ) ); // > A surrogate is a leading surrogate or a trailing surrogate. // > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. @@ -263,7 +247,7 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string "\u{FFFD}" : mb_chr( $codepoint_value, 'UTF-8' ); - $offset = $hex_end_offset; + $offset += $hex_length; // If the next input code point is whitespace, consume it as well. 
if ( From a9a077f463c9c981adc811b7be6b27d89c05d9dc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 22 Nov 2024 20:54:11 +0100 Subject: [PATCH 018/129] Test with UPPER HEX --- tests/phpunit/tests/html-api/wpCssSelectors.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index d12fcc42c8e60..270def39b53d3 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -24,7 +24,8 @@ public static function data_valid_idents() { 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), 'escaped space' => array( '\\ x', ' x', '' ), 'escaped emoji' => array( '\\😍', '😍', '' ), - 'hex unicode codepoint' => array( '\\abcd', 'ꯍ', '' ), + 'hex unicode codepoint' => array( '\\1f0a1', '🂡', '' ), + 'HEX UNICODE CODEPOINT' => array( '\\1D4B2', '𝒲', '' ), 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), From 5f53e0a50b472a0aff078f233d6d7ffae189de33 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 17:34:25 +0100 Subject: [PATCH 019/129] Add ID tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 270def39b53d3..149bcd1f9572d 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -94,4 +94,33 @@ public static function test( string $input, int &$offset ) { $this->assertNull( $result, 'Ident did not match.' ); $this->assertSame( 0, $offset, 'Offset was incorrectly adjusted.' 
); } + + /** + * @ticket TBD + * + * @dataProvider data_ids + */ + public function test_parse_id( string $input, ?string $expected_id = null, ?string $rest = null ) { + $offset = 0; + $result = WP_CSS_ID_Selector::parse( $input, $offset ); + if ( null === $expected_id ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $result->ident, $expected_id ); + $this->assertSame( substr( $input, $offset ), $rest ); + } + } + + public static function data_ids(): array { + return array( + 'valid #_-foo123' => array( '#_-foo123', '_-foo123', '' ), + 'valid #foo#bar' => array( '#foo#bar', 'foo', '#bar' ), + 'escaped #\31 23' => array( '#\\31 23', '123', '' ), + 'with descendant #\31 23 div' => array( '#\\31 23 div', '123', ' div' ), + + 'not ID foo' => array( 'foo' ), + 'not valid #1foo' => array( '#1foo' ), + 'not id .bar' => array( '.bar' ), + ); + } } From effbbbece335486d269ecccf480fab99fc497d17 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 17:46:07 +0100 Subject: [PATCH 020/129] Improve tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 72 ++++++++----------- 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 149bcd1f9572d..53495f0b09004 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -15,8 +15,10 @@ class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { /** * Data provider. + * + * @return array */ - public static function data_valid_idents() { + public static function data_idents(): array { return array( 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), 'trailing .' 
=> array( '😍foo123.xyz', '😍foo123', '.xyz' ), @@ -40,29 +42,23 @@ public static function data_valid_idents() { 'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ), 'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ), 'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ), - ); - } - /** - * Data provider. - */ - public static function data_invalid_idents() { - return array( - 'bad start >' => array( '>' ), - 'bad start [' => array( '[' ), - 'bad start #' => array( '#' ), - 'bad start " "' => array( ' ' ), - 'bad start -' => array( '-' ), - 'bad start 1' => array( '-' ), + // Invalid + 'bad start >' => array( '>' ), + 'bad start [' => array( '[' ), + 'bad start #' => array( '#' ), + 'bad start " "' => array( ' ' ), + 'bad start -' => array( '-' ), + 'bad start 1' => array( '-' ), ); } /** * @ticket TBD * - * @dataProvider data_valid_idents + * @dataProvider data_idents */ - public function test_valid_idents( string $input, string $result, string $rest ) { + public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { $c = new class() extends WP_CSS_Selector_Parser { public static function parse( string $input, int &$offset ) {} public static function test( string $input, &$offset ) { @@ -70,48 +66,38 @@ public static function test( string $input, &$offset ) { } }; - $offset = 0; - $ident = $c::test( $input, $offset ); - $this->assertSame( $ident, $result, 'Ident did not match.' ); - $this->assertSame( substr( $input, $offset ), $rest, 'Offset was not updated correctly.' 
); - } - - /** - * @ticket TBD - * - * @dataProvider data_invalid_idents - */ - public function test_invalid_idents( string $input ) { - $c = new class() extends WP_CSS_Selector_Parser { - public static function parse( string $input, int &$offset ) {} - public static function test( string $input, int &$offset ) { - return self::parse_ident( $input, $offset ); - } - }; - $offset = 0; $result = $c::test( $input, $offset ); - $this->assertNull( $result, 'Ident did not match.' ); - $this->assertSame( 0, $offset, 'Offset was incorrectly adjusted.' ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $expected, $result, 'Ident did not match.' ); + $this->assertSame( substr( $input, $offset ), $rest, 'Offset was not updated correctly.' ); + } } /** * @ticket TBD * - * @dataProvider data_ids + * @dataProvider data_id_selectors */ - public function test_parse_id( string $input, ?string $expected_id = null, ?string $rest = null ) { + public function test_parse_id( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; $result = WP_CSS_ID_Selector::parse( $input, $offset ); - if ( null === $expected_id ) { + if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $result->ident, $expected_id ); + $this->assertSame( $result->ident, $expected ); $this->assertSame( substr( $input, $offset ), $rest ); } } - public static function data_ids(): array { + /** + * Data provider. 
+ * + * @return array + */ + public static function data_id_selectors(): array { return array( 'valid #_-foo123' => array( '#_-foo123', '_-foo123', '' ), 'valid #foo#bar' => array( '#foo#bar', 'foo', '#bar' ), @@ -119,8 +105,8 @@ public static function data_ids(): array { 'with descendant #\31 23 div' => array( '#\\31 23 div', '123', ' div' ), 'not ID foo' => array( 'foo' ), + 'not ID .bar' => array( '.bar' ), 'not valid #1foo' => array( '#1foo' ), - 'not id .bar' => array( '.bar' ), ); } } From 62ec5bb804872afe38073e86a0e23ee1d5cd16a7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 17:46:23 +0100 Subject: [PATCH 021/129] Add class selector tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 53495f0b09004..aac3339e4d27d 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -109,4 +109,38 @@ public static function data_id_selectors(): array { 'not valid #1foo' => array( '#1foo' ), ); } + + /** + * @ticket TBD + * + * @dataProvider data_class_selectors + */ + public function test_parse_class( string $input, ?string $expected = null, ?string $rest = null ) { + $offset = 0; + $result = WP_CSS_Class_Selector::parse( $input, $offset ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $result->ident, $expected ); + $this->assertSame( substr( $input, $offset ), $rest ); + } + } + + /** + * Data provider. 
+ * + * @return array + */ + public static function data_class_selectors(): array { + return array( + 'valid ._-foo123' => array( '._-foo123', '_-foo123', '' ), + 'valid .foo.bar' => array( '.foo.bar', 'foo', '.bar' ), + 'escaped .\31 23' => array( '.\\31 23', '123', '' ), + 'with descendant .\31 23 div' => array( '.\\31 23 div', '123', ' div' ), + + 'not class foo' => array( 'foo' ), + 'not class #bar' => array( '#bar' ), + 'not valid .1foo' => array( '.1foo' ), + ); + } } From 153f00978429f98cd7c5cc3d65a8b8affdcf1e45 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 17:47:00 +0100 Subject: [PATCH 022/129] Add class selector --- .../html-api/class-wp-css-selectors.php | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 56c31911d95b8..7b72fa0fe9616 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -414,3 +414,29 @@ public static function parse( string $input, int &$offset ): ?self { return new self( $ident ); } } + +final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser { + /** @var string */ + public $ident; + + private function __construct( string $ident ) { + $this->ident = $ident; + } + + public static function parse( string $input, int &$offset ): ?self { + if ( $offset + 1 >= strlen( $input ) || '.' 
!== $input[ $offset ] ) { + return null; + } + + $updated_offset = $offset + 1; + $result = self::parse_ident( $input, $updated_offset ); + + if ( null === $result ) { + return null; + $offset = $updated_offset; + } + + $offset = $updated_offset; + return new self( $result ); + } +} From fcc6401475554cd955891ae1dd82e067064067e8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 17:47:21 +0100 Subject: [PATCH 023/129] Simplify id selector parse --- .../html-api/class-wp-css-selectors.php | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 7b72fa0fe9616..fbccb55a5a0eb 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -152,12 +152,16 @@ protected static function parse_hash_token( string $input, int &$offset ): ?stri return null; } - $offset_after_hash = $offset + 1; - if ( self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset_after_hash ) ) { - $offset = $offset_after_hash; - return self::parse_ident( $input, $offset ); + $updated_offset = $offset + 1; + $result = self::parse_ident( $input, $updated_offset ); + + if ( null === $result ) { + return null; + $offset = $updated_offset; } - return null; + + $offset = $updated_offset; + return $result; } /** From 21c67e52745b532489f6a494892b71c83f1b03ac Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 18:02:03 +0100 Subject: [PATCH 024/129] Improve ident tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index aac3339e4d27d..b3099146e226c 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -42,14 +42,20 
@@ public static function data_idents(): array { 'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ), 'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ), 'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ), + 'can start with -ident' => array( '-ident', '-ident', '' ), + 'can start with --anything' => array( '--anything', '--anything', '' ), + 'can start with ---anything' => array( '--_anything', '--_anything', '' ), + 'can start with --1anything' => array( '--1anything', '--1anything', '' ), + 'can start with -\31 23' => array( '-\31 23', '-123', '' ), + 'can start with --\31 23' => array( '--\31 23', '--123', '' ), // Invalid - 'bad start >' => array( '>' ), - 'bad start [' => array( '[' ), - 'bad start #' => array( '#' ), - 'bad start " "' => array( ' ' ), - 'bad start -' => array( '-' ), - 'bad start 1' => array( '-' ), + 'bad start >' => array( '>ident' ), + 'bad start [' => array( '[ident' ), + 'bad start #' => array( '#ident' ), + 'bad start " "' => array( ' ident' ), + 'bad start 1' => array( '1ident' ), + 'bad start -1' => array( '-1ident' ), ); } From 728d798d663d27f5b385d82fe54f3b88544983de Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 18:31:24 +0100 Subject: [PATCH 025/129] Add type selector tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index b3099146e226c..694c405c09e0b 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -149,4 +149,39 @@ public static function data_class_selectors(): array { 'not valid .1foo' => array( '.1foo' ), ); } + + /** + * @ticket TBD + * + * @dataProvider data_type_selectors + */ + public function test_parse_type( string $input, ?string $expected = null, ?string $rest = null ) { + $offset = 0; + 
$result = WP_CSS_Type_Selector::parse( $input, $offset ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $result->ident, $expected ); + $this->assertSame( substr( $input, $offset ), $rest ); + } + } + + /** + * Data provider. + * + * @return array + */ + public static function data_type_selectors(): array { + return array( + 'any *' => array( '* .class', '*', ' .class' ), + 'a' => array( 'a', 'a', '' ), + 'div.class' => array( 'div.class', 'div', '.class' ), + 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ), + + // invalid + '#id' => array( '#id' ), + '.class' => array( '.class' ), + '[attr]' => array( '[attr]' ), + ); + } } From e1e8e098cfa4d0854104760e7e225e265f022064 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 18:31:54 +0100 Subject: [PATCH 026/129] Add docs and remove unreachable line --- .../html-api/class-wp-css-selectors.php | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index fbccb55a5a0eb..4ea438b95d8ce 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -410,6 +410,13 @@ private function __construct( string $ident ) { $this->ident = $ident; } + /** + * Parse an ID selector + * + * > = + * + * https://www.w3.org/TR/selectors/#grammar + */ public static function parse( string $input, int &$offset ): ?self { $ident = self::parse_hash_token( $input, $offset ); if ( null === $ident ) { @@ -427,6 +434,13 @@ private function __construct( string $ident ) { $this->ident = $ident; } + /** + * Parse a class selector + * + * > = '.' + * + * https://www.w3.org/TR/selectors/#grammar + */ public static function parse( string $input, int &$offset ): ?self { if ( $offset + 1 >= strlen( $input ) || '.' 
!== $input[ $offset ] ) { return null; @@ -437,7 +451,6 @@ public static function parse( string $input, int &$offset ): ?self { if ( null === $result ) { return null; - $offset = $updated_offset; } $offset = $updated_offset; From 13ac3c11204d31e30455870bff92f0b81ecd3386 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 18:32:17 +0100 Subject: [PATCH 027/129] Add type selector class --- .../html-api/class-wp-css-selectors.php | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 4ea438b95d8ce..4a6b65048b62b 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -457,3 +457,46 @@ public static function parse( string $input, int &$offset ): ?self { return new self( $result ); } } + +final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser { + /** + * @var string + * + * The type identifier string or '*'. + */ + public $ident; + + private function __construct( string $ident ) { + $this->ident = $ident; + } + + /** + * Parse a type selector + * + * > = | ? '*' + * > = [ | '*' ]? '|' + * > = ? + * + * Namespaces (e.g. |div, *|div, or namespace|div) are not supported, + * so this selector effectively matches * or ident. 
+ * + * https://www.w3.org/TR/selectors/#grammar + */ + public static function parse( string $input, int &$offset ): ?self { + if ( $offset >= strlen( $input ) ) { + return false; + } + + if ( '*' === $input[ $offset ] ) { + ++$offset; + return new self( '*' ); + } + + $result = self::parse_ident( $input, $offset ); + if ( null === $result ) { + return null; + } + + return new self( $result ); + } +} From a3c25e892f059f02d42070d593d03c5199a15e8d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 19:13:39 +0100 Subject: [PATCH 028/129] Add attribute selector tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 694c405c09e0b..5d0af28006039 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -184,4 +184,69 @@ public static function data_type_selectors(): array { '[attr]' => array( '[attr]' ), ); } + + /** + * @ticket TBD + * + * @dataProvider data_attribute_selectors + */ + public function test_parse_attribute( + string $input, + ?string $expected_name = null, + ?string $expected_matcher = null, + ?string $expected_value = null, + ?string $expected_modifier = null, + ?string $rest = null + ) { + $offset = 0; + $result = WP_CSS_Attribute_Selector::parse( $input, $offset ); + if ( null === $expected_name ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $result->name, $expected_name ); + $this->assertSame( $result->matcher, $expected_matcher ); + $this->assertSame( $result->value, $expected_value ); + $this->assertSame( $result->modifier, $expected_modifier ); + $this->assertSame( substr( $input, $offset ), $rest ); + } + } + + /** + * Data provider. 
+ * + * @return array + */ + public static function data_attribute_selectors(): array { + return array( + array( '[href]', 'href', null, null, null, '' ), + array( '[href] type', 'href', null, null, null, ' type' ), + array( '[href]#id', 'href', null, null, null, '#id' ), + array( '[href].class', 'href', null, null, null, '.class' ), + array( '[href][href2]', 'href', null, null, null, '[href2]' ), + array( "[\n href\t\r]", 'href', null, null, null, '' ), + array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), + array( "[href \n = bar ]", WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + array( "[href \n ^= baz ]", WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'bar', null, '' ), + array( '[match $= insensitive i]', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + array( '[match|=sensitive s]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + array( '[match="quoted[][]"]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, '' ), + array( "[match='quoted!{}']", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), + array( "[match*='quoted's]", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + // Invalid + array( 'foo' ), + array( '[foo' ), + array( '[#foo]' ), + array( '[*|*]' ), + array( '[ns|*]' ), + array( '[* |att]' ), + array( '[*| att]' ), + array( '[att * =]' ), + array( '[att * =]' ), + array( '[att i]' ), + array( '[att s]' ), + array( '[att="val" I]' ), + array( '[att="val" S]' ), + ); + } } From ad5c600d99ffeb98e92e6678b1476c0a7e02a808 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 19:49:57 +0100 Subject: [PATCH 029/129] improve attr tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 54 
+++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 5d0af28006039..43c710a6f750c 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -218,35 +218,35 @@ public function test_parse_attribute( */ public static function data_attribute_selectors(): array { return array( - array( '[href]', 'href', null, null, null, '' ), - array( '[href] type', 'href', null, null, null, ' type' ), - array( '[href]#id', 'href', null, null, null, '#id' ), - array( '[href].class', 'href', null, null, null, '.class' ), - array( '[href][href2]', 'href', null, null, null, '[href2]' ), - array( "[\n href\t\r]", 'href', null, null, null, '' ), - array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), - array( "[href \n = bar ]", WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), - array( "[href \n ^= baz ]", WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'bar', null, '' ), - array( '[match $= insensitive i]', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - array( '[match|=sensitive s]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - array( '[match="quoted[][]"]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, '' ), - array( "[match='quoted!{}']", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), - array( "[match*='quoted's]", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[href]' => array( '[href]', 'href', null, null, null, '' ), + '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), + '[href]#id' => array( '[href]#id', 
'href', null, null, null, '#id' ), + '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), + '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), + '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), + '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), + '[href \n = bar ]' => array( "[href \n = bar ]", WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + '[href \n ^= baz ]' => array( "[href \n ^= baz ]", WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'bar', null, '' ), + '[match $= insensitive i]' => array( '[match $= insensitive i]', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[match="quoted[][]"]' => array( '[match="quoted[][]"]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, '' ), + "[match='quoted!{}']" => array( "[match='quoted!{}']", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), // Invalid - array( 'foo' ), - array( '[foo' ), - array( '[#foo]' ), - array( '[*|*]' ), - array( '[ns|*]' ), - array( '[* |att]' ), - array( '[*| att]' ), - array( '[att * =]' ), - array( '[att * =]' ), - array( '[att i]' ), - array( '[att s]' ), - array( '[att="val" I]' ), - array( '[att="val" S]' ), + 'foo' => array( 'foo' ), + '[foo' => array( '[foo' ), + '[#foo]' => array( '[#foo]' ), + '[*|*]' => array( '[*|*]' ), + '[ns|*]' => array( '[ns|*]' ), + '[* |att]' => array( '[* |att]' ), + '[*| att]' => array( '[*| att]' ), + 
'[att * =]' => array( '[att * =]' ), + '[att * =]' => array( '[att * =]' ), + '[att i]' => array( '[att i]' ), + '[att s]' => array( '[att s]' ), + '[att="val" I]' => array( '[att="val" I]' ), + '[att="val" S]' => array( '[att="val" S]' ), ); } } From 675870497312b388d4992090c7681886b06c919a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 19:53:06 +0100 Subject: [PATCH 030/129] Fix expectation argument order --- .../phpunit/tests/html-api/wpCssSelectors.php | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 43c710a6f750c..7bea7c3b34180 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -78,7 +78,7 @@ public static function test( string $input, &$offset ) { $this->assertNull( $result ); } else { $this->assertSame( $expected, $result, 'Ident did not match.' ); - $this->assertSame( substr( $input, $offset ), $rest, 'Offset was not updated correctly.' ); + $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' 
); } } @@ -93,8 +93,8 @@ public function test_parse_id( string $input, ?string $expected = null, ?string if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $result->ident, $expected ); - $this->assertSame( substr( $input, $offset ), $rest ); + $this->assertSame( $expected, $result->ident ); + $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -127,8 +127,8 @@ public function test_parse_class( string $input, ?string $expected = null, ?stri if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $result->ident, $expected ); - $this->assertSame( substr( $input, $offset ), $rest ); + $this->assertSame( $expected, $result->ident ); + $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -161,8 +161,8 @@ public function test_parse_type( string $input, ?string $expected = null, ?strin if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $result->ident, $expected ); - $this->assertSame( substr( $input, $offset ), $rest ); + $this->assertSame( $expected, $result->ident ); + $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -203,11 +203,11 @@ public function test_parse_attribute( if ( null === $expected_name ) { $this->assertNull( $result ); } else { - $this->assertSame( $result->name, $expected_name ); - $this->assertSame( $result->matcher, $expected_matcher ); - $this->assertSame( $result->value, $expected_value ); - $this->assertSame( $result->modifier, $expected_modifier ); - $this->assertSame( substr( $input, $offset ), $rest ); + $this->assertSame( $expected_name, $result->name ); + $this->assertSame( $expected_matcher, $result->matcher ); + $this->assertSame( $expected_value, $result->value ); + $this->assertSame( $expected_modifier, $result->modifier ); + $this->assertSame( $rest, substr( $input, $offset ) ); } } From e97842cf6665fef97059b71acef61e70ebbdf03e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 25 Nov 2024 21:31:31 
+0100 Subject: [PATCH 031/129] Add test and fix is_ident --- .../html-api/class-wp-css-selectors.php | 2 +- .../phpunit/tests/html-api/wpCssSelectors.php | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 4a6b65048b62b..49b51e51fe81e 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -323,7 +323,7 @@ protected static function is_ident_start_codepoint( string $input, int $offset ) '_' === $input[ $offset ] || ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'Z' ) || - $input[ $offset ] > '\x7F' + ord( $input[ $offset ] ) > 0x7F ); } diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 7bea7c3b34180..55cd1eafb29c9 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -48,6 +48,7 @@ public static function data_idents(): array { 'can start with --1anything' => array( '--1anything', '--1anything', '' ), 'can start with -\31 23' => array( '-\31 23', '-123', '' ), 'can start with --\31 23' => array( '--\31 23', '--123', '' ), + 'ident ends before ]' => array( 'ident]', 'ident', ']' ), // Invalid 'bad start >' => array( '>ident' ), @@ -59,6 +60,28 @@ public static function data_idents(): array { ); } + /** + * @ticket TBD + */ + public function test_is_ident_and_is_ident_start() { + $c = new class() extends WP_CSS_Selector_Parser { + public static function parse( string $input, int &$offset ) {} + + public static function test_is_ident( string $input, int $offset ) { + return self::is_ident_codepoint( $input, $offset ); + } + + public static function test_is_ident_start( string $input, int $offset ) { + return self::is_ident_start_codepoint( $input, $offset ); + } + 
}; + + $this->assertFalse( $c::test_is_ident( '[', 0 ) ); + $this->assertFalse( $c::test_is_ident( ']', 0 ) ); + $this->assertFalse( $c::test_is_ident_start( '[', 0 ) ); + $this->assertFalse( $c::test_is_ident_start( ']', 0 ) ); + } + /** * @ticket TBD * From ef0085631424083dfc217308684c1baac3eea7f8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 12:36:40 +0100 Subject: [PATCH 032/129] Add parse_string stub --- src/wp-includes/html-api/class-wp-css-selectors.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 49b51e51fe81e..96c4465c2dbd6 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -212,6 +212,11 @@ protected static function parse_ident( string $input, int &$offset ): ?string { return $ident; } + // @todo stub + protected static function parse_string( string $input, int &$offset ): ?string { + return null; + } + /** * Consume an escaped code point. * From 463e799a75d713829f84a988d58595d2ba0923f0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 12:37:31 +0100 Subject: [PATCH 033/129] Add attribute selector parsing --- .../html-api/class-wp-css-selectors.php | 213 ++++++++++++++++++ 1 file changed, 213 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 96c4465c2dbd6..5067d1c2b87e6 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -505,3 +505,216 @@ public static function parse( string $input, int &$offset ): ?self { return new self( $result ); } } + +final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser { + /** + * [attr=value] + * Represents elements with an attribute name of attr whose value is exactly value. 
+ */ + const MATCH_EXACT = 'MATCH_EXACT'; + + /** + * [attr~=value] + * Represents elements with an attribute name of attr whose value is a + * whitespace-separated list of words, one of which is exactly value. + */ + const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT'; + + /** + * [attr|=value] + * Represents elements with an attribute name of attr whose value can be exactly value or + * can begin with value immediately followed by a hyphen, - (U+002D). It is often used for + * language subcode matches. + */ + const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN'; + + /** + * [attr^=value] + * Represents elements with an attribute name of attr whose value is prefixed (preceded) + * by value. + */ + const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY'; + + /** + * [attr$=value] + * Represents elements with an attribute name of attr whose value is suffixed (followed) + * by value. + */ + const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY'; + + /** + * [attr*=value] + * Represents elements with an attribute name of attr whose value contains at least one + * occurrence of value within the string. + */ + const MATCH_CONTAINS = 'MATCH_CONTAINS'; + + /** + * Modifier for case sensitive matching + * [attr=value s] + */ + const MODIFIER_CASE_SENSITIVE = 'case-sensitive'; + + /** + * Modifier for case insensitive matching + * [attr=value i] + */ + const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; + + + /** + * The attribute name. + * + * @var string + */ + public $name; + + /** + * The attribute matcher. + * + * @var string|null + */ + public $matcher; + + /** + * The attribute value. + * + * @var string|null + */ + public $value; + + /** + * The attribute modifier. 
+ * + * @var string|null + */ + public $modifier; + + private function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { + $this->name = $name; + $this->matcher = $matcher; + $this->value = $value; + $this->modifier = $modifier; + } + + /** + * Parse a attribute selector + * + * > = '[' ']' | + * > '[' [ | ] ? ']' + * > = [ '~' | '|' | '^' | '$' | '*' ]? '=' + * > = i | s + * > = ? + * + * Namespaces are not supported, so attribute names are effectively identifiers. + * + * https://www.w3.org/TR/selectors/#grammar + */ + public static function parse( string $input, int &$offset ): ?self { + // Need at least 3 bytes [x] + if ( $offset + 2 >= strlen( $input ) ) { + return false; + } + + $updated_offset = $offset; + + if ( '[' !== $input[ $updated_offset ] ) { + return null; + } + ++$updated_offset; + + self::parse_whitespace( $input, $updated_offset ); + $attr_name = self::parse_ident( $input, $updated_offset ); + if ( null === $attr_name ) { + return null; + } + self::parse_whitespace( $input, $updated_offset ); + + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new self( $attr_name ); + } + + // need to match at least `=x]` at this point + if ( $updated_offset + 3 >= strlen( $input ) ) { + return null; + } + + if ( '=' === $input[ $updated_offset ] ) { + ++$updated_offset; + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT; + } elseif ( '=' === $input[ $updated_offset + 1 ] ) { + switch ( $input[ $updated_offset ] ) { + case '~': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT; + $updated_offset += 2; + break; + case '|': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN; + $updated_offset += 2; + break; + case '^': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY; + $updated_offset += 2; + break; + case '$': + $attr_matcher = 
WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY; + $updated_offset += 2; + break; + case '*': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_CONTAINS; + $updated_offset += 2; + break; + default: + return null; + } + } else { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); + $attr_val = + self::parse_string( $input, $updated_offset ) ?? + self::parse_ident( $input, $updated_offset ); + + if ( null === $attr_val ) { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + $attr_modifier = null; + switch ( $input[ $updated_offset ] ) { + case 'i': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; + ++$updated_offset; + break; + + case 's': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; + ++$updated_offset; + break; + } + + if ( null !== $attr_modifier ) { + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new self( $attr_name, $attr_matcher, $attr_val, $attr_modifier ); + } + + return null; + } +} From 0f5b28cc5ed226f23ea38a3025ae5403b9b24bff Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 12:45:17 +0100 Subject: [PATCH 034/129] Fix test expectations --- tests/phpunit/tests/html-api/wpCssSelectors.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 55cd1eafb29c9..ae3c3e80c4f90 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -248,13 +248,13 @@ public static function data_attribute_selectors(): array { '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), '[\n href\t\r]' => array( "[\n href\t\r]", 'href', 
null, null, null, '' ), '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), - '[href \n = bar ]' => array( "[href \n = bar ]", WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), - '[href \n ^= baz ]' => array( "[href \n ^= baz ]", WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'bar', null, '' ), - '[match $= insensitive i]' => array( '[match $= insensitive i]', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[match|=sensitive s]' => array( '[match|=sensitive s]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - '[match="quoted[][]"]' => array( '[match="quoted[][]"]', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, '' ), - "[match='quoted!{}']" => array( "[match='quoted!{}']", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), - "[match*='quoted's]" => array( "[match*='quoted's]", WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), + '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[match="quoted[][]"]' => array( '[match="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, 
'' ), + "[match='quoted!{}']" => array( "[match='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), // Invalid 'foo' => array( 'foo' ), From f4a491ae52aaaf4807e9eb9c9b6c671bae105abf Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 18:11:25 +0100 Subject: [PATCH 035/129] More and improved attribute tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index ae3c3e80c4f90..4557ee1a5b3c4 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -252,24 +252,28 @@ public static function data_attribute_selectors(): array { '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - '[match="quoted[][]"]' => array( '[match="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted[][]', null, '' ), - "[match='quoted!{}']" => array( "[match='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted!{}', null, '' ), - "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'quoted', 
WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), + "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), + '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), // Invalid - 'foo' => array( 'foo' ), - '[foo' => array( '[foo' ), - '[#foo]' => array( '[#foo]' ), - '[*|*]' => array( '[*|*]' ), - '[ns|*]' => array( '[ns|*]' ), - '[* |att]' => array( '[* |att]' ), - '[*| att]' => array( '[*| att]' ), - '[att * =]' => array( '[att * =]' ), - '[att * =]' => array( '[att * =]' ), - '[att i]' => array( '[att i]' ), - '[att s]' => array( '[att s]' ), - '[att="val" I]' => array( '[att="val" I]' ), - '[att="val" S]' => array( '[att="val" S]' ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: [foo' => array( '[foo' ), + 'Invalid: [#foo]' => array( '[#foo]' ), + 'Invalid: [*|*]' => array( '[*|*]' ), + 'Invalid: [ns|*]' => array( '[ns|*]' ), + 'Invalid: [* |att]' => array( '[* |att]' ), + 'Invalid: [*| att]' => array( '[*| att]' ), + 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att i]' => array( '[att i]' ), + 'Invalid: [att s]' => array( '[att s]' ), + 'Invalid: [att="val" I]' => array( '[att="val" I]' ), + 'Invalid: [att="val" S]' => array( '[att="val" S]' ), + "Invalid: [att='val\\n']" => array( "[att='val\n']" ), ); } } From b680b1b8e5f69bf17490934761899452fc935826 Mon Sep 17 00:00:00 2001 
From: Jon Surrell Date: Tue, 26 Nov 2024 18:11:52 +0100 Subject: [PATCH 036/129] Implement parse_string --- .../html-api/class-wp-css-selectors.php | 84 ++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 5067d1c2b87e6..c1c3e35fc9ae1 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -212,9 +212,89 @@ protected static function parse_ident( string $input, int &$offset ): ?string { return $ident; } - // @todo stub + /** + * Parse a string token + * + * > 4.3.5. Consume a string token + * > This section describes how to consume a string token from a stream of code points. It returns either a or . + * > + * > This algorithm may be called with an ending code point, which denotes the code point that ends the string. If an ending code point is not specified, the current input code point is used. + * > + * > Initially create a with its value set to the empty string. + * > + * > Repeatedly consume the next input code point from the stream: + * > + * > ending code point + * > Return the . + * > EOF + * > This is a parse error. Return the . + * > newline + * > This is a parse error. Reconsume the current input code point, create a , and return it. + * > U+005C REVERSE SOLIDUS (\) + * > If the next input code point is EOF, do nothing. + * > Otherwise, if the next input code point is a newline, consume it. + * > Otherwise, (the stream starts with a valid escape) consume an escaped code point and append the returned code point to the ’s value. + * > + * > anything else + * > Append the current input code point to the ’s value. + * + * https://www.w3.org/TR/css-syntax-3/#consume-string-token + * + * This implementation will never return a because + * the is not a part of the selector grammar. That + * case is treated as failure to parse and null is returned. 
+ */ protected static function parse_string( string $input, int &$offset ): ?string { - return null; + if ( $offset + 1 >= strlen( $input ) ) { + return null; + } + + $ending_code_point = $input[ $offset ]; + if ( '"' !== $ending_code_point && "'" !== $ending_code_point ) { + return null; + } + + $string_token = ''; + + $stop_characters = "\\\n{$ending_code_point}"; + + $updated_offset = $offset + 1; + while ( $updated_offset < strlen( $input ) ) { + switch ( $input[ $updated_offset ] ) { + case '\\': + if ( $updated_offset + 1 >= strlen( $input ) ) { + break; + } + ++$updated_offset; + if ( "\n" === $input[ $updated_offset ] ) { + ++$updated_offset; + break; + } else { + $string_token .= self::consume_escaped_codepoint( $input, $updated_offset ); + } + break; + + /* + * This case would return a . + * The is not a part of the selector grammar + * so we do not return it and instead treat this as a + * failure to parse a string token. + */ + case "\n": + return null; + + case $ending_code_point: + ++$updated_offset; + break 2; + + default: + $string_token .= $input[ $updated_offset ]; + ++$updated_offset; + } + } + + $offset = $updated_offset; + return $string_token; } /** From e7da05f238008dd987f176672565acfeacbd86b4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 18:25:20 +0100 Subject: [PATCH 037/129] Add string parse tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 72 ++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 4557ee1a5b3c4..96f2fa96dcb7f 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -9,8 +9,6 @@ * @since TBD * * @group html-api - * - * @coversDefaultClass WP_CSS_Selectors */ class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { /** @@ -62,6 +60,9 @@ public static function data_idents(): array { /** * @ticket TBD + * + 
* @covers WP_CSS_Selector_Parser::is_ident_codepoint + * @covers WP_CSS_Selector_Parser::is_ident_start_codepoint */ public function test_is_ident_and_is_ident_start() { $c = new class() extends WP_CSS_Selector_Parser { @@ -86,6 +87,8 @@ public static function test_is_ident_start( string $input, int $offset ) { * @ticket TBD * * @dataProvider data_idents + * + * @covers WP_CSS_Selector_Parser::parse_ident */ public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { $c = new class() extends WP_CSS_Selector_Parser { @@ -105,10 +108,69 @@ public static function test( string $input, &$offset ) { } } + /** + * @ticket TBD + * + * @dataProvider data_strings + * + * @covers WP_CSS_Selector_Parser::parse_string + */ + public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) { + $c = new class() extends WP_CSS_Selector_Parser { + public static function parse( string $input, int &$offset ) {} + public static function test( string $input, &$offset ) { + return self::parse_string( $input, $offset ); + } + }; + + $offset = 0; + $result = $c::test( $input, $offset ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $expected, $result, 'String did not match.' ); + $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' ); + } + } + + /** + * Data provider. 
+ * + * @return array + */ + public static function data_strings(): array { + return array( + '"foo"' => array( '"foo"', 'foo', '' ), + '"foo"after' => array( '"foo"after', 'foo', 'after' ), + '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ), + '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ), + + "'foo'" => array( "'foo'", 'foo', '' ), + "'foo'after" => array( "'foo'after", 'foo', 'after' ), + "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ), + "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ), + + "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ), + "'foo\\31 23'" => array( "'foo\\31 23'", 'foo123', '' ), + "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ), + "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ), + "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ), + + // Invalid + "Invalid: 'newline\\n'" => array( "'newline\n'" ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: \\"' => array( '\\"' ), + 'Invalid: .foo' => array( '.foo' ), + 'Invalid: #foo' => array( '#foo' ), + ); + } + /** * @ticket TBD * * @dataProvider data_id_selectors + * + * @covers WP_CSS_ID_Selector::parse */ public function test_parse_id( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -143,6 +205,8 @@ public static function data_id_selectors(): array { * @ticket TBD * * @dataProvider data_class_selectors + * + * @covers WP_CSS_Class_Selector::parse */ public function test_parse_class( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -177,6 +241,8 @@ public static function data_class_selectors(): array { * @ticket TBD * * @dataProvider data_type_selectors + * + * @covers WP_CSS_Type_Selector::parse */ public function test_parse_type( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -212,6 +278,8 @@ public static function data_type_selectors(): array { * @ticket TBD * * @dataProvider data_attribute_selectors + * + * @covers 
WP_CSS_Attribute_Selector::parse */ public function test_parse_attribute( string $input, From d5e7e6087aab9f58905aa3c5993a5357efe812e1 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 18:26:01 +0100 Subject: [PATCH 038/129] Remove covers annotations --- tests/phpunit/tests/html-api/wpCssSelectors.php | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 96f2fa96dcb7f..7c5cdca447bbe 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -60,9 +60,6 @@ public static function data_idents(): array { /** * @ticket TBD - * - * @covers WP_CSS_Selector_Parser::is_ident_codepoint - * @covers WP_CSS_Selector_Parser::is_ident_start_codepoint */ public function test_is_ident_and_is_ident_start() { $c = new class() extends WP_CSS_Selector_Parser { @@ -87,8 +84,6 @@ public static function test_is_ident_start( string $input, int $offset ) { * @ticket TBD * * @dataProvider data_idents - * - * @covers WP_CSS_Selector_Parser::parse_ident */ public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { $c = new class() extends WP_CSS_Selector_Parser { @@ -112,8 +107,6 @@ public static function test( string $input, &$offset ) { * @ticket TBD * * @dataProvider data_strings - * - * @covers WP_CSS_Selector_Parser::parse_string */ public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) { $c = new class() extends WP_CSS_Selector_Parser { @@ -169,8 +162,6 @@ public static function data_strings(): array { * @ticket TBD * * @dataProvider data_id_selectors - * - * @covers WP_CSS_ID_Selector::parse */ public function test_parse_id( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -205,8 +196,6 @@ public static function data_id_selectors(): array { * @ticket TBD * * @dataProvider 
data_class_selectors - * - * @covers WP_CSS_Class_Selector::parse */ public function test_parse_class( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -241,8 +230,6 @@ public static function data_class_selectors(): array { * @ticket TBD * * @dataProvider data_type_selectors - * - * @covers WP_CSS_Type_Selector::parse */ public function test_parse_type( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; @@ -278,8 +265,6 @@ public static function data_type_selectors(): array { * @ticket TBD * * @dataProvider data_attribute_selectors - * - * @covers WP_CSS_Attribute_Selector::parse */ public function test_parse_attribute( string $input, From 08187c6858d95503d0e11eed6832045a68579f8a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 18:32:55 +0100 Subject: [PATCH 039/129] Remove unused line --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index c1c3e35fc9ae1..3a4c0a7577679 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -256,8 +256,6 @@ protected static function parse_string( string $input, int &$offset ): ?string { $string_token = ''; - $stop_characters = "\\\n{$ending_code_point}"; - $updated_offset = $offset + 1; while ( $updated_offset < strlen( $input ) ) { switch ( $input[ $updated_offset ] ) { From 5a5066ce52335b330a57441b765ed9cc33184467 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 19:32:21 +0100 Subject: [PATCH 040/129] Improve tests for 100% coverage on parse methods --- .../phpunit/tests/html-api/wpCssSelectors.php | 75 +++++++++++-------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 
7c5cdca447bbe..7b6e5ce79a365 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -49,12 +49,14 @@ public static function data_idents(): array { 'ident ends before ]' => array( 'ident]', 'ident', ']' ), // Invalid - 'bad start >' => array( '>ident' ), - 'bad start [' => array( '[ident' ), - 'bad start #' => array( '#ident' ), - 'bad start " "' => array( ' ident' ), - 'bad start 1' => array( '1ident' ), - 'bad start -1' => array( '-1ident' ), + 'Invalid: (empty string)' => array( '' ), + 'Invalid: bad start >' => array( '>ident' ), + 'Invalid: bad start [' => array( '[ident' ), + 'Invalid: bad start #' => array( '#ident' ), + 'Invalid: bad start " "' => array( ' ident' ), + 'Invalid: bad start 1' => array( '1ident' ), + 'Invalid: bad start -1' => array( '-1ident' ), + 'Invalid: bad start -' => array( '-' ), ); } @@ -133,28 +135,31 @@ public static function test( string $input, &$offset ) { */ public static function data_strings(): array { return array( - '"foo"' => array( '"foo"', 'foo', '' ), - '"foo"after' => array( '"foo"after', 'foo', 'after' ), - '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ), - '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ), + '"foo"' => array( '"foo"', 'foo', '' ), + '"foo"after' => array( '"foo"after', 'foo', 'after' ), + '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ), + '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ), - "'foo'" => array( "'foo'", 'foo', '' ), - "'foo'after" => array( "'foo'after", 'foo', 'after' ), - "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ), - "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ), + "'foo'" => array( "'foo'", 'foo', '' ), + "'foo'after" => array( "'foo'after", 'foo', 'after' ), + "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ), + "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ), - "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ), - "'foo\\31 23'" => array( "'foo\\31 23'", 
'foo123', '' ), - "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ), - "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ), - "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ), + "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ), + "'foo\\31 23'" => array( "'foo\\31 23'", 'foo123', '' ), + "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ), + "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ), + "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ), + + "'foo\\" => array( "'foo\\", 'foo', '' ), // Invalid - "Invalid: 'newline\\n'" => array( "'newline\n'" ), - 'Invalid: foo' => array( 'foo' ), - 'Invalid: \\"' => array( '\\"' ), - 'Invalid: .foo' => array( '.foo' ), - 'Invalid: #foo' => array( '#foo' ), + 'Invalid: (empty string)' => array( '' ), + "Invalid: 'newline\\n'" => array( "'newline\n'" ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: \\"' => array( '\\"' ), + 'Invalid: .foo' => array( '.foo' ), + 'Invalid: #foo' => array( '#foo' ), ); } @@ -249,15 +254,16 @@ public function test_parse_type( string $input, ?string $expected = null, ?strin */ public static function data_type_selectors(): array { return array( - 'any *' => array( '* .class', '*', ' .class' ), - 'a' => array( 'a', 'a', '' ), - 'div.class' => array( 'div.class', 'div', '.class' ), - 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ), + 'any *' => array( '* .class', '*', ' .class' ), + 'a' => array( 'a', 'a', '' ), + 'div.class' => array( 'div.class', 'div', '.class' ), + 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ), - // invalid - '#id' => array( '#id' ), - '.class' => array( '.class' ), - '[attr]' => array( '[attr]' ), + // Invalid + 'Invalid: (empty string)' => array( '' ), + 'Invalid: #id' => array( '#id' ), + 'Invalid: .class' => array( '.class' ), + 'Invalid: [attr]' => array( '[attr]' ), ); } @@ -313,6 +319,7 @@ public static function data_attribute_selectors(): array { '[escape-seq="\\31 
23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), // Invalid + 'Invalid: (empty string)' => array( '' ), 'Invalid: foo' => array( 'foo' ), 'Invalid: [foo' => array( '[foo' ), 'Invalid: [#foo]' => array( '[#foo]' ), @@ -321,12 +328,14 @@ public static function data_attribute_selectors(): array { 'Invalid: [* |att]' => array( '[* |att]' ), 'Invalid: [*| att]' => array( '[*| att]' ), 'Invalid: [att * =]' => array( '[att * =]' ), - 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att+=val]' => array( '[att+=val]' ), + 'Invalid: [att=val ' => array( '[att=val ' ), 'Invalid: [att i]' => array( '[att i]' ), 'Invalid: [att s]' => array( '[att s]' ), 'Invalid: [att="val" I]' => array( '[att="val" I]' ), 'Invalid: [att="val" S]' => array( '[att="val" S]' ), "Invalid: [att='val\\n']" => array( "[att='val\n']" ), + 'Invalid: [att=val i ' => array( '[att=val i ' ), ); } } From 2f8bd19efec5fb4f5f6cabd51d7173642d79af34 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 19:33:01 +0100 Subject: [PATCH 041/129] Improve documentation --- .../html-api/class-wp-css-selectors.php | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 3a4c0a7577679..669c74c1b676d 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -260,10 +260,10 @@ protected static function parse_string( string $input, int &$offset ): ?string { while ( $updated_offset < strlen( $input ) ) { switch ( $input[ $updated_offset ] ) { case '\\': - if ( $updated_offset + 1 >= strlen( $input ) ) { + ++$updated_offset; + if ( $updated_offset >= strlen( $input ) ) { break; } - ++$updated_offset; if ( "\n" === $input[ $updated_offset ] ) { ++$updated_offset; break; @@ -386,6 +386,11 @@ protected static function 
next_two_are_valid_escape( string $input, int $offset } /** + * Check if the next code point is an "ident start code point". + * + * Caution! This method does not do any bounds checking, it should not be passed + * a string with an offset that is out of bounds. + * * > ident-start code point * > A letter, a non-ASCII code point, or U+005F LOW LINE (_). * > uppercase letter @@ -396,12 +401,10 @@ protected static function next_two_are_valid_escape( string $input, int $offset * > An uppercase letter or a lowercase letter. * > non-ASCII code point * > A code point with a value equal to or greater than U+0080 . + * + * https://www.w3.org/TR/css-syntax-3/#ident-start-code-point */ protected static function is_ident_start_codepoint( string $input, int $offset ): bool { - if ( $offset >= strlen( $input ) ) { - return false; - } - return ( '_' === $input[ $offset ] || ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || @@ -411,10 +414,17 @@ protected static function is_ident_start_codepoint( string $input, int $offset ) } /** + * Check if the next code point is an "ident code point". + * + * Caution! This method does not do any bounds checking, it should not be passed + * a string with an offset that is out of bounds. + * * > ident code point * > An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-). * > digit * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. 
+ * + * https://www.w3.org/TR/css-syntax-3/#ident-code-point */ protected static function is_ident_codepoint( string $input, int $offset ): bool { return '-' === $input[ $offset ] || From 8b0ac551e7694d3de921d84e60afe372583558b8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 26 Nov 2024 19:37:26 +0100 Subject: [PATCH 042/129] Fix parse return type and return annotations --- .../html-api/class-wp-css-selectors.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 669c74c1b676d..6a80ca2e42b7c 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -186,6 +186,8 @@ protected static function parse_hash_token( string $input, int &$offset ): ?stri * > Reconsume the current input code point. Return result. * * https://www.w3.org/TR/css-syntax-3/#consume-name + * + * @return string|null */ protected static function parse_ident( string $input, int &$offset ): ?string { if ( ! self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { @@ -243,6 +245,8 @@ protected static function parse_ident( string $input, int &$offset ): ?string { * This implementation will never return a because * the is not a part of the selector grammar. That * case is treated as failure to parse and null is returned. + * + * @return string|null */ protected static function parse_string( string $input, int &$offset ): ?string { if ( $offset + 1 >= strlen( $input ) ) { @@ -509,6 +513,8 @@ private function __construct( string $ident ) { * > = * * https://www.w3.org/TR/selectors/#grammar + * + * @return self|null */ public static function parse( string $input, int &$offset ): ?self { $ident = self::parse_hash_token( $input, $offset ); @@ -533,6 +539,8 @@ private function __construct( string $ident ) { * > = '.' 
* * https://www.w3.org/TR/selectors/#grammar + * + * @return self|null */ public static function parse( string $input, int &$offset ): ?self { if ( $offset + 1 >= strlen( $input ) || '.' !== $input[ $offset ] ) { @@ -574,10 +582,12 @@ private function __construct( string $ident ) { * so this selector effectively matches * or ident. * * https://www.w3.org/TR/selectors/#grammar + * + * @return self|null */ public static function parse( string $input, int &$offset ): ?self { if ( $offset >= strlen( $input ) ) { - return false; + return null; } if ( '*' === $input[ $offset ] ) { @@ -697,11 +707,13 @@ private function __construct( string $name, ?string $matcher = null, ?string $va * Namespaces are not supported, so attribute names are effectively identifiers. * * https://www.w3.org/TR/selectors/#grammar + * + * @return self|null */ public static function parse( string $input, int &$offset ): ?self { // Need at least 3 bytes [x] if ( $offset + 2 >= strlen( $input ) ) { - return false; + return null; } $updated_offset = $offset; From dffcac6ed016f727aaacfb192f151f5c3cb3c67f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 27 Nov 2024 17:07:48 +0100 Subject: [PATCH 043/129] Update documentation links and grammar --- .../html-api/class-wp-css-selectors.php | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 6a80ca2e42b7c..264f684692f17 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -19,7 +19,29 @@ * is invalid or unsupported. * * A subset of the CSS selector grammar is supported. The grammar is defined in the CSS Syntax - * specification, which is available at https://www.w3.org/TR/css-syntax-3/. + * specification, which is available at {@link https://www.w3.org/TR/selectors/#grammar}. 
+ * + * @todo Review this grammar, especially the complex selector for accurate support information. + * The supported grammar is: + * + * = + * = # + * = # + * = # + * = [ ? ]* + * = [ ? * ]! + * = | + * = '>' | '+' | '~' | [ '|' '|' ] + * = | '*' + * = | | + * = + * = '.' + * = '[' ']' | + * '[' [ | ] ? ']' + * = [ '~' | '|' | '^' | '$' | '*' ]? '=' + * = i | s + * + * @link https://www.w3.org/TR/selectors/#grammar Refer to the grammar for more details. * * Supported selector syntax: * - Type selectors (tag names, e.g. `div`) @@ -43,10 +65,10 @@ * * @access private * - * @see https://www.w3.org/TR/css-syntax-3/#consume-a-token - * @see https://www.w3.org/tr/selectors/#parse-selector - * @see https://www.w3.org/TR/selectors-api2/ - * @see https://www.w3.org/TR/selectors-4/ + * @see {@link https://www.w3.org/TR/css-syntax-3/} + * @see {@link https://www.w3.org/tr/selectors/} + * @see {@link https://www.w3.org/TR/selectors-api2/} + * @see {@link https://www.w3.org/TR/selectors-4/} * */ class WP_CSS_Selectors { From 9f81744aa7bc68fda9269d48251fa13fb2223519 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 27 Nov 2024 20:29:56 +0100 Subject: [PATCH 044/129] Update documentation and class name --- src/wp-includes/html-api/class-wp-css-selectors.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 264f684692f17..d9bbc4b9235c8 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -14,7 +14,7 @@ * * This class is designed for internal use by the HTML processor. * - * This class is instantiated via the `WP_CSS_Selector::from_selector( string $selector )` method. + * This class is instantiated via the `WP_CSS_Selector_List::from_selector( string $selector )` method. 
* It accepts a CSS selector string and returns an instance of itself or `null` if the selector * is invalid or unsupported. * @@ -27,10 +27,8 @@ * = * = # * = # - * = # * = [ ? ]* * = [ ? * ]! - * = | * = '>' | '+' | '~' | [ '|' '|' ] * = | '*' * = | | @@ -71,7 +69,7 @@ * @see {@link https://www.w3.org/TR/selectors-4/} * */ -class WP_CSS_Selectors { +class WP_CSS_Selector_List { private $selectors; private function __construct( array $selectors ) { @@ -131,7 +129,7 @@ private static function parse( string $input ) { } } if ( count( $selectors ) ) { - return new WP_CSS_Selectors( $selectors ); + return new WP_CSS_Selector_List( $selectors ); } return null; } From d4c6f382dc246e151dc688a70daf88ad8a9f7916 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 27 Nov 2024 20:30:12 +0100 Subject: [PATCH 045/129] Add selector class --- .../html-api/class-wp-css-selectors.php | 64 +++++++++++++++++++ .../phpunit/tests/html-api/wpCssSelectors.php | 18 ++++++ 2 files changed, 82 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index d9bbc4b9235c8..8d8ec35de98b6 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -838,3 +838,67 @@ public static function parse( string $input, int &$offset ): ?self { return null; } } + +/** + * This corresponds to in the grammar. + */ +final class WP_CSS_Selector extends WP_CSS_Selector_Parser { + + /** @var WP_CSS_Type_Selector|null */ + public $type_selector; + + /** @var array|null */ + public $subclass_selectors; + + private function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { + $this->type_selector = $type_selector; + $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; + } + + /** + * Parses a selector string into a `WP_CSS_Selector` object. + * + * > = [ ? * ]! 
+ * + * @param string $input The selector string to parse. + * @return WP_CSS_Selector|null The parsed selector, or `null` if the selector is invalid or unsupported. + */ + public static function parse( string $input, int &$offset ): ?self { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $type_selector = WP_CSS_Type_Selector::parse( $input, $updated_offset ); + + $subclass_selectors = array(); + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + while ( null !== $last_parsed_subclass_selector ) { + $subclass_selectors[] = $last_parsed_subclass_selector; + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + } + + if ( null !== $type_selector || array() !== $subclass_selectors ) { + $offset = $updated_offset; + return new self( $type_selector, $subclass_selectors ); + } + } + + /** + * @return WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector|null + */ + private static function parse_subclass_selector( string $input, int &$offset ) { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $next_char = $input[ $offset ]; + return '.' === $next_char ? + WP_CSS_Class_Selector::parse( $input, $offset ) : ( + '#' === $next_char ? + WP_CSS_ID_Selector::parse( $input, $offset ) : ( + '[' === $next_char ? 
+ WP_CSS_Attribute_Selector::parse( $input, $offset ) : + null ) ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 7b6e5ce79a365..180bee4f53c05 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -338,4 +338,22 @@ public static function data_attribute_selectors(): array { 'Invalid: [att=val i ' => array( '[att=val i ' ), ); } + + /** + * @ticket TBD + */ + public function test_parse_selector() { + $input = 'el.foo#bar[baz=quux] > .child'; + $offset = 0; + $sel = WP_CSS_Selector::parse( $input, $offset ); + + $this->assertSame( $sel->type_selector->ident, 'el' ); + $this->assertSame( count( $sel->subclass_selectors ), 3 ); + $this->assertSame( $sel->subclass_selectors[0]->ident, 'foo' ); + $this->assertSame( $sel->subclass_selectors[1]->ident, 'bar' ); + $this->assertSame( $sel->subclass_selectors[2]->name, 'baz' ); + $this->assertSame( $sel->subclass_selectors[2]->matcher, WP_CSS_Attribute_Selector::MATCH_EXACT ); + $this->assertSame( $sel->subclass_selectors[2]->value, 'quux' ); + $this->assertSame( ' > .child', substr( $input, $offset ) ); + } } From 6432056bd38a8aebb94c51b6bfe6ac87353181c7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 27 Nov 2024 21:01:43 +0100 Subject: [PATCH 046/129] Implement complex selector --- .../html-api/class-wp-css-selectors.php | 87 +++++++++++++++++-- .../phpunit/tests/html-api/wpCssSelectors.php | 34 ++++++-- 2 files changed, 106 insertions(+), 15 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 8d8ec35de98b6..8ccec5de029cc 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -123,9 +123,9 @@ private static function parse( string $input ) { $offset = 0; while ( $offset < $length ) { - $sel = WP_CSS_ID_Selector::parse( $input, 
$offset ); - if ( $sel ) { - $selectors[] = $sel; + $selector = WP_CSS_ID_Selector::parse( $input, $offset ); + if ( null !== $selector ) { + $selectors[] = $selector; } } if ( count( $selectors ) ) { @@ -841,6 +841,8 @@ public static function parse( string $input, int &$offset ): ?self { /** * This corresponds to in the grammar. + * + * > = [ ? * ]! */ final class WP_CSS_Selector extends WP_CSS_Selector_Parser { @@ -856,12 +858,7 @@ private function __construct( ?WP_CSS_Type_Selector $type_selector, array $subcl } /** - * Parses a selector string into a `WP_CSS_Selector` object. - * * > = [ ? * ]! - * - * @param string $input The selector string to parse. - * @return WP_CSS_Selector|null The parsed selector, or `null` if the selector is invalid or unsupported. */ public static function parse( string $input, int &$offset ): ?self { if ( $offset >= strlen( $input ) ) { @@ -882,6 +879,7 @@ public static function parse( string $input, int &$offset ): ?self { $offset = $updated_offset; return new self( $type_selector, $subclass_selectors ); } + return null; } /** @@ -902,3 +900,76 @@ private static function parse_subclass_selector( string $input, int &$offset ) { null ) ); } } + + +/** + * This corresponds to in the grammar. + * + * > = [ ? ]* + */ +final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser { + const COMBINATOR_CHILD = '>'; + const COMBINATOR_DESCENDANT = ' '; + const COMBINATOR_NEXT_SIBLING = '+'; + const COMBINATOR_SUBSEQUENT_SIBLING = '~'; + + /** + * even indexes are WP_CSS_Selector, odd indexes are string combinators. 
+ * @var array + */ + public $selectors = array(); + + private function __construct( array $selectors ) { + $this->selectors = $selectors; + } + + public static function parse( string $input, int &$offset ): ?self { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $selector = WP_CSS_Selector::parse( $input, $updated_offset ); + if ( null === $selector ) { + return null; + } + + $selectors = array( $selector ); + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + while ( $updated_offset < strlen( $input ) ) { + switch ( $input[ $updated_offset ] ) { + case self::COMBINATOR_CHILD: + case self::COMBINATOR_NEXT_SIBLING: + case self::COMBINATOR_SUBSEQUENT_SIBLING: + $combinator = $input[ $updated_offset ]; + ++$updated_offset; + self::parse_whitespace( $input, $updated_offset ); + break; + + default: + /* + * Whitespace is a descendant combinator. + * Either whitespace was found and we're on a selector, + * or we've failed to find any combinator and parsing is complete. + */ + if ( ! $found_whitespace ) { + break 2; + } + $combinator = self::COMBINATOR_DESCENDANT; + break; + } + // Here we've found a combinator and need another selector. + $selector = WP_CSS_Selector::parse( $input, $updated_offset ); + // Failure to find a selector is a parse error. 
+ if ( null === $selector ) { + return null; + } + $selectors[] = $combinator; + $selectors[] = $selector; + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + } + $offset = $updated_offset; + return new self( $selectors ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 180bee4f53c05..4189ec586011a 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -347,13 +347,33 @@ public function test_parse_selector() { $offset = 0; $sel = WP_CSS_Selector::parse( $input, $offset ); - $this->assertSame( $sel->type_selector->ident, 'el' ); - $this->assertSame( count( $sel->subclass_selectors ), 3 ); - $this->assertSame( $sel->subclass_selectors[0]->ident, 'foo' ); - $this->assertSame( $sel->subclass_selectors[1]->ident, 'bar' ); - $this->assertSame( $sel->subclass_selectors[2]->name, 'baz' ); - $this->assertSame( $sel->subclass_selectors[2]->matcher, WP_CSS_Attribute_Selector::MATCH_EXACT ); - $this->assertSame( $sel->subclass_selectors[2]->value, 'quux' ); + $this->assertSame( 'el', $sel->type_selector->ident ); + $this->assertSame( 3, count( $sel->subclass_selectors ) ); + $this->assertSame( 'foo', $sel->subclass_selectors[0]->ident, 'foo' ); + $this->assertSame( 'bar', $sel->subclass_selectors[1]->ident, 'bar' ); + $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' ); + $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher ); + $this->assertSame( 'quux', $sel->subclass_selectors[2]->value ); $this->assertSame( ' > .child', substr( $input, $offset ) ); } + + /** + * @ticket TBD + */ + public function test_parse_complex_selector() { + $input = 'el.foo#bar[baz=quux] > .child, rest'; + $offset = 0; + $sel = WP_CSS_Complex_Selector::parse( $input, $offset ); + + var_dump( $sel ); + $this->assertSame( 3, count( $sel->selectors ) ); + $this->assertNotNull( 
$sel->selectors[0]->type_selector ); + $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->selectors[1] ); + $this->assertNull( $sel->selectors[2]->type_selector ); + $this->assertSame( 1, count( $sel->selectors[2]->subclass_selectors ) ); + $this->assertSame( 'child', $sel->selectors[2]->subclass_selectors[0]->ident ); + + $this->assertSame( ', rest', substr( $input, $offset ) ); + } } From 5c746cd58b3e1178e9579e11b71974a5be652ac2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 27 Nov 2024 22:39:22 +0100 Subject: [PATCH 047/129] Working and tested --- .../html-api/class-wp-css-selectors.php | 83 +++++++++++-------- .../phpunit/tests/html-api/wpCssSelectors.php | 67 ++++++++++++++- 2 files changed, 113 insertions(+), 37 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 8ccec5de029cc..734c3e38d094b 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -117,21 +117,31 @@ private static function parse( string $input ) { $input = str_replace( array( "\r", "\f" ), "\n", $input ); $input = str_replace( "\0", "\u{FFFD}", $input ); - $length = strlen( $input ); - $selectors = array(); - $offset = 0; - while ( $offset < $length ) { - $selector = WP_CSS_ID_Selector::parse( $input, $offset ); - if ( null !== $selector ) { - $selectors[] = $selector; - } + $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); + if ( null === $selector ) { + return null; } - if ( count( $selectors ) ) { - return new WP_CSS_Selector_List( $selectors ); + WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); + + $selectors = array( $selector ); + while ( $offset < strlen( $input ) ) { + // Each loop should stop on a `,` selector list delimiter. 
+ if ( ',' !== $input[ $offset ] ) { + return null; + } + ++$offset; + WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); + $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); + if ( null === $selector ) { + return null; + } + $selectors[] = $selector; + WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); } - return null; + + return new WP_CSS_Selector_List( $selectors ); } } @@ -145,7 +155,7 @@ public static function parse( string $input, int &$offset ); abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; - protected static function parse_whitespace( string $input, int &$offset ): bool { + public static function parse_whitespace( string $input, int &$offset ): bool { $length = strspn( $input, " \t\r\n\f", $offset ); $advanced = $length > 0; $offset += $length; @@ -938,35 +948,38 @@ public static function parse( string $input, int &$offset ): ?self { $found_whitespace = self::parse_whitespace( $input, $updated_offset ); while ( $updated_offset < strlen( $input ) ) { - switch ( $input[ $updated_offset ] ) { - case self::COMBINATOR_CHILD: - case self::COMBINATOR_NEXT_SIBLING: - case self::COMBINATOR_SUBSEQUENT_SIBLING: + if ( + self::COMBINATOR_CHILD === $input[ $updated_offset ] || + self::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || + self::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] + ) { $combinator = $input[ $updated_offset ]; ++$updated_offset; self::parse_whitespace( $input, $updated_offset ); - break; - default: - /* - * Whitespace is a descendant combinator. - * Either whitespace was found and we're on a selector, - * or we've failed to find any combinator and parsing is complete. - */ - if ( ! 
$found_whitespace ) { - break 2; - } - $combinator = self::COMBINATOR_DESCENDANT; + // Failure to find a selector here is a parse error + $selector = WP_CSS_Selector::parse( $input, $updated_offset ); + // Failure to find a selector is a parse error. + if ( null === $selector ) { + return null; + } + $selectors[] = $combinator; + $selectors[] = $selector; + } elseif ( ! $found_whitespace ) { + break; + } else { + + /* + * Whitespace is ambiguous, it could be a descendant combinator or + * insignificant whitespace. + */ + $selector = WP_CSS_Selector::parse( $input, $updated_offset ); + if ( null === $selector ) { break; + } + $selectors[] = self::COMBINATOR_DESCENDANT; + $selectors[] = $selector; } - // Here we've found a combinator and need another selector. - $selector = WP_CSS_Selector::parse( $input, $updated_offset ); - // Failure to find a selector is a parse error. - if ( null === $selector ) { - return null; - } - $selectors[] = $combinator; - $selectors[] = $selector; $found_whitespace = self::parse_whitespace( $input, $updated_offset ); } $offset = $updated_offset; diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 4189ec586011a..33ada4ccbe3f9 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -357,15 +357,24 @@ public function test_parse_selector() { $this->assertSame( ' > .child', substr( $input, $offset ) ); } + /** + * @ticket TBD + */ + public function test_parse_empty_selector() { + $input = ''; + $offset = 0; + $result = WP_CSS_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } + /** * @ticket TBD */ public function test_parse_complex_selector() { - $input = 'el.foo#bar[baz=quux] > .child, rest'; + $input = 'el.foo#bar[baz=quux] > .child , rest'; $offset = 0; $sel = WP_CSS_Complex_Selector::parse( $input, $offset ); - var_dump( $sel ); $this->assertSame( 3, count( $sel->selectors ) ); 
$this->assertNotNull( $sel->selectors[0]->type_selector ); $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); @@ -376,4 +385,58 @@ public function test_parse_complex_selector() { $this->assertSame( ', rest', substr( $input, $offset ) ); } + + /** + * @ticket TBD + */ + public function test_parse_invalid_complex_selector() { + $input = 'el.foo#bar[baz=quux] > , rest'; + $offset = 0; + $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } + + public function test_parse_empty_complex_selector() { + $input = ''; + $offset = 0; + $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } + + + /** + * @ticket TBD + */ + public function test_parse_selector_list() { + $input = 'el.foo#bar[baz=quux] .descendent , rest'; + $result = WP_CSS_Selector_List::from_selectors( $input ); + $this->assertNotNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_invalid_selector_list() { + $input = 'el,,'; + $result = WP_CSS_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_invalid_selector_list2() { + $input = 'el!'; + $result = WP_CSS_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_empty_selector_list() { + $input = " \t \t\n\r\f"; + $result = WP_CSS_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } } From 501102a87bb3f38bc2781c22b6de9a59d640bf62 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 18:30:47 +0100 Subject: [PATCH 048/129] Selector parsing should allow cap I,S modifier --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 734c3e38d094b..6e382f8f8b744 100644 --- 
a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -823,11 +823,13 @@ public static function parse( string $input, int &$offset ): ?self { $attr_modifier = null; switch ( $input[ $updated_offset ] ) { case 'i': + case 'I': $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; ++$updated_offset; break; case 's': + case 'S': $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; ++$updated_offset; break; From f98fbb39c71333b22e3c7f97c380c7ce81c56097 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 19:08:17 +0100 Subject: [PATCH 049/129] CSS Add matches to selector classes --- .../html-api/class-wp-css-selectors.php | 120 +++++++++++++++++- 1 file changed, 116 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 6e382f8f8b744..d9c507bb5f557 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -69,7 +69,20 @@ * @see {@link https://www.w3.org/TR/selectors-4/} * */ -class WP_CSS_Selector_List { +class WP_CSS_Selector_List implements IWP_CSS_Selector_Matcher { + public function matches( WP_HTML_Processor $processor ): bool { + if ( $processor->get_token_type() !== '#tag' ) { + return false; + } + + foreach ( $this->selectors as $selector ) { + if ( ! 
$selector->matches( $processor ) ) { + return false; + } + } + return true; + } + private $selectors; private function __construct( array $selectors ) { @@ -145,6 +158,13 @@ private static function parse( string $input ) { } } +interface IWP_CSS_Selector_Matcher { + /** + * @return bool + */ + public function matches( WP_HTML_Processor $processor ): bool; +} + interface IWP_CSS_Selector_Parser { /** * @return static|null @@ -152,7 +172,7 @@ interface IWP_CSS_Selector_Parser { public static function parse( string $input, int &$offset ); } -abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser { +abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; public static function parse_whitespace( string $input, int &$offset ): bool { @@ -553,9 +573,18 @@ public static function parse( string $input, int &$offset ): ?self { } return new self( $ident ); } + + public function matches( WP_HTML_Processor $processor ): bool { + // @todo check case sensitivity. 
+ return $processor->get_attribute( 'id' ) === $this->ident; + } } final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser { + public function matches( WP_HTML_Processor $processor ): bool { + return $processor->has_class( $this->ident ); + } + /** @var string */ public $ident; @@ -590,6 +619,13 @@ public static function parse( string $input, int &$offset ): ?self { } final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser { + public function matches( WP_HTML_Processor $processor ): bool { + if ( '*' === $this->ident ) { + return true; + } + return 0 === strcasecmp( $processor->get_tag(), $this->ident ); + } + /** * @var string * @@ -635,9 +671,64 @@ public static function parse( string $input, int &$offset ): ?self { } final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser { + public function matches( WP_HTML_Processor $processor ): bool { + $att_value = $processor->get_attribute( $this->name ); + if ( null === $att_value ) { + return false; + } + + if ( null === $this->value ) { + return true; + } + + $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; + + switch ( $this->matcher ) { + case self::MATCH_EXACT: + return $case_insensitive ? + 0 === strcasecmp( $att_value, $this->value ) : + $att_value === $this->value; + + case self::MATCH_ONE_OF_EXACT: + // @todo + throw new Exception( 'One of attribute matching is not supported yet.' ); + + case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: + // Attempt the full match first + if ( + $case_insensitive ? 
+ 0 === strcasecmp( $att_value, $this->value ) : + $att_value === $this->value + ) { + return true; + } + + // Partial match + if ( strlen( $att_value ) < strlen( $this->value ) + 1 ) { + return false; + } + + $starts_with = "{$this->value}-"; + return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive ); + + case self::MATCH_PREFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive ); + + case self::MATCH_SUFFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive ); + + case self::MATCH_CONTAINS: + return false !== ( + $case_insensitive ? + stripos( $att_value, $this->value ) : + strpos( $att_value, $this->value ) + ); + } + } + /** - * [attr=value] - * Represents elements with an attribute name of attr whose value is exactly value. + * [att=val] + * Represents an element with the att attribute whose value is exactly "val". */ const MATCH_EXACT = 'MATCH_EXACT'; @@ -857,6 +948,19 @@ public static function parse( string $input, int &$offset ): ?self { * > = [ ? * ]! */ final class WP_CSS_Selector extends WP_CSS_Selector_Parser { + public function matches( WP_HTML_Processor $processor ): bool { + if ( $this->type_selector ) { + if ( ! $this->type_selector->matches( $processor ) ) { + return false; + } + } + foreach ( $this->subclass_selectors as $subclass_selector ) { + if ( ! $subclass_selector->matches( $processor ) ) { + return false; + } + } + return true; + } /** @var WP_CSS_Type_Selector|null */ public $type_selector; @@ -920,6 +1024,14 @@ private static function parse_subclass_selector( string $input, int &$offset ) { * > = [ ? ]* */ final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser { + public function matches( WP_HTML_Processor $processor ): bool { + // @todo this can throw on parse. 
+ if ( count( $this->selectors ) > 1 ) { + throw new Exception( 'Combined complex selectors are not supported yet.' ); + } + return $this->selectors[0]->matches( $processor ); + } + const COMBINATOR_CHILD = '>'; const COMBINATOR_DESCENDANT = ' '; const COMBINATOR_NEXT_SIBLING = '+'; From c8f16e19f30ec4b4ad0cfbaac849b33e811229e3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 19:40:55 +0100 Subject: [PATCH 050/129] Match is successful on _any_ match in selector list --- src/wp-includes/html-api/class-wp-css-selectors.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index d9c507bb5f557..1a50defba8ea3 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -76,11 +76,11 @@ public function matches( WP_HTML_Processor $processor ): bool { } foreach ( $this->selectors as $selector ) { - if ( ! 
$selector->matches( $processor ) ) { - return false; + if ( $selector->matches( $processor ) ) { + return true; } } - return true; + return false; } private $selectors; From c689c9c50fb6827dd330df1707844410479b4234 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 19:41:55 +0100 Subject: [PATCH 051/129] PICKME: Add is_quirks_mode method to processor --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 39390621e86a6..7dadbc1bebdb2 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -537,6 +537,10 @@ class WP_HTML_Tag_Processor { */ protected $compat_mode = self::NO_QUIRKS_MODE; + public function is_quirks_mode() { + return self::QUIRKS_MODE === $this->compat_mode; + } + /** * Indicates whether the parser is inside foreign content, * e.g. inside an SVG or MathML element. From 1221efae34bf033af893180aa32a13e58b5312d8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 19:41:27 +0100 Subject: [PATCH 052/129] ID matches depend on quirks mode --- src/wp-includes/html-api/class-wp-css-selectors.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 1a50defba8ea3..01e3253893d57 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -575,8 +575,10 @@ public static function parse( string $input, int &$offset ): ?self { } public function matches( WP_HTML_Processor $processor ): bool { - // @todo check case sensitivity. 
- return $processor->get_attribute( 'id' ) === $this->ident; + $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); + return $case_insensitive ? + 0 === strcasecmp( $processor->get_attribute( 'id' ), $this->ident ) : + $processor->get_attribute( 'id' ) === $this->ident; } } From e5e94b11b5d9e3c113364c2a595ebb8cfdb715f7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 19:42:12 +0100 Subject: [PATCH 053/129] has_class may return null, coerce to bool --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 01e3253893d57..3e35a383b4446 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -584,7 +584,7 @@ public function matches( WP_HTML_Processor $processor ): bool { final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser { public function matches( WP_HTML_Processor $processor ): bool { - return $processor->has_class( $this->ident ); + return (bool) $processor->has_class( $this->ident ); } /** @var string */ From 1e888babcc7e4448a02ace55a509e655bdea1e5d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 21:29:13 +0100 Subject: [PATCH 054/129] Update docs to only allow subclass selectors in final complex selector position --- .../html-api/class-wp-css-selectors.php | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 3e35a383b4446..b0d5afbb5bba7 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -27,9 +27,9 @@ * = * = # * = # - * = [ ? ]* + * = [ ? ]* * = [ ? * ]! 
- * = '>' | '+' | '~' | [ '|' '|' ] + * = '>' | [ '|' '|' ] * = | '*' * = | | * = @@ -47,17 +47,23 @@ * - ID selectors (e.g. `#unique-id`) * - Attribute selectors (e.g. `[attribute-name]` or `[attribute-name="value"]`) * - Comma-separated selector lists (e.g. `.selector-1, .selector-2`) - * - The following combinators: - * - descendant (e.g. `.parent .descendant`) - * - child (`.parent > .child`) + * - The following combinators. Only type (element) selectors are allowed in non-final position: + * - descendant (e.g. `el .descendant`) + * - child (`el > .child`) * * Unsupported selector syntax: * - Pseudo-element selectors (e.g. `::before`) * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) * - Namespace prefixes (e.g. `svg|title` or `[xlink|href]`) * - The following combinators: - * - Next sibling (`.sibling + .sibling`) - * - Subsequent sibling (`.sibling ~ .sibling`) + * - Next sibling (`el + el`) + * - Subsequent sibling (`el ~ el`) + * + * Future ideas + * - Namespace type selectors could be implemented with select namespaces in order to + * select elements from a namespace, for example: + * - `svg|*` to select all SVG elements + * - `html|title` to select only HTML TITLE elements. 
* * @since TBD * From dd4fcb01184f9e07ec51067e1d7c1a8d4021d168 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 22:10:05 +0100 Subject: [PATCH 055/129] Restrict complex selectors to only allow subclass selectors in final position --- .../html-api/class-wp-css-selectors.php | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index b0d5afbb5bba7..45a2f78d94fd5 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -1066,7 +1066,8 @@ public static function parse( string $input, int &$offset ): ?self { return null; } - $selectors = array( $selector ); + $selectors = array( $selector ); + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; $found_whitespace = self::parse_whitespace( $input, $updated_offset ); while ( $updated_offset < strlen( $input ) ) { @@ -1075,22 +1076,13 @@ public static function parse( string $input, int &$offset ): ?self { self::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || self::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] ) { - $combinator = $input[ $updated_offset ]; - ++$updated_offset; - self::parse_whitespace( $input, $updated_offset ); - - // Failure to find a selector here is a parse error - $selector = WP_CSS_Selector::parse( $input, $updated_offset ); - // Failure to find a selector is a parse error. - if ( null === $selector ) { - return null; - } - $selectors[] = $combinator; - $selectors[] = $selector; - } elseif ( ! 
$found_whitespace ) { - break; - } else { + $combinator = $input[ $updated_offset ]; + ++$updated_offset; + self::parse_whitespace( $input, $updated_offset ); + // Failure to find a selector here is a parse error + $selector = WP_CSS_Selector::parse( $input, $updated_offset ); + } elseif ( $found_whitespace ) { /* * Whitespace is ambiguous, it could be a descendant combinator or * insignificant whitespace. @@ -1099,9 +1091,24 @@ public static function parse( string $input, int &$offset ): ?self { if ( null === $selector ) { break; } - $selectors[] = self::COMBINATOR_DESCENDANT; - $selectors[] = $selector; + $combinator = self::COMBINATOR_DESCENDANT; + } else { + break; + } + + if ( null === $selector ) { + return null; } + + // `div > .className` is valid, but `.className > div` is not. + if ( $has_preceding_subclass_selector ) { + throw new Exception( 'Unsupported non-final subclass selector.' ); + } + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + + $selectors[] = $combinator; + $selectors[] = $selector; + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); } $offset = $updated_offset; From 256c55a16d8e5adf3ebdc64a360e3373eeecaa28 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 22:10:21 +0100 Subject: [PATCH 056/129] Work on complex selector handling --- .../html-api/class-wp-css-selectors.php | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 45a2f78d94fd5..bc28cfaa4f20e 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -1033,11 +1033,47 @@ private static function parse_subclass_selector( string $input, int &$offset ) { */ final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser { public function matches( WP_HTML_Processor $processor ): bool { - // @todo this can 
throw on parse. - if ( count( $this->selectors ) > 1 ) { - throw new Exception( 'Combined complex selectors are not supported yet.' ); + if ( count( $this->selectors ) === 1 ) { + return $this->selectors[0]->matches( $processor ); + } + + // First selector must match this location. + if ( ! $this->selectors[0]->matches( $processor ) ) { + return false; + } + + $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); + $selectors = array_slice( $this->selectors, 1 ); + return $this->explore_matches( $selectors, $breadcrumbs ); + } + + /** + * This only looks at breadcrumbs and can therefore only support type selectors. + * + * @param array $selectors + */ + private function explore_matches( array $selectors, array $breadcrumbs ): bool { + if ( array() === $selectors ) { + return true; + } + if ( array() === $breadcrumbs ) { + return false; + } + + $combinator = $selectors[0]; + $selector = $selectors[1]; + + switch ( $combinator ) { + case self::COMBINATOR_CHILD: + if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[0], $selector->type_selector->ident ) === 0 ) { + return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); + } + return $this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); + + case self::COMBINATOR_DESCENDANT: + default: + throw new Exception( "Combinator '{$combinator}' is not supported yet." ); } - return $this->selectors[0]->matches( $processor ); } const COMBINATOR_CHILD = '>'; @@ -1047,12 +1083,15 @@ public function matches( WP_HTML_Processor $processor ): bool { /** * even indexes are WP_CSS_Selector, odd indexes are string combinators. + * In reverse order to match the current element and then work up the tree. + * Any non-final selector is a type selector. 
+ * * @var array */ public $selectors = array(); private function __construct( array $selectors ) { - $this->selectors = $selectors; + $this->selectors = array_reverse( $selectors ); } public static function parse( string $input, int &$offset ): ?self { From 465cc3673cb15e2b229767223801224d8fd36335 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 22:26:43 +0100 Subject: [PATCH 057/129] Implement descendent selector matching --- src/wp-includes/html-api/class-wp-css-selectors.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index bc28cfaa4f20e..974c56e6581ff 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -1071,6 +1071,19 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return $this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); case self::COMBINATOR_DESCENDANT: + $ident = $selector->type_selector->ident; + + // Find _all_ the breadcrumbs that match and recurse from each of them. + for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { + if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[ $i ], $selector->type_selector->ident ) === 0 ) { + $next_crumbs = array_slice( $breadcrumbs, $i + 1 ); + if ( $this->explore_matches( array_slice( $selectors, 2 ), $next_crumbs ) ) { + return true; + } + } + } + return false; + default: throw new Exception( "Combinator '{$combinator}' is not supported yet." 
); } From 467d45dc3133dfefb7081e8e7e7821254dd073a0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 15:48:21 +0100 Subject: [PATCH 058/129] Add null check for subclass selectors --- src/wp-includes/html-api/class-wp-css-selectors.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 974c56e6581ff..21039c0c7940e 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -962,9 +962,11 @@ public function matches( WP_HTML_Processor $processor ): bool { return false; } } - foreach ( $this->subclass_selectors as $subclass_selector ) { - if ( ! $subclass_selector->matches( $processor ) ) { - return false; + if ( null !== $this->subclass_selectors ) { + foreach ( $this->subclass_selectors as $subclass_selector ) { + if ( ! $subclass_selector->matches( $processor ) ) { + return false; + } } } return true; From 44bfc64b4fe9711f1800e854c059156bcf2b45fb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 16:20:22 +0100 Subject: [PATCH 059/129] CSS selector reformat ternaries --- .../html-api/class-wp-css-selectors.php | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 21039c0c7940e..65e384639abcb 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -390,9 +390,9 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string 0 === $codepoint_value || $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || ( 0xD800 <= $codepoint_value && $codepoint_value <= 0xDFFF ) - ) ? - "\u{FFFD}" : - mb_chr( $codepoint_value, 'UTF-8' ); + ) + ? 
"\u{FFFD}" + : mb_chr( $codepoint_value, 'UTF-8' ); $offset += $hex_length; @@ -582,9 +582,9 @@ public static function parse( string $input, int &$offset ): ?self { public function matches( WP_HTML_Processor $processor ): bool { $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); - return $case_insensitive ? - 0 === strcasecmp( $processor->get_attribute( 'id' ), $this->ident ) : - $processor->get_attribute( 'id' ) === $this->ident; + return $case_insensitive + ? 0 === strcasecmp( $processor->get_attribute( 'id' ), $this->ident ) + : $processor->get_attribute( 'id' ) === $this->ident; } } @@ -693,9 +693,9 @@ public function matches( WP_HTML_Processor $processor ): bool { switch ( $this->matcher ) { case self::MATCH_EXACT: - return $case_insensitive ? - 0 === strcasecmp( $att_value, $this->value ) : - $att_value === $this->value; + return $case_insensitive + ? 0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value; case self::MATCH_ONE_OF_EXACT: // @todo @@ -704,9 +704,9 @@ public function matches( WP_HTML_Processor $processor ): bool { case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: // Attempt the full match first if ( - $case_insensitive ? - 0 === strcasecmp( $att_value, $this->value ) : - $att_value === $this->value + $case_insensitive + ? 0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value ) { return true; } @@ -1017,13 +1017,16 @@ private static function parse_subclass_selector( string $input, int &$offset ) { } $next_char = $input[ $offset ]; - return '.' === $next_char ? - WP_CSS_Class_Selector::parse( $input, $offset ) : ( - '#' === $next_char ? - WP_CSS_ID_Selector::parse( $input, $offset ) : ( - '[' === $next_char ? - WP_CSS_Attribute_Selector::parse( $input, $offset ) : - null ) ); + return '.' === $next_char + ? WP_CSS_Class_Selector::parse( $input, $offset ) + : ( + '#' === $next_char + ? WP_CSS_ID_Selector::parse( $input, $offset ) + : ( '[' === $next_char + ? 
WP_CSS_Attribute_Selector::parse( $input, $offset ) + : null + ) + ); } } From ca4531c0a190b89f6072799b2b1f90dbd1deb2c1 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 16:20:54 +0100 Subject: [PATCH 060/129] Implement ~= attribute matching --- .../html-api/class-wp-css-selectors.php | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 65e384639abcb..49c3daf66c3b2 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -180,9 +180,10 @@ public static function parse( string $input, int &$offset ); abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; + const WHITESPACE_CHARACTERS = " \t\r\n\f"; public static function parse_whitespace( string $input, int &$offset ): bool { - $length = strspn( $input, " \t\r\n\f", $offset ); + $length = strspn( $input, self::WHITESPACE_CHARACTERS, $offset ); $advanced = $length > 0; $offset += $length; return $advanced; @@ -698,8 +699,16 @@ public function matches( WP_HTML_Processor $processor ): bool { : $att_value === $this->value; case self::MATCH_ONE_OF_EXACT: - // @todo - throw new Exception( 'One of attribute matching is not supported yet.' ); + foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) { + if ( + $case_insensitive + ? 0 === strcasecmp( $val, $this->value ) + : $val === $this->value + ) { + return true; + } + } + return false; case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: // Attempt the full match first @@ -727,13 +736,35 @@ public function matches( WP_HTML_Processor $processor ): bool { case self::MATCH_CONTAINS: return false !== ( - $case_insensitive ? - stripos( $att_value, $this->value ) : - strpos( $att_value, $this->value ) + $case_insensitive + ? 
stripos( $att_value, $this->value ) + : strpos( $att_value, $this->value ) ); } } + /** + * @param string $input + * + * @return Generator + */ + private function whitespace_delimited_list( string $input ): Generator { + $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); + + while ( $offset < strlen( $input ) ) { + // Find the byte length until the next boundary. + $length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); + if ( 0 === $length ) { + return; + } + + $value = substr( $input, $offset, $length ); + $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); + + yield $value; + } + } + /** * [att=val] * Represents an element with the att attribute whose value is exactly "val". From 489db93a917625bc7d42d6e3d9f5ad924d3a96ed Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 16:48:15 +0100 Subject: [PATCH 061/129] CSS fix return type --- src/wp-includes/html-api/class-wp-css-selectors.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 49c3daf66c3b2..1431dc58afb52 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -113,7 +113,7 @@ public static function from_selectors( string $selectors ): ?self { * * @since TBD * - * @return WP_CSS_Selectors|null + * @return self|null */ private static function parse( string $input ) { // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace and matches the dom_selectors_group production. 
From e57a2114aafdd6cb1d0e3cf1b7d2e3064c3e8d0b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:05:40 +0100 Subject: [PATCH 062/129] Fix static analysis problems --- .../html-api/class-wp-css-selectors.php | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 1431dc58afb52..2205146bdf2be 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -1,4 +1,8 @@ -value ) ); } + + throw new Exception( 'Unreachable' ); } /** @@ -830,7 +834,7 @@ private function whitespace_delimited_list( string $input ): Generator { /** * The attribute matcher. * - * @var string|null + * @var null|self::MATCH_* */ public $matcher; @@ -844,7 +848,7 @@ private function whitespace_delimited_list( string $input ): Generator { /** * The attribute modifier. * - * @var string|null + * @var null|self::MODIFIER_* */ public $modifier; @@ -1086,7 +1090,7 @@ public function matches( WP_HTML_Processor $processor ): bool { /** * This only looks at breadcrumbs and can therefore only support type selectors. 
* - * @param array $selectors + * @param array $selectors */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { if ( array() === $selectors ) { @@ -1096,8 +1100,10 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; } + /** @var self::COMBINATOR_* $combinator */ $combinator = $selectors[0]; - $selector = $selectors[1]; + /** @var WP_CSS_Selector $selector */ + $selector = $selectors[1]; switch ( $combinator ) { case self::COMBINATOR_CHILD: @@ -1107,8 +1113,6 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return $this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); case self::COMBINATOR_DESCENDANT: - $ident = $selector->type_selector->ident; - // Find _all_ the breadcrumbs that match and recurse from each of them. for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[ $i ], $selector->type_selector->ident ) === 0 ) { From 509e648685af757a6b38830c8ccd58e2ac36fe07 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:40:39 +0100 Subject: [PATCH 063/129] Fix and annotate things (static analysis) --- .../html-api/class-wp-css-selectors.php | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 2205146bdf2be..28e51aa9a9735 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -77,7 +77,7 @@ * @see {@link https://www.w3.org/TR/selectors-4/} * */ -class WP_CSS_Selector_List implements IWP_CSS_Selector_Matcher { +class WP_CSS_Selector_List extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { return false; @@ -91,8 +91,14 @@ 
public function matches( WP_HTML_Processor $processor ): bool { return false; } + /** + * @var array + */ private $selectors; + /** + * @param array $selectors + */ private function __construct( array $selectors ) { $this->selectors = $selectors; } @@ -122,7 +128,7 @@ private static function parse( string $input ) { $input = trim( $input, " \t\r\n\r" ); if ( '' === $input ) { - null; + return null; } /* @@ -144,7 +150,7 @@ private static function parse( string $input ) { if ( null === $selector ) { return null; } - WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); + self::parse_whitespace( $input, $offset ); $selectors = array( $selector ); while ( $offset < strlen( $input ) ) { @@ -153,16 +159,16 @@ private static function parse( string $input ) { return null; } ++$offset; - WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); + self::parse_whitespace( $input, $offset ); $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; } $selectors[] = $selector; - WP_CSS_Selector_Parser::parse_whitespace( $input, $offset ); + self::parse_whitespace( $input, $offset ); } - return new WP_CSS_Selector_List( $selectors ); + return new self( $selectors ); } } @@ -180,7 +186,7 @@ interface IWP_CSS_Selector_Parser { public static function parse( string $input, int &$offset ); } -abstract class WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +abstract class WP_CSS_Selector_Parser { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; const WHITESPACE_CHARACTERS = " \t\r\n\f"; @@ -216,7 +222,6 @@ protected static function parse_hash_token( string $input, int &$offset ): ?stri if ( null === $result ) { return null; - $offset = $updated_offset; } $offset = $updated_offset; @@ -263,8 +268,8 @@ protected static function parse_ident( string $input, int &$offset ): ?string { continue; } elseif ( self::is_ident_codepoint( $input, $offset ) ) { // @todo this should append and advance the correct 
number of bytes. - $ident .= $input[ $offset ]; - $offset += 1; + $ident .= $input[ $offset ]; + ++$offset; continue; } break; @@ -378,6 +383,10 @@ protected static function parse_string( string $input, int &$offset ): ?string { * > This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�). * > anything else * > Return the current input code point. + * + * @param string $input + * @param int $offset + * @return string|null */ protected static function consume_escaped_codepoint( $input, &$offset ): ?string { $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); @@ -558,7 +567,8 @@ protected static function check_if_three_code_points_would_start_an_ident_sequen } } -final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { + /** @var string */ public $ident; @@ -591,7 +601,7 @@ public function matches( WP_HTML_Processor $processor ): bool { } } -final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { return (bool) $processor->has_class( $this->ident ); } @@ -629,7 +639,7 @@ public static function parse( string $input, int &$offset ): ?self { } } -final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( '*' === $this->ident ) { return true; @@ -681,7 +691,7 @@ public static function parse( string $input, int &$offset ): ?self { } } -final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, 
IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { $att_value = $processor->get_attribute( $this->name ); if ( null === $att_value ) { @@ -990,7 +1000,7 @@ public static function parse( string $input, int &$offset ): ?self { * * > = [ ? * ]! */ -final class WP_CSS_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! $this->type_selector->matches( $processor ) ) { @@ -1013,6 +1023,10 @@ public function matches( WP_HTML_Processor $processor ): bool { /** @var array|null */ public $subclass_selectors; + /** + * @param WP_CSS_Type_Selector|null $type_selector + * @param array $subclass_selectors + */ private function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { $this->type_selector = $type_selector; $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; @@ -1071,7 +1085,7 @@ private static function parse_subclass_selector( string $input, int &$offset ) { * * > = [ ? ]* */ -final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser { +final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( count( $this->selectors ) === 1 ) { return $this->selectors[0]->matches( $processor ); @@ -1091,6 +1105,7 @@ public function matches( WP_HTML_Processor $processor ): bool { * This only looks at breadcrumbs and can therefore only support type selectors. 
* * @param array $selectors + * @param array $breadcrumbs */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { if ( array() === $selectors ) { @@ -1139,10 +1154,13 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { * In reverse order to match the current element and then work up the tree. * Any non-final selector is a type selector. * - * @var array + * @var array */ public $selectors = array(); + /** + * @param array $selectors + */ private function __construct( array $selectors ) { $this->selectors = array_reverse( $selectors ); } From 58c1698b16a55ac3d9bc92c35b4c2346e43b67c7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:40:46 +0100 Subject: [PATCH 064/129] update tests --- .../phpunit/tests/html-api/wpCssSelectors.php | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 33ada4ccbe3f9..5983f91c5d9ba 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -309,8 +309,12 @@ public static function data_attribute_selectors(): array { '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), + '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[att=val I]' => array( 
'[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), @@ -332,8 +336,6 @@ public static function data_attribute_selectors(): array { 'Invalid: [att=val ' => array( '[att=val ' ), 'Invalid: [att i]' => array( '[att i]' ), 'Invalid: [att s]' => array( '[att s]' ), - 'Invalid: [att="val" I]' => array( '[att="val" I]' ), - 'Invalid: [att="val" S]' => array( '[att="val" S]' ), "Invalid: [att='val\\n']" => array( "[att='val\n']" ), 'Invalid: [att=val i ' => array( '[att=val i ' ), ); @@ -371,17 +373,21 @@ public function test_parse_empty_selector() { * @ticket TBD */ public function test_parse_complex_selector() { - $input = 'el.foo#bar[baz=quux] > .child , rest'; + $input = 'el1 > .child#bar[baz=quux] , rest'; $offset = 0; $sel = WP_CSS_Complex_Selector::parse( $input, $offset ); $this->assertSame( 3, count( $sel->selectors ) ); - $this->assertNotNull( $sel->selectors[0]->type_selector ); - $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + + $this->assertSame( 'el1', $sel->selectors[2]->type_selector->ident ); + $this->assertNull( $sel->selectors[2]->subclass_selectors ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->selectors[1] ); - $this->assertNull( $sel->selectors[2]->type_selector ); - $this->assertSame( 1, count( 
$sel->selectors[2]->subclass_selectors ) ); - $this->assertSame( 'child', $sel->selectors[2]->subclass_selectors[0]->ident ); + + $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + $this->assertNull( $sel->selectors[0]->type_selector ); + $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + $this->assertSame( 'child', $sel->selectors[0]->subclass_selectors[0]->ident ); $this->assertSame( ', rest', substr( $input, $offset ) ); } @@ -408,7 +414,7 @@ public function test_parse_empty_complex_selector() { * @ticket TBD */ public function test_parse_selector_list() { - $input = 'el.foo#bar[baz=quux] .descendent , rest'; + $input = 'el1 el2 el.foo#bar[baz=quux], rest'; $result = WP_CSS_Selector_List::from_selectors( $input ); $this->assertNotNull( $result ); } From c9b914517674004d8b7c38099325183cf3a592a8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:44:21 +0100 Subject: [PATCH 065/129] Id attribute must be a string to match id selector --- src/wp-includes/html-api/class-wp-css-selectors.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 28e51aa9a9735..8af33c2194723 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -594,9 +594,14 @@ public static function parse( string $input, int &$offset ): ?self { } public function matches( WP_HTML_Processor $processor ): bool { + $id = $processor->get_attribute( 'id' ); + if ( ! is_string( $id ) ) { + return false; + } + $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); return $case_insensitive - ? 0 === strcasecmp( $processor->get_attribute( 'id' ), $this->ident ) + ? 
0 === strcasecmp( $id, $this->ident ) : $processor->get_attribute( 'id' ) === $this->ident; } } From e5cac63369f3c7b1a6cdf3c02c097bdae4e3d669 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:47:31 +0100 Subject: [PATCH 066/129] Coerce boolean attributes to "" --- src/wp-includes/html-api/class-wp-css-selectors.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 8af33c2194723..8b92150cbef8f 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -707,6 +707,10 @@ public function matches( WP_HTML_Processor $processor ): bool { return true; } + if ( true === $att_value ) { + $att_value = ''; + } + $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; switch ( $this->matcher ) { From 2bafae995a64897ec393167e8a7416b74ff8b485 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 29 Nov 2024 17:56:57 +0100 Subject: [PATCH 067/129] Fix a few more static analysis things --- .../html-api/class-wp-css-selectors.php | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 8b92150cbef8f..87e32727a434e 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -871,6 +871,12 @@ private function whitespace_delimited_list( string $input ): Generator { */ public $modifier; + /** + * @param string $name + * @param null|self::MATCH_* $matcher + * @param null|string $value + * @param null|self::MODIFIER_* $modifier + */ private function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { $this->name = $name; $this->matcher = $matcher; @@ -1092,19 +1098,20 @@ private static function parse_subclass_selector( 
string $input, int &$offset ) { /** * This corresponds to in the grammar. * - * > = [ ? ]* + * > = [ ? ] * */ final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { - if ( count( $this->selectors ) === 1 ) { - return $this->selectors[0]->matches( $processor ); - } - // First selector must match this location. if ( ! $this->selectors[0]->matches( $processor ) ) { return false; } + if ( count( $this->selectors ) === 1 ) { + return true; + } + + /** @var array $breadcrumbs */ $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); $selectors = array_slice( $this->selectors, 1 ); return $this->explore_matches( $selectors, $breadcrumbs ); From 8fe57e393d947c2b8db0ee326cfa7989ade8c801 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 28 Nov 2024 18:04:13 +0100 Subject: [PATCH 068/129] Add select method --- .../html-api/class-wp-html-processor.php | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e88757ec7b4c2..438dee4c47f4e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -635,6 +635,44 @@ public function get_unsupported_exception() { return $this->unsupported_exception; } + /** + * Use a selector to advance. + * + * @param string $selectors + * @return Generator|null + */ + public function select_all( string $selectors ): ?Generator { + $select = WP_CSS_Selector_List::from_selectors( $selectors ); + if ( null === $select ) { + return null; + } + + while ( $this->next_tag() ) { + if ( $select->matches( $this ) ) { + yield; + } + } + } + + /** + * Select the next matching element. + * + * If iterating through matching elements, use `select_all` instead. 
+ * + * @param string $selectors + * @return bool|null + */ + public function select( string $selectors ) { + $selection = $this->select_all( $selectors ); + if ( null === $selection ) { + return null; + } + foreach ( $selection as $_ ) { + return true; + } + return false; + } + /** * Finds the next tag matching the $query. * From ab2fe0d78e2f2f54b29dae6ddb36a664f703d476 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 3 Dec 2024 18:20:01 +0100 Subject: [PATCH 069/129] Unify parsing under single class --- .../html-api/class-wp-css-selectors.php | 820 +++++++++--------- .../html-api/class-wp-html-processor.php | 2 +- .../phpunit/tests/html-api/wpCssSelectors.php | 121 ++- 3 files changed, 510 insertions(+), 433 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selectors.php index 87e32727a434e..7588eb72294bd 100644 --- a/src/wp-includes/html-api/class-wp-css-selectors.php +++ b/src/wp-includes/html-api/class-wp-css-selectors.php @@ -16,8 +16,8 @@ * * This class is designed for internal use by the HTML processor. * - * This class is instantiated via the `WP_CSS_Selector_List::from_selector( string $selector )` method. - * It accepts a CSS selector string and returns an instance of itself or `null` if the selector + * This class is instantiated via the `WP_CSS_Selector::from_selectors( string $input )` method. + * It takes a CSS selector string and returns an instance of itself or `null` if the selector * is invalid or unsupported. * * A subset of the CSS selector grammar is supported. The grammar is defined in the CSS Syntax @@ -39,7 +39,7 @@ * = '[' ']' | * '[' [ | ] ? ']' * = [ '~' | '|' | '^' | '$' | '*' ]? '=' - * = i | s + * = i | I | s | S * * @link https://www.w3.org/TR/selectors/#grammar Refer to the grammar for more details. 
* @@ -77,7 +77,7 @@ * @see {@link https://www.w3.org/TR/selectors-4/} * */ -class WP_CSS_Selector_List extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Matcher { +class WP_CSS_Selector implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { return false; @@ -97,34 +97,25 @@ public function matches( WP_HTML_Processor $processor ): bool { private $selectors; /** + * Constructor. + * * @param array $selectors */ - private function __construct( array $selectors ) { + protected function __construct( array $selectors ) { $this->selectors = $selectors; } /** - * Takes a CSS selectors string and returns an instance of itself or `null` if the selector - * is invalid or unsupported. - * - * @since TBD - * - * @param string $selectors CSS selectors string. - * @return self|null - */ - public static function from_selectors( string $selectors ): ?self { - return self::parse( $selectors ); - } - - /** - * Returns a list of selectors. + * Takes a CSS selector string and returns an instance of itself or `null` if the selector + * string is invalid or unsupported. * * @since TBD * + * @param string $input CSS selectors. * @return self|null */ - private static function parse( string $input ) { - // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace and matches the dom_selectors_group production. 
+ public static function from_selectors( string $input ): ?self { + // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… $input = trim( $input, " \t\r\n\r" ); if ( '' === $input ) { @@ -146,7 +137,7 @@ private static function parse( string $input ) { $offset = 0; - $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); + $selector = self::parse_complex_selector( $input, $offset ); if ( null === $selector ) { return null; } @@ -160,7 +151,7 @@ private static function parse( string $input ) { } ++$offset; self::parse_whitespace( $input, $offset ); - $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); + $selector = self::parse_complex_selector( $input, $offset ); if ( null === $selector ) { return null; } @@ -170,23 +161,343 @@ private static function parse( string $input ) { return new self( $selectors ); } -} -interface IWP_CSS_Selector_Matcher { + /* + * ------------------------------ + * Selector parsing functionality + * ------------------------------ + */ + /** - * @return bool + * Parse an ID selector + * + * > = + * + * https://www.w3.org/TR/selectors/#grammar + * + * @return WP_CSS_ID_Selector|null */ - public function matches( WP_HTML_Processor $processor ): bool; -} + final protected static function parse_id_selector( string $input, int &$offset ): ?WP_CSS_ID_Selector { + $ident = self::parse_hash_token( $input, $offset ); + if ( null === $ident ) { + return null; + } + return new WP_CSS_ID_Selector( $ident ); + } -interface IWP_CSS_Selector_Parser { /** - * @return static|null + * Parse a class selector + * + * > = '.' + * + * https://www.w3.org/TR/selectors/#grammar + * + * @return WP_CSS_Class_Selector|null + */ + final protected static function parse_class_selector( string $input, int &$offset ): ?WP_CSS_Class_Selector { + if ( $offset + 1 >= strlen( $input ) || '.' 
!== $input[ $offset ] ) { + return null; + } + + $updated_offset = $offset + 1; + $result = self::parse_ident( $input, $updated_offset ); + + if ( null === $result ) { + return null; + } + + $offset = $updated_offset; + return new WP_CSS_Class_Selector( $result ); + } + + /** + * Parse a type selector + * + * > = | ? '*' + * > = [ | '*' ]? '|' + * > = ? + * + * Namespaces (e.g. |div, *|div, or namespace|div) are not supported, + * so this selector effectively matches * or ident. + * + * https://www.w3.org/TR/selectors/#grammar + * + * @return WP_CSS_Type_Selector|null + */ + final protected static function parse_type_selector( string $input, int &$offset ): ?WP_CSS_Type_Selector { + if ( $offset >= strlen( $input ) ) { + return null; + } + + if ( '*' === $input[ $offset ] ) { + ++$offset; + return new WP_CSS_Type_Selector( '*' ); + } + + $result = self::parse_ident( $input, $offset ); + if ( null === $result ) { + return null; + } + + return new WP_CSS_Type_Selector( $result ); + } + + /** + * Parse an attribute selector + * + * > = '[' ']' | + * > '[' [ | ] ? ']' + * > = [ '~' | '|' | '^' | '$' | '*' ]? '=' + * > = i | s + * > = ? + * + * Namespaces are not supported, so attribute names are effectively identifiers. 
+ * + * https://www.w3.org/TR/selectors/#grammar + * + * @return WP_CSS_Attribute_Selector|null + */ + final protected static function parse_attribute_selector( string $input, int &$offset ): ?WP_CSS_Attribute_Selector { + // Need at least 3 bytes [x] + if ( $offset + 2 >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + + if ( '[' !== $input[ $updated_offset ] ) { + return null; + } + ++$updated_offset; + + self::parse_whitespace( $input, $updated_offset ); + $attr_name = self::parse_ident( $input, $updated_offset ); + if ( null === $attr_name ) { + return null; + } + self::parse_whitespace( $input, $updated_offset ); + + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new WP_CSS_Attribute_Selector( $attr_name ); + } + + // need to match at least `=x]` at this point + if ( $updated_offset + 3 >= strlen( $input ) ) { + return null; + } + + if ( '=' === $input[ $updated_offset ] ) { + ++$updated_offset; + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT; + } elseif ( '=' === $input[ $updated_offset + 1 ] ) { + switch ( $input[ $updated_offset ] ) { + case '~': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT; + $updated_offset += 2; + break; + case '|': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN; + $updated_offset += 2; + break; + case '^': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY; + $updated_offset += 2; + break; + case '$': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY; + $updated_offset += 2; + break; + case '*': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_CONTAINS; + $updated_offset += 2; + break; + default: + return null; + } + } else { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); + $attr_val = + self::parse_string( $input, $updated_offset ) ?? 
+ self::parse_ident( $input, $updated_offset ); + + if ( null === $attr_val ) { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + $attr_modifier = null; + switch ( $input[ $updated_offset ] ) { + case 'i': + case 'I': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; + ++$updated_offset; + break; + + case 's': + case 'S': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; + ++$updated_offset; + break; + } + + if ( null !== $attr_modifier ) { + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new WP_CSS_Attribute_Selector( $attr_name, $attr_matcher, $attr_val, $attr_modifier ); + } + + return null; + } + + /** + * Parses a compound selector. + * + * > = [ ? * ]! + * + * @return WP_CSS_Compound_Selector|null + */ + final protected static function parse_compound_selector( string $input, int &$offset ): ?WP_CSS_Compound_Selector { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $type_selector = self::parse_type_selector( $input, $updated_offset ); + + $subclass_selectors = array(); + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + while ( null !== $last_parsed_subclass_selector ) { + $subclass_selectors[] = $last_parsed_subclass_selector; + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + } + + if ( null !== $type_selector || array() !== $subclass_selectors ) { + $offset = $updated_offset; + return new WP_CSS_Compound_Selector( $type_selector, $subclass_selectors ); + } + return null; + } + + /** + * Parses a complex selector. + * + * > = [ ? 
]* + * + * @return WP_CSS_Complex_Selector|null + */ + final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $selector ) { + return null; + } + + $selectors = array( $selector ); + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + while ( $updated_offset < strlen( $input ) ) { + if ( + WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] + ) { + $combinator = $input[ $updated_offset ]; + ++$updated_offset; + self::parse_whitespace( $input, $updated_offset ); + + // Failure to find a selector here is a parse error + $selector = self::parse_compound_selector( $input, $updated_offset ); + } elseif ( $found_whitespace ) { + /* + * Whitespace is ambiguous, it could be a descendant combinator or + * insignificant whitespace. + */ + $selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $selector ) { + break; + } + $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; + } else { + break; + } + + if ( null === $selector ) { + return null; + } + + // `div > .className` is valid, but `.className > div` is not. + if ( $has_preceding_subclass_selector ) { + throw new Exception( 'Unsupported non-final subclass selector.' 
); + } + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + + $selectors[] = $combinator; + $selectors[] = $selector; + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + } + $offset = $updated_offset; + return new WP_CSS_Complex_Selector( $selectors ); + } + + /** + * Parses a subclass selector. + * + * > = | | + * + * @return WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector|null + */ + private static function parse_subclass_selector( string $input, int &$offset ) { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $next_char = $input[ $offset ]; + return '.' === $next_char + ? self::parse_class_selector( $input, $offset ) + : ( + '#' === $next_char + ? self::parse_id_selector( $input, $offset ) + : ( '[' === $next_char + ? self::parse_attribute_selector( $input, $offset ) + : null + ) + ); + } + + + /* + * ------------------------ + * Selector partial parsing + * ------------------------ + * + * These functions consume parts of a selector string input when successful + * and return meaningful values to be used by selectors. */ - public static function parse( string $input, int &$offset ); -} -abstract class WP_CSS_Selector_Parser { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; const WHITESPACE_CHARACTERS = " \t\r\n\f"; @@ -212,7 +523,7 @@ public static function parse_whitespace( string $input, int &$offset ): bool { * * This implementation is not interested in the , a '#' delim token is not relevant for selectors. 
*/ - protected static function parse_hash_token( string $input, int &$offset ): ?string { + final protected static function parse_hash_token( string $input, int &$offset ): ?string { if ( $offset + 1 >= strlen( $input ) || '#' !== $input[ $offset ] ) { return null; } @@ -253,7 +564,7 @@ protected static function parse_hash_token( string $input, int &$offset ): ?stri * * @return string|null */ - protected static function parse_ident( string $input, int &$offset ): ?string { + final protected static function parse_ident( string $input, int &$offset ): ?string { if ( ! self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { return null; } @@ -312,7 +623,7 @@ protected static function parse_ident( string $input, int &$offset ): ?string { * * @return string|null */ - protected static function parse_string( string $input, int &$offset ): ?string { + final protected static function parse_string( string $input, int &$offset ): ?string { if ( $offset + 1 >= strlen( $input ) ) { return null; } @@ -388,16 +699,24 @@ protected static function parse_string( string $input, int &$offset ): ?string { * @param int $offset * @return string|null */ - protected static function consume_escaped_codepoint( $input, &$offset ): ?string { + final protected static function consume_escaped_codepoint( $input, &$offset ): ?string { $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); if ( $hex_length > 0 ) { + /** + * The 6-character hex string has a maximum value of 0xFFFFFF. + * It is likely to fit in an int value and not be a float. + * + * @var int + */ $codepoint_value = hexdec( substr( $input, $offset, $hex_length ) ); - // > A surrogate is a leading surrogate or a trailing surrogate. - // > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. - // > A trailing surrogate is a code point that is in the range U+DC00 to U+DFFF, inclusive. 
- // The surrogate ranges are adjacent, so the complete range is 0xD800..=0xDFFF, - // inclusive. + /* + * > A surrogate is a leading surrogate or a trailing surrogate. + * > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. + * > A trailing surrogate is a code point that is in the range U+DC00 to U+DFFF, inclusive. + * + * The surrogate ranges are adjacent, so the complete range is 0xD800 to 0xDFFF, inclusive. + */ $codepoint_char = ( 0 === $codepoint_value || $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || @@ -428,13 +747,16 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string } /* - * Utiltities - * ========== + * --------------------------- + * Selector parsing utiltities + * --------------------------- * - * The following functions do not consume any input. + * The following functions are used for parsing but do not consume any input. */ /** + * Checks for two valid escape codepoints. + * * > 4.3.8. Check if two code points are a valid escape * > This section describes how to check if two code points are a valid escape. The algorithm described here can be called explicitly with two code points, or can be called with the input stream itself. In the latter case, the two code points in question are the current input code point and the next input code point, in that order. * > @@ -449,8 +771,12 @@ protected static function consume_escaped_codepoint( $input, &$offset ): ?string * https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape * * @todo this does not check whether the second codepoint is valid. + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next two codepoints are a valid escape, otherwise false. 
*/ - protected static function next_two_are_valid_escape( string $input, int $offset ): bool { + private static function next_two_are_valid_escape( string $input, int $offset ): bool { if ( $offset + 1 >= strlen( $input ) ) { return false; } @@ -458,7 +784,7 @@ protected static function next_two_are_valid_escape( string $input, int $offset } /** - * Check if the next code point is an "ident start code point". + * Checks if the next code point is an "ident start code point". * * Caution! This method does not do any bounds checking, it should not be passed * a string with an offset that is out of bounds. @@ -474,9 +800,13 @@ protected static function next_two_are_valid_escape( string $input, int $offset * > non-ASCII code point * > A code point with a value equal to or greater than U+0080 . * - * https://www.w3.org/TR/css-syntax-3/#ident-start-code-point + * @link https://www.w3.org/TR/css-syntax-3/#ident-start-code-point + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next codepoint is an ident start code point, otherwise false. */ - protected static function is_ident_start_codepoint( string $input, int $offset ): bool { + final protected static function is_ident_start_codepoint( string $input, int $offset ): bool { return ( '_' === $input[ $offset ] || ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || @@ -486,7 +816,7 @@ protected static function is_ident_start_codepoint( string $input, int $offset ) } /** - * Check if the next code point is an "ident code point". + * Checks if the next code point is an "ident code point". * * Caution! This method does not do any bounds checking, it should not be passed * a string with an offset that is out of bounds. @@ -496,15 +826,21 @@ protected static function is_ident_start_codepoint( string $input, int $offset ) * > digit * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. 
* - * https://www.w3.org/TR/css-syntax-3/#ident-code-point + * @link https://www.w3.org/TR/css-syntax-3/#ident-code-point + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next codepoint is an ident code point, otherwise false. */ - protected static function is_ident_codepoint( string $input, int $offset ): bool { + final protected static function is_ident_codepoint( string $input, int $offset ): bool { return '-' === $input[ $offset ] || ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || self::is_ident_start_codepoint( $input, $offset ); } /** + * Checks if three code points would start an ident sequence. + * * > 4.3.9. Check if three code points would start an ident sequence * > This section describes how to check if three code points would start an ident sequence. The algorithm described here can be called explicitly with three code points, or can be called with the input stream itself. In the latter case, the three code points in question are the current input code point and the next two input code points, in that order. * > @@ -521,9 +857,13 @@ protected static function is_ident_codepoint( string $input, int $offset ): bool * > anything else * > Return false. * - * https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier + * @link https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next three codepoints would start an ident sequence, otherwise false. 
*/ - protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { + private static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } @@ -567,32 +907,21 @@ protected static function check_if_three_code_points_would_start_an_ident_sequen } } -final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +interface IWP_CSS_Selector_Matcher { + /** + * @return bool + */ + public function matches( WP_HTML_Processor $processor ): bool; +} +final class WP_CSS_ID_Selector implements IWP_CSS_Selector_Matcher { /** @var string */ public $ident; - private function __construct( string $ident ) { + public function __construct( string $ident ) { $this->ident = $ident; } - /** - * Parse an ID selector - * - * > = - * - * https://www.w3.org/TR/selectors/#grammar - * - * @return self|null - */ - public static function parse( string $input, int &$offset ): ?self { - $ident = self::parse_hash_token( $input, $offset ); - if ( null === $ident ) { - return null; - } - return new self( $ident ); - } - public function matches( WP_HTML_Processor $processor ): bool { $id = $processor->get_attribute( 'id' ); if ( ! is_string( $id ) ) { @@ -606,50 +935,29 @@ public function matches( WP_HTML_Processor $processor ): bool { } } -final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - return (bool) $processor->has_class( $this->ident ); - } - - /** @var string */ - public $ident; - - private function __construct( string $ident ) { - $this->ident = $ident; - } - - /** - * Parse a class selector - * - * > = '.' 
- * - * https://www.w3.org/TR/selectors/#grammar - * - * @return self|null - */ - public static function parse( string $input, int &$offset ): ?self { - if ( $offset + 1 >= strlen( $input ) || '.' !== $input[ $offset ] ) { - return null; - } - - $updated_offset = $offset + 1; - $result = self::parse_ident( $input, $updated_offset ); - - if ( null === $result ) { - return null; - } +final class WP_CSS_Class_Selector implements IWP_CSS_Selector_Matcher { + public function matches( WP_HTML_Processor $processor ): bool { + return (bool) $processor->has_class( $this->ident ); + } - $offset = $updated_offset; - return new self( $result ); + /** @var string */ + public $ident; + + public function __construct( string $ident ) { + $this->ident = $ident; } } -final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +final class WP_CSS_Type_Selector implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { + $tag_name = $processor->get_tag(); + if ( null === $tag_name ) { + return false; + } if ( '*' === $this->ident ) { return true; } - return 0 === strcasecmp( $processor->get_tag(), $this->ident ); + return 0 === strcasecmp( $tag_name, $this->ident ); } /** @@ -659,44 +967,12 @@ public function matches( WP_HTML_Processor $processor ): bool { */ public $ident; - private function __construct( string $ident ) { + public function __construct( string $ident ) { $this->ident = $ident; } - - /** - * Parse a type selector - * - * > = | ? '*' - * > = [ | '*' ]? '|' - * > = ? - * - * Namespaces (e.g. |div, *|div, or namespace|div) are not supported, - * so this selector effectively matches * or ident. 
- * - * https://www.w3.org/TR/selectors/#grammar - * - * @return self|null - */ - public static function parse( string $input, int &$offset ): ?self { - if ( $offset >= strlen( $input ) ) { - return null; - } - - if ( '*' === $input[ $offset ] ) { - ++$offset; - return new self( '*' ); - } - - $result = self::parse_ident( $input, $offset ); - if ( null === $result ) { - return null; - } - - return new self( $result ); - } } -final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +final class WP_CSS_Attribute_Selector implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { $att_value = $processor->get_attribute( $this->name ); if ( null === $att_value ) { @@ -772,17 +1048,17 @@ public function matches( WP_HTML_Processor $processor ): bool { * @return Generator */ private function whitespace_delimited_list( string $input ): Generator { - $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); + $offset = strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS ); while ( $offset < strlen( $input ) ) { // Find the byte length until the next boundary. 
- $length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); + $length = strcspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset ); if ( 0 === $length ) { return; } $value = substr( $input, $offset, $length ); - $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); + $offset += $length + strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset + $length ); yield $value; } @@ -877,137 +1153,12 @@ private function whitespace_delimited_list( string $input ): Generator { * @param null|string $value * @param null|self::MODIFIER_* $modifier */ - private function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { + public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { $this->name = $name; $this->matcher = $matcher; $this->value = $value; $this->modifier = $modifier; } - - /** - * Parse a attribute selector - * - * > = '[' ']' | - * > '[' [ | ] ? ']' - * > = [ '~' | '|' | '^' | '$' | '*' ]? '=' - * > = i | s - * > = ? - * - * Namespaces are not supported, so attribute names are effectively identifiers. 
- * - * https://www.w3.org/TR/selectors/#grammar - * - * @return self|null - */ - public static function parse( string $input, int &$offset ): ?self { - // Need at least 3 bytes [x] - if ( $offset + 2 >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - - if ( '[' !== $input[ $updated_offset ] ) { - return null; - } - ++$updated_offset; - - self::parse_whitespace( $input, $updated_offset ); - $attr_name = self::parse_ident( $input, $updated_offset ); - if ( null === $attr_name ) { - return null; - } - self::parse_whitespace( $input, $updated_offset ); - - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - - if ( ']' === $input[ $updated_offset ] ) { - $offset = $updated_offset + 1; - return new self( $attr_name ); - } - - // need to match at least `=x]` at this point - if ( $updated_offset + 3 >= strlen( $input ) ) { - return null; - } - - if ( '=' === $input[ $updated_offset ] ) { - ++$updated_offset; - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT; - } elseif ( '=' === $input[ $updated_offset + 1 ] ) { - switch ( $input[ $updated_offset ] ) { - case '~': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT; - $updated_offset += 2; - break; - case '|': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN; - $updated_offset += 2; - break; - case '^': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY; - $updated_offset += 2; - break; - case '$': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY; - $updated_offset += 2; - break; - case '*': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_CONTAINS; - $updated_offset += 2; - break; - default: - return null; - } - } else { - return null; - } - - self::parse_whitespace( $input, $updated_offset ); - $attr_val = - self::parse_string( $input, $updated_offset ) ?? 
- self::parse_ident( $input, $updated_offset ); - - if ( null === $attr_val ) { - return null; - } - - self::parse_whitespace( $input, $updated_offset ); - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - - $attr_modifier = null; - switch ( $input[ $updated_offset ] ) { - case 'i': - case 'I': - $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; - ++$updated_offset; - break; - - case 's': - case 'S': - $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; - ++$updated_offset; - break; - } - - if ( null !== $attr_modifier ) { - self::parse_whitespace( $input, $updated_offset ); - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - } - - if ( ']' === $input[ $updated_offset ] ) { - $offset = $updated_offset + 1; - return new self( $attr_name, $attr_matcher, $attr_val, $attr_modifier ); - } - - return null; - } } /** @@ -1015,7 +1166,7 @@ public static function parse( string $input, int &$offset ): ?self { * * > = [ ? * ]! */ -final class WP_CSS_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +final class WP_CSS_Compound_Selector implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! $this->type_selector->matches( $processor ) ) { @@ -1042,65 +1193,18 @@ public function matches( WP_HTML_Processor $processor ): bool { * @param WP_CSS_Type_Selector|null $type_selector * @param array $subclass_selectors */ - private function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { + public function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { $this->type_selector = $type_selector; $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; } - - /** - * > = [ ? * ]! 
- */ - public static function parse( string $input, int &$offset ): ?self { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - $type_selector = WP_CSS_Type_Selector::parse( $input, $updated_offset ); - - $subclass_selectors = array(); - $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); - while ( null !== $last_parsed_subclass_selector ) { - $subclass_selectors[] = $last_parsed_subclass_selector; - $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); - } - - if ( null !== $type_selector || array() !== $subclass_selectors ) { - $offset = $updated_offset; - return new self( $type_selector, $subclass_selectors ); - } - return null; - } - - /** - * @return WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector|null - */ - private static function parse_subclass_selector( string $input, int &$offset ) { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $next_char = $input[ $offset ]; - return '.' === $next_char - ? WP_CSS_Class_Selector::parse( $input, $offset ) - : ( - '#' === $next_char - ? WP_CSS_ID_Selector::parse( $input, $offset ) - : ( '[' === $next_char - ? WP_CSS_Attribute_Selector::parse( $input, $offset ) - : null - ) - ); - } } - /** * This corresponds to in the grammar. * * > = [ ? ] * */ -final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser implements IWP_CSS_Selector_Parser, IWP_CSS_Selector_Matcher { +final class WP_CSS_Complex_Selector implements IWP_CSS_Selector_Matcher { public function matches( WP_HTML_Processor $processor ): bool { // First selector must match this location. if ( ! $this->selectors[0]->matches( $processor ) ) { @@ -1120,7 +1224,7 @@ public function matches( WP_HTML_Processor $processor ): bool { /** * This only looks at breadcrumbs and can therefore only support type selectors. 
* - * @param array $selectors + * @param array $selectors * @param array $breadcrumbs */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { @@ -1133,7 +1237,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { /** @var self::COMBINATOR_* $combinator */ $combinator = $selectors[0]; - /** @var WP_CSS_Selector $selector */ + /** @var WP_CSS_Compound_Selector $selector */ $selector = $selectors[1]; switch ( $combinator ) { @@ -1166,78 +1270,18 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { const COMBINATOR_SUBSEQUENT_SIBLING = '~'; /** - * even indexes are WP_CSS_Selector, odd indexes are string combinators. + * even indexes are WP_CSS_Compound_Selector, odd indexes are string combinators. * In reverse order to match the current element and then work up the tree. * Any non-final selector is a type selector. * - * @var array + * @var array */ public $selectors = array(); /** - * @param array $selectors + * @param array $selectors */ - private function __construct( array $selectors ) { + public function __construct( array $selectors ) { $this->selectors = array_reverse( $selectors ); } - - public static function parse( string $input, int &$offset ): ?self { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - $selector = WP_CSS_Selector::parse( $input, $updated_offset ); - if ( null === $selector ) { - return null; - } - - $selectors = array( $selector ); - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - while ( $updated_offset < strlen( $input ) ) { - if ( - self::COMBINATOR_CHILD === $input[ $updated_offset ] || - self::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || - self::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] - ) { - $combinator = $input[ $updated_offset ]; - ++$updated_offset; - 
self::parse_whitespace( $input, $updated_offset ); - - // Failure to find a selector here is a parse error - $selector = WP_CSS_Selector::parse( $input, $updated_offset ); - } elseif ( $found_whitespace ) { - /* - * Whitespace is ambiguous, it could be a descendant combinator or - * insignificant whitespace. - */ - $selector = WP_CSS_Selector::parse( $input, $updated_offset ); - if ( null === $selector ) { - break; - } - $combinator = self::COMBINATOR_DESCENDANT; - } else { - break; - } - - if ( null === $selector ) { - return null; - } - - // `div > .className` is valid, but `.className > div` is not. - if ( $has_preceding_subclass_selector ) { - throw new Exception( 'Unsupported non-final subclass selector.' ); - } - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; - - $selectors[] = $combinator; - $selectors[] = $selector; - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - } - $offset = $updated_offset; - return new self( $selectors ); - } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 438dee4c47f4e..bee0f63824abd 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -642,7 +642,7 @@ public function get_unsupported_exception() { * @return Generator|null */ public function select_all( string $selectors ): ?Generator { - $select = WP_CSS_Selector_List::from_selectors( $selectors ); + $select = WP_CSS_Selector::from_selectors( $selectors ); if ( null === $select ) { return null; } diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelectors.php index 5983f91c5d9ba..19c1595253d84 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelectors.php @@ -11,6 +11,63 @@ * @group html-api */ class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { + private $test_class; + 
+ public function set_up(): void { + parent::set_up(); + $this->test_class = new class() extends WP_CSS_Selector { + public function __construct() { + parent::__construct( array() ); + } + + /* + * Parsing + */ + public static function test_parse_ident( string $input, int &$offset ) { + return self::parse_ident( $input, $offset ); + } + + public static function test_parse_string( string $input, int &$offset ) { + return self::parse_string( $input, $offset ); + } + + public static function test_parse_type_selector( string $input, int &$offset ) { + return self::parse_type_selector( $input, $offset ); + } + + public static function test_parse_id_selector( string $input, int &$offset ) { + return self::parse_id_selector( $input, $offset ); + } + + public static function test_parse_class_selector( string $input, int &$offset ) { + return self::parse_class_selector( $input, $offset ); + } + + public static function test_parse_attribute_selector( string $input, int &$offset ) { + return self::parse_attribute_selector( $input, $offset ); + } + + public static function test_parse_compound_selector( string $input, int &$offset ) { + return self::parse_compound_selector( $input, $offset ); + } + + public static function test_parse_complex_selector( string $input, int &$offset ) { + return self::parse_complex_selector( $input, $offset ); + } + + /* + * Utilities + */ + public static function test_is_ident_codepoint( string $input, int $offset ) { + return self::is_ident_codepoint( $input, $offset ); + } + + public static function test_is_ident_start_codepoint( string $input, int $offset ) { + return self::is_ident_start_codepoint( $input, $offset ); + } + }; + } + /** * Data provider. 
* @@ -64,22 +121,10 @@ public static function data_idents(): array { * @ticket TBD */ public function test_is_ident_and_is_ident_start() { - $c = new class() extends WP_CSS_Selector_Parser { - public static function parse( string $input, int &$offset ) {} - - public static function test_is_ident( string $input, int $offset ) { - return self::is_ident_codepoint( $input, $offset ); - } - - public static function test_is_ident_start( string $input, int $offset ) { - return self::is_ident_start_codepoint( $input, $offset ); - } - }; - - $this->assertFalse( $c::test_is_ident( '[', 0 ) ); - $this->assertFalse( $c::test_is_ident( ']', 0 ) ); - $this->assertFalse( $c::test_is_ident_start( '[', 0 ) ); - $this->assertFalse( $c::test_is_ident_start( ']', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_codepoint( '[', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_codepoint( ']', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( '[', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( ']', 0 ) ); } /** @@ -88,15 +133,9 @@ public static function test_is_ident_start( string $input, int $offset ) { * @dataProvider data_idents */ public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { - $c = new class() extends WP_CSS_Selector_Parser { - public static function parse( string $input, int &$offset ) {} - public static function test( string $input, &$offset ) { - return self::parse_ident( $input, $offset ); - } - }; $offset = 0; - $result = $c::test( $input, $offset ); + $result = $this->test_class::test_parse_ident( $input, $offset ); if ( null === $expected ) { $this->assertNull( $result ); } else { @@ -111,15 +150,8 @@ public static function test( string $input, &$offset ) { * @dataProvider data_strings */ public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) { - $c = new class() extends WP_CSS_Selector_Parser 
{ - public static function parse( string $input, int &$offset ) {} - public static function test( string $input, &$offset ) { - return self::parse_string( $input, $offset ); - } - }; - $offset = 0; - $result = $c::test( $input, $offset ); + $result = $this->test_class::test_parse_string( $input, $offset ); if ( null === $expected ) { $this->assertNull( $result ); } else { @@ -170,7 +202,7 @@ public static function data_strings(): array { */ public function test_parse_id( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; - $result = WP_CSS_ID_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_id_selector( $input, $offset ); if ( null === $expected ) { $this->assertNull( $result ); } else { @@ -204,7 +236,7 @@ public static function data_id_selectors(): array { */ public function test_parse_class( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; - $result = WP_CSS_Class_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_class_selector( $input, $offset ); if ( null === $expected ) { $this->assertNull( $result ); } else { @@ -238,7 +270,7 @@ public static function data_class_selectors(): array { */ public function test_parse_type( string $input, ?string $expected = null, ?string $rest = null ) { $offset = 0; - $result = WP_CSS_Type_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_type_selector( $input, $offset ); if ( null === $expected ) { $this->assertNull( $result ); } else { @@ -281,7 +313,7 @@ public function test_parse_attribute( ?string $rest = null ) { $offset = 0; - $result = WP_CSS_Attribute_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_attribute_selector( $input, $offset ); if ( null === $expected_name ) { $this->assertNull( $result ); } else { @@ -347,7 +379,7 @@ public static function data_attribute_selectors(): array { public function test_parse_selector() { $input = 
'el.foo#bar[baz=quux] > .child'; $offset = 0; - $sel = WP_CSS_Selector::parse( $input, $offset ); + $sel = $this->test_class::test_parse_compound_selector( $input, $offset ); $this->assertSame( 'el', $sel->type_selector->ident ); $this->assertSame( 3, count( $sel->subclass_selectors ) ); @@ -365,8 +397,9 @@ public function test_parse_selector() { public function test_parse_empty_selector() { $input = ''; $offset = 0; - $result = WP_CSS_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_compound_selector( $input, $offset ); $this->assertNull( $result ); + $this->assertSame( 0, $offset ); } /** @@ -375,7 +408,7 @@ public function test_parse_empty_selector() { public function test_parse_complex_selector() { $input = 'el1 > .child#bar[baz=quux] , rest'; $offset = 0; - $sel = WP_CSS_Complex_Selector::parse( $input, $offset ); + $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); $this->assertSame( 3, count( $sel->selectors ) ); @@ -398,14 +431,14 @@ public function test_parse_complex_selector() { public function test_parse_invalid_complex_selector() { $input = 'el.foo#bar[baz=quux] > , rest'; $offset = 0; - $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_complex_selector( $input, $offset ); $this->assertNull( $result ); } public function test_parse_empty_complex_selector() { $input = ''; $offset = 0; - $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $result = $this->test_class::test_parse_complex_selector( $input, $offset ); $this->assertNull( $result ); } @@ -415,7 +448,7 @@ public function test_parse_empty_complex_selector() { */ public function test_parse_selector_list() { $input = 'el1 el2 el.foo#bar[baz=quux], rest'; - $result = WP_CSS_Selector_List::from_selectors( $input ); + $result = WP_CSS_Selector::from_selectors( $input ); $this->assertNotNull( $result ); } @@ -424,7 +457,7 @@ public function test_parse_selector_list() { */ public function 
test_parse_invalid_selector_list() { $input = 'el,,'; - $result = WP_CSS_Selector_List::from_selectors( $input ); + $result = WP_CSS_Selector::from_selectors( $input ); $this->assertNull( $result ); } @@ -433,7 +466,7 @@ public function test_parse_invalid_selector_list() { */ public function test_parse_invalid_selector_list2() { $input = 'el!'; - $result = WP_CSS_Selector_List::from_selectors( $input ); + $result = WP_CSS_Selector::from_selectors( $input ); $this->assertNull( $result ); } @@ -442,7 +475,7 @@ public function test_parse_invalid_selector_list2() { */ public function test_parse_empty_selector_list() { $input = " \t \t\n\r\f"; - $result = WP_CSS_Selector_List::from_selectors( $input ); + $result = WP_CSS_Selector::from_selectors( $input ); $this->assertNull( $result ); } } From 6a6969f435d659f9fc26c208faf4495c18c60278 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 3 Dec 2024 18:22:35 +0100 Subject: [PATCH 070/129] Rename files to align with class name --- .../{class-wp-css-selectors.php => class-wp-css-selector.php} | 0 src/wp-settings.php | 2 +- .../html-api/{wpCssSelectors.php => wpCssSelector-parsing.php} | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/wp-includes/html-api/{class-wp-css-selectors.php => class-wp-css-selector.php} (100%) rename tests/phpunit/tests/html-api/{wpCssSelectors.php => wpCssSelector-parsing.php} (99%) diff --git a/src/wp-includes/html-api/class-wp-css-selectors.php b/src/wp-includes/html-api/class-wp-css-selector.php similarity index 100% rename from src/wp-includes/html-api/class-wp-css-selectors.php rename to src/wp-includes/html-api/class-wp-css-selector.php diff --git a/src/wp-settings.php b/src/wp-settings.php index 6c799d5c95140..cfdd9234b7003 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -265,7 +265,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . 
'/html-api/class-wp-html-processor.php'; -require ABSPATH . WPINC . '/html-api/class-wp-css-selectors.php'; +require ABSPATH . WPINC . '/html-api/class-wp-css-selector.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; require ABSPATH . WPINC . '/class-wp-http-curl.php'; diff --git a/tests/phpunit/tests/html-api/wpCssSelectors.php b/tests/phpunit/tests/html-api/wpCssSelector-parsing.php similarity index 99% rename from tests/phpunit/tests/html-api/wpCssSelectors.php rename to tests/phpunit/tests/html-api/wpCssSelector-parsing.php index 19c1595253d84..4caa186158149 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectors.php +++ b/tests/phpunit/tests/html-api/wpCssSelector-parsing.php @@ -10,7 +10,7 @@ * * @group html-api */ -class Tests_HtmlApi_WpCssSelectors extends WP_UnitTestCase { +class Tests_HtmlApi_WpCssSelector_Parsing extends WP_UnitTestCase { private $test_class; public function set_up(): void { From 27ca891846d35f6d18f0b0031147ece99bd11d9e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 3 Dec 2024 21:00:08 +0100 Subject: [PATCH 071/129] Add html processor select test suite --- .../tests/html-api/wpHtmlProcessor-select.php | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessor-select.php diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php new file mode 100644 index 0000000000000..e70dedcfcd3c4 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -0,0 +1,68 @@ +' ); + $this->assertFalse( $processor->select( 'div' ) ); + } + + /** + * @ticket TBD + * + * @dataProvider data_selectors + */ + public function test_select( string $html, string $selector ) { + $processor = WP_HTML_Processor::create_full_parser( $html ); + $this->assertTrue( $processor->select( $selector ) ); + $this->assertTrue( $processor->get_attribute( 'match' ) ); 
+ } + + /** + * Data provider. + * + * @return array + */ + public static function data_selectors(): array { + return array( + 'simple type' => array( '
', 'div' ), + 'any type' => array( '', '*' ), + 'simple class' => array( '
', '.x' ), + 'simple id' => array( '
', '#x' ), + 'simple attribute' => array( '
', '[att]' ), + 'attribute value' => array( '
', '[att=val]' ), + 'attribute quoted value' => array( '
', '[att="::"]' ), + 'complex any descendant' => array( '
', 'section *' ), + 'complex any child' => array( '
', 'section > *' ), + + 'list' => array( '

', 'a, p' ), + 'compound' => array( '

', 'section[att~="bar"]' ), + ); + } + + /** + * @ticket TBD + */ + public function test_select_all() { + $processor = WP_HTML_Processor::create_full_parser( '

' ); + $count = 0; + foreach ( $processor->select_all( 'div, .x, svg>rect, #y' ) as $_ ) { + ++$count; + $this->assertTrue( $processor->get_attribute( 'match' ) ); + } + $this->assertSame( 4, $count ); + } +} From 9ff276965a60f3a7ccd89facc67cc9d4b267d90e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 3 Dec 2024 21:00:30 +0100 Subject: [PATCH 072/129] Fix select types --- src/wp-includes/html-api/class-wp-html-processor.php | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index bee0f63824abd..23ca6edc4ff7e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -638,13 +638,15 @@ public function get_unsupported_exception() { /** * Use a selector to advance. * + * @todo _doing_it_wrong on null selector? + * * @param string $selectors - * @return Generator|null + * @return Generator */ public function select_all( string $selectors ): ?Generator { $select = WP_CSS_Selector::from_selectors( $selectors ); if ( null === $select ) { - return null; + return; } while ( $this->next_tag() ) { @@ -660,13 +662,10 @@ public function select_all( string $selectors ): ?Generator { * If iterating through matching elements, use `select_all` instead. 
* * @param string $selectors - * @return bool|null + * @return bool */ public function select( string $selectors ) { $selection = $this->select_all( $selectors ); - if ( null === $selection ) { - return null; - } foreach ( $selection as $_ ) { return true; } From d1a276b848ef8b9b5f954641ed762ad3d591b2cb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 13:55:57 +0100 Subject: [PATCH 073/129] Update class doc --- src/wp-includes/html-api/class-wp-css-selector.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php index 7588eb72294bd..c27c81593059d 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -23,8 +23,7 @@ * A subset of the CSS selector grammar is supported. The grammar is defined in the CSS Syntax * specification, which is available at {@link https://www.w3.org/TR/selectors/#grammar}. * - * @todo Review this grammar, especially the complex selector for accurate support information. - * The supported grammar is: + * This class is roughly analogous to the <selector-list> in the grammar. The supported grammar is: * * = * = # @@ -43,6 +42,7 @@ * * @link https://www.w3.org/TR/selectors/#grammar Refer to the grammar for more details. * + * Note that this grammar has been adapted and does not support the full CSS selector grammar. * Supported selector syntax: * - Type selectors (tag names, e.g. `div`) * - Class selectors (e.g. `.class-name`) @@ -61,11 +61,11 @@ * - Next sibling (`el + el`) * - Subsequent sibling (`el ~ el`) * - * Future ideas - * - Namespace type selectors could be implemented with select namespaces in order to - * select elements from a namespace, for example: - * - `svg|*` to select all SVG elements - * - `html|title` to select only HTML TITLE elements. 
+ * Future ideas: + * - Namespace type selectors could be implemented with select namespaces in order to + * select elements from a namespace, for example: + * - `svg|*` to select all SVG elements + * - `html|title` to select only HTML TITLE elements. * * @since TBD * From 4909b569c067ab556e81b0cbcce087d3d1867676 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 16:00:36 +0100 Subject: [PATCH 074/129] Improve select_ method arguments, docs, implementation --- .../html-api/class-wp-html-processor.php | 57 ++++++++++++++----- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 23ca6edc4ff7e..398c5c4fd096c 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -636,37 +636,64 @@ public function get_unsupported_exception() { } /** - * Use a selector to advance. + * Progress through a document pausing on tags matching the provided CSS selector string. + * + * @example + * + * $processor = WP_HTML_Processor::create_fragment( + * 'Example' + * ); + * foreach ( $processor->select_all( 'meta[property^="og:" i]' ) as $_ ) { + * // Loop is entered twice. + * var_dump( + * $processor->get_tag(), // string(4) "META" + * $processor->get_attribute( 'property' ), // string(7) "og:type" / string(14) "og:description" + * $processor->get_attribute( 'content' ), // string(7) "website" / string(11) "An example." + * ); + * } * - * @todo _doing_it_wrong on null selector? + * @since TBD * - * @param string $selectors - * @return Generator + * @param string $selector_string Selector string. + * @return Generator A generator pausing on each tag matching the selector. 
*/ - public function select_all( string $selectors ): ?Generator { - $select = WP_CSS_Selector::from_selectors( $selectors ); - if ( null === $select ) { + public function select_all( string $selector_string ): ?Generator { + $selector = WP_CSS_Selector::from_selectors( $selector_string ); + if ( null === $selector ) { return; } while ( $this->next_tag() ) { - if ( $select->matches( $this ) ) { + if ( $selector->matches( $this ) ) { yield; } } } /** - * Select the next matching element. + * Move to the next tag matching the provided CSS selector string. * - * If iterating through matching elements, use `select_all` instead. + * This method will stop at the next match. To progress through all matches, use + * the `select_all` method. * - * @param string $selectors - * @return bool + * @example + * + * $processor = WP_HTML_Processor::create_fragment( + * 'Example' + * ); + * $processor->select( 'meta[charset]' ); + * var_dump( + * $processor->get_tag(), // string(4) "META" + * $processor->get_attribute( 'charset' ), // string(5) "utf-8" + * ); + * + * @since TBD + * + * @param string $selector_string + * @return bool True if a matching tag was found, otherwise false. 
*/ - public function select( string $selectors ) { - $selection = $this->select_all( $selectors ); - foreach ( $selection as $_ ) { + public function select( string $selector_string ) { + foreach ( $this->select_all( $selector_string ) as $_ ) { return true; } return false; From 1d45225e46b85b2e8e9f8091cf9aefac3c46c2eb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 18:08:58 +0100 Subject: [PATCH 075/129] Split classes into their own files Satisfy the 1-class-per-file requirement --- .../class-wp-css-attribute-selector.php | 190 +++++++++ .../html-api/class-wp-css-class-selector.php | 14 + .../class-wp-css-complex-selector.php | 88 ++++ .../class-wp-css-compound-selector.php | 39 ++ .../html-api/class-wp-css-id-selector.php | 22 + .../html-api/class-wp-css-selector.php | 389 +----------------- .../html-api/class-wp-css-type-selector.php | 25 ++ ...nterface-wp-css-html-processor-matcher.php | 8 + src/wp-settings.php | 7 + 9 files changed, 396 insertions(+), 386 deletions(-) create mode 100644 src/wp-includes/html-api/class-wp-css-attribute-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-class-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-complex-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-compound-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-id-selector.php create mode 100644 src/wp-includes/html-api/class-wp-css-type-selector.php create mode 100644 src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php new file mode 100644 index 0000000000000..be7332c85b72d --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -0,0 +1,190 @@ +get_attribute( $this->name ); + if ( null === $att_value ) { + return false; + } + + if ( null === $this->value ) { + return true; + } + + if ( true === 
$att_value ) { + $att_value = ''; + } + + $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; + + switch ( $this->matcher ) { + case self::MATCH_EXACT: + return $case_insensitive + ? 0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value; + + case self::MATCH_ONE_OF_EXACT: + foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) { + if ( + $case_insensitive + ? 0 === strcasecmp( $val, $this->value ) + : $val === $this->value + ) { + return true; + } + } + return false; + + case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: + // Attempt the full match first + if ( + $case_insensitive + ? 0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value + ) { + return true; + } + + // Partial match + if ( strlen( $att_value ) < strlen( $this->value ) + 1 ) { + return false; + } + + $starts_with = "{$this->value}-"; + return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive ); + + case self::MATCH_PREFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive ); + + case self::MATCH_SUFFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive ); + + case self::MATCH_CONTAINS: + return false !== ( + $case_insensitive + ? stripos( $att_value, $this->value ) + : strpos( $att_value, $this->value ) + ); + } + + throw new Exception( 'Unreachable' ); + } + + /** + * @param string $input + * + * @return Generator + */ + private function whitespace_delimited_list( string $input ): Generator { + $offset = strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS ); + + while ( $offset < strlen( $input ) ) { + // Find the byte length until the next boundary. 
+ $length = strcspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset ); + if ( 0 === $length ) { + return; + } + + $value = substr( $input, $offset, $length ); + $offset += $length + strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset + $length ); + + yield $value; + } + } + + /** + * [att=val] + * Represents an element with the att attribute whose value is exactly "val". + */ + const MATCH_EXACT = 'MATCH_EXACT'; + + /** + * [attr~=value] + * Represents elements with an attribute name of attr whose value is a + * whitespace-separated list of words, one of which is exactly value. + */ + const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT'; + + /** + * [attr|=value] + * Represents elements with an attribute name of attr whose value can be exactly value or + * can begin with value immediately followed by a hyphen, - (U+002D). It is often used for + * language subcode matches. + */ + const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN'; + + /** + * [attr^=value] + * Represents elements with an attribute name of attr whose value is prefixed (preceded) + * by value. + */ + const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY'; + + /** + * [attr$=value] + * Represents elements with an attribute name of attr whose value is suffixed (followed) + * by value. + */ + const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY'; + + /** + * [attr*=value] + * Represents elements with an attribute name of attr whose value contains at least one + * occurrence of value within the string. + */ + const MATCH_CONTAINS = 'MATCH_CONTAINS'; + + /** + * Modifier for case sensitive matching + * [attr=value s] + */ + const MODIFIER_CASE_SENSITIVE = 'case-sensitive'; + + /** + * Modifier for case insensitive matching + * [attr=value i] + */ + const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; + + + /** + * The attribute name. + * + * @var string + */ + public $name; + + /** + * The attribute matcher. 
+ * + * @var null|self::MATCH_* + */ + public $matcher; + + /** + * The attribute value. + * + * @var string|null + */ + public $value; + + /** + * The attribute modifier. + * + * @var null|self::MODIFIER_* + */ + public $modifier; + + /** + * @param string $name + * @param null|self::MATCH_* $matcher + * @param null|string $value + * @param null|self::MODIFIER_* $modifier + */ + public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { + $this->name = $name; + $this->matcher = $matcher; + $this->value = $value; + $this->modifier = $modifier; + } +} diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php new file mode 100644 index 0000000000000..c4f858d4a05d9 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -0,0 +1,14 @@ +has_class( $this->ident ); + } + + /** @var string */ + public $ident; + + public function __construct( string $ident ) { + $this->ident = $ident; + } +} diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php new file mode 100644 index 0000000000000..520f3bf3d8fde --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -0,0 +1,88 @@ + in the grammar. + * + * > = [ ? ] * + */ +final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { + public function matches( WP_HTML_Processor $processor ): bool { + // First selector must match this location. + if ( ! 
$this->selectors[0]->matches( $processor ) ) { + return false; + } + + if ( count( $this->selectors ) === 1 ) { + return true; + } + + /** @var array $breadcrumbs */ + $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); + $selectors = array_slice( $this->selectors, 1 ); + return $this->explore_matches( $selectors, $breadcrumbs ); + } + + /** + * This only looks at breadcrumbs and can therefore only support type selectors. + * + * @param array $selectors + * @param array $breadcrumbs + */ + private function explore_matches( array $selectors, array $breadcrumbs ): bool { + if ( array() === $selectors ) { + return true; + } + if ( array() === $breadcrumbs ) { + return false; + } + + /** @var self::COMBINATOR_* $combinator */ + $combinator = $selectors[0]; + /** @var WP_CSS_Compound_Selector $selector */ + $selector = $selectors[1]; + + switch ( $combinator ) { + case self::COMBINATOR_CHILD: + if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[0], $selector->type_selector->ident ) === 0 ) { + return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); + } + return $this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); + + case self::COMBINATOR_DESCENDANT: + // Find _all_ the breadcrumbs that match and recurse from each of them. + for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { + if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[ $i ], $selector->type_selector->ident ) === 0 ) { + $next_crumbs = array_slice( $breadcrumbs, $i + 1 ); + if ( $this->explore_matches( array_slice( $selectors, 2 ), $next_crumbs ) ) { + return true; + } + } + } + return false; + + default: + throw new Exception( "Combinator '{$combinator}' is not supported yet." 
); + } + } + + const COMBINATOR_CHILD = '>'; + const COMBINATOR_DESCENDANT = ' '; + const COMBINATOR_NEXT_SIBLING = '+'; + const COMBINATOR_SUBSEQUENT_SIBLING = '~'; + + /** + * even indexes are WP_CSS_Compound_Selector, odd indexes are string combinators. + * In reverse order to match the current element and then work up the tree. + * Any non-final selector is a type selector. + * + * @var array + */ + public $selectors = array(); + + /** + * @param array $selectors + */ + public function __construct( array $selectors ) { + $this->selectors = array_reverse( $selectors ); + } +} diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php new file mode 100644 index 0000000000000..1162aaef78c1e --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -0,0 +1,39 @@ + in the grammar. + * + * > = [ ? * ]! + */ +final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Processor_Matcher { + public function matches( WP_HTML_Processor $processor ): bool { + if ( $this->type_selector ) { + if ( ! $this->type_selector->matches( $processor ) ) { + return false; + } + } + if ( null !== $this->subclass_selectors ) { + foreach ( $this->subclass_selectors as $subclass_selector ) { + if ( ! $subclass_selector->matches( $processor ) ) { + return false; + } + } + } + return true; + } + + /** @var WP_CSS_Type_Selector|null */ + public $type_selector; + + /** @var array|null */ + public $subclass_selectors; + + /** + * @param WP_CSS_Type_Selector|null $type_selector + * @param array $subclass_selectors + */ + public function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { + $this->type_selector = $type_selector; + $this->subclass_selectors = array() === $subclass_selectors ? 
null : $subclass_selectors; + } +} diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php new file mode 100644 index 0000000000000..cc0589327c829 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -0,0 +1,22 @@ +ident = $ident; + } + + public function matches( WP_HTML_Processor $processor ): bool { + $id = $processor->get_attribute( 'id' ); + if ( ! is_string( $id ) ) { + return false; + } + + $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); + return $case_insensitive + ? 0 === strcasecmp( $id, $this->ident ) + : $processor->get_attribute( 'id' ) === $this->ident; + } +} diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php index c27c81593059d..b776bad66146b 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -1,10 +1,6 @@ -get_token_type() !== '#tag' ) { return false; @@ -906,382 +902,3 @@ private static function check_if_three_code_points_would_start_an_ident_sequence return self::is_ident_start_codepoint( $input, $offset ); } } - -interface IWP_CSS_Selector_Matcher { - /** - * @return bool - */ - public function matches( WP_HTML_Processor $processor ): bool; -} - -final class WP_CSS_ID_Selector implements IWP_CSS_Selector_Matcher { - /** @var string */ - public $ident; - - public function __construct( string $ident ) { - $this->ident = $ident; - } - - public function matches( WP_HTML_Processor $processor ): bool { - $id = $processor->get_attribute( 'id' ); - if ( ! is_string( $id ) ) { - return false; - } - - $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); - return $case_insensitive - ? 
0 === strcasecmp( $id, $this->ident ) - : $processor->get_attribute( 'id' ) === $this->ident; - } -} - -final class WP_CSS_Class_Selector implements IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - return (bool) $processor->has_class( $this->ident ); - } - - /** @var string */ - public $ident; - - public function __construct( string $ident ) { - $this->ident = $ident; - } -} - -final class WP_CSS_Type_Selector implements IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - $tag_name = $processor->get_tag(); - if ( null === $tag_name ) { - return false; - } - if ( '*' === $this->ident ) { - return true; - } - return 0 === strcasecmp( $tag_name, $this->ident ); - } - - /** - * @var string - * - * The type identifier string or '*'. - */ - public $ident; - - public function __construct( string $ident ) { - $this->ident = $ident; - } -} - -final class WP_CSS_Attribute_Selector implements IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - $att_value = $processor->get_attribute( $this->name ); - if ( null === $att_value ) { - return false; - } - - if ( null === $this->value ) { - return true; - } - - if ( true === $att_value ) { - $att_value = ''; - } - - $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; - - switch ( $this->matcher ) { - case self::MATCH_EXACT: - return $case_insensitive - ? 0 === strcasecmp( $att_value, $this->value ) - : $att_value === $this->value; - - case self::MATCH_ONE_OF_EXACT: - foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) { - if ( - $case_insensitive - ? 0 === strcasecmp( $val, $this->value ) - : $val === $this->value - ) { - return true; - } - } - return false; - - case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: - // Attempt the full match first - if ( - $case_insensitive - ? 
0 === strcasecmp( $att_value, $this->value ) - : $att_value === $this->value - ) { - return true; - } - - // Partial match - if ( strlen( $att_value ) < strlen( $this->value ) + 1 ) { - return false; - } - - $starts_with = "{$this->value}-"; - return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive ); - - case self::MATCH_PREFIXED_BY: - return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive ); - - case self::MATCH_SUFFIXED_BY: - return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive ); - - case self::MATCH_CONTAINS: - return false !== ( - $case_insensitive - ? stripos( $att_value, $this->value ) - : strpos( $att_value, $this->value ) - ); - } - - throw new Exception( 'Unreachable' ); - } - - /** - * @param string $input - * - * @return Generator - */ - private function whitespace_delimited_list( string $input ): Generator { - $offset = strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS ); - - while ( $offset < strlen( $input ) ) { - // Find the byte length until the next boundary. - $length = strcspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset ); - if ( 0 === $length ) { - return; - } - - $value = substr( $input, $offset, $length ); - $offset += $length + strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset + $length ); - - yield $value; - } - } - - /** - * [att=val] - * Represents an element with the att attribute whose value is exactly "val". - */ - const MATCH_EXACT = 'MATCH_EXACT'; - - /** - * [attr~=value] - * Represents elements with an attribute name of attr whose value is a - * whitespace-separated list of words, one of which is exactly value. - */ - const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT'; - - /** - * [attr|=value] - * Represents elements with an attribute name of attr whose value can be exactly value or - * can begin with value immediately followed by a hyphen, - (U+002D). 
It is often used for - * language subcode matches. - */ - const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN'; - - /** - * [attr^=value] - * Represents elements with an attribute name of attr whose value is prefixed (preceded) - * by value. - */ - const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY'; - - /** - * [attr$=value] - * Represents elements with an attribute name of attr whose value is suffixed (followed) - * by value. - */ - const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY'; - - /** - * [attr*=value] - * Represents elements with an attribute name of attr whose value contains at least one - * occurrence of value within the string. - */ - const MATCH_CONTAINS = 'MATCH_CONTAINS'; - - /** - * Modifier for case sensitive matching - * [attr=value s] - */ - const MODIFIER_CASE_SENSITIVE = 'case-sensitive'; - - /** - * Modifier for case insensitive matching - * [attr=value i] - */ - const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; - - - /** - * The attribute name. - * - * @var string - */ - public $name; - - /** - * The attribute matcher. - * - * @var null|self::MATCH_* - */ - public $matcher; - - /** - * The attribute value. - * - * @var string|null - */ - public $value; - - /** - * The attribute modifier. - * - * @var null|self::MODIFIER_* - */ - public $modifier; - - /** - * @param string $name - * @param null|self::MATCH_* $matcher - * @param null|string $value - * @param null|self::MODIFIER_* $modifier - */ - public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { - $this->name = $name; - $this->matcher = $matcher; - $this->value = $value; - $this->modifier = $modifier; - } -} - -/** - * This corresponds to in the grammar. - * - * > = [ ? * ]! - */ -final class WP_CSS_Compound_Selector implements IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - if ( $this->type_selector ) { - if ( ! 
$this->type_selector->matches( $processor ) ) { - return false; - } - } - if ( null !== $this->subclass_selectors ) { - foreach ( $this->subclass_selectors as $subclass_selector ) { - if ( ! $subclass_selector->matches( $processor ) ) { - return false; - } - } - } - return true; - } - - /** @var WP_CSS_Type_Selector|null */ - public $type_selector; - - /** @var array|null */ - public $subclass_selectors; - - /** - * @param WP_CSS_Type_Selector|null $type_selector - * @param array $subclass_selectors - */ - public function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { - $this->type_selector = $type_selector; - $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; - } -} - -/** - * This corresponds to in the grammar. - * - * > = [ ? ] * - */ -final class WP_CSS_Complex_Selector implements IWP_CSS_Selector_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { - // First selector must match this location. - if ( ! $this->selectors[0]->matches( $processor ) ) { - return false; - } - - if ( count( $this->selectors ) === 1 ) { - return true; - } - - /** @var array $breadcrumbs */ - $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); - $selectors = array_slice( $this->selectors, 1 ); - return $this->explore_matches( $selectors, $breadcrumbs ); - } - - /** - * This only looks at breadcrumbs and can therefore only support type selectors. 
- * - * @param array $selectors - * @param array $breadcrumbs - */ - private function explore_matches( array $selectors, array $breadcrumbs ): bool { - if ( array() === $selectors ) { - return true; - } - if ( array() === $breadcrumbs ) { - return false; - } - - /** @var self::COMBINATOR_* $combinator */ - $combinator = $selectors[0]; - /** @var WP_CSS_Compound_Selector $selector */ - $selector = $selectors[1]; - - switch ( $combinator ) { - case self::COMBINATOR_CHILD: - if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[0], $selector->type_selector->ident ) === 0 ) { - return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); - } - return $this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); - - case self::COMBINATOR_DESCENDANT: - // Find _all_ the breadcrumbs that match and recurse from each of them. - for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { - if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[ $i ], $selector->type_selector->ident ) === 0 ) { - $next_crumbs = array_slice( $breadcrumbs, $i + 1 ); - if ( $this->explore_matches( array_slice( $selectors, 2 ), $next_crumbs ) ) { - return true; - } - } - } - return false; - - default: - throw new Exception( "Combinator '{$combinator}' is not supported yet." ); - } - } - - const COMBINATOR_CHILD = '>'; - const COMBINATOR_DESCENDANT = ' '; - const COMBINATOR_NEXT_SIBLING = '+'; - const COMBINATOR_SUBSEQUENT_SIBLING = '~'; - - /** - * even indexes are WP_CSS_Compound_Selector, odd indexes are string combinators. - * In reverse order to match the current element and then work up the tree. - * Any non-final selector is a type selector. 
- * - * @var array - */ - public $selectors = array(); - - /** - * @param array $selectors - */ - public function __construct( array $selectors ) { - $this->selectors = array_reverse( $selectors ); - } -} diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php new file mode 100644 index 0000000000000..a2dcd16521cb5 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -0,0 +1,25 @@ +get_tag(); + if ( null === $tag_name ) { + return false; + } + if ( '*' === $this->ident ) { + return true; + } + return 0 === strcasecmp( $tag_name, $this->ident ); + } + + /** + * @var string + * + * The type identifier string or '*'. + */ + public $ident; + + public function __construct( string $ident ) { + $this->ident = $ident; + } +} diff --git a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php new file mode 100644 index 0000000000000..2ae29413b35d2 --- /dev/null +++ b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php @@ -0,0 +1,8 @@ + Date: Wed, 4 Dec 2024 18:09:17 +0100 Subject: [PATCH 076/129] Remove redundant see phpdoc annotations --- src/wp-includes/html-api/class-wp-css-selector.php | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-selector.php index b776bad66146b..487c100ab47e4 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-selector.php @@ -67,11 +67,10 @@ * * @access private * - * @see {@link https://www.w3.org/TR/css-syntax-3/} - * @see {@link https://www.w3.org/tr/selectors/} - * @see {@link https://www.w3.org/TR/selectors-api2/} - * @see {@link https://www.w3.org/TR/selectors-4/} - * + * @link https://www.w3.org/TR/css-syntax-3/ + * @link https://www.w3.org/tr/selectors/ + * @link 
https://www.w3.org/TR/selectors-api2/ + * @link https://www.w3.org/TR/selectors-4/ */ class WP_CSS_Selector implements WP_CSS_HTML_Processor_Matcher { public function matches( WP_HTML_Processor $processor ): bool { From 0c53c422de2f40206b9322f8f0ae3beaf85b5e4b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 18:28:54 +0100 Subject: [PATCH 077/129] Fix docs and return type on select_all --- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 398c5c4fd096c..9f7a43acaebbd 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -657,7 +657,7 @@ public function get_unsupported_exception() { * @param string $selector_string Selector string. * @return Generator A generator pausing on each tag matching the selector. */ - public function select_all( string $selector_string ): ?Generator { + public function select_all( string $selector_string ): Generator { $selector = WP_CSS_Selector::from_selectors( $selector_string ); if ( null === $selector ) { return; @@ -674,7 +674,7 @@ public function select_all( string $selector_string ): ?Generator { * Move to the next tag matching the provided CSS selector string. * * This method will stop at the next match. To progress through all matches, use - * the `select_all` method. + * the {@see WP_HTML_Processor::select_all()} method. 
* * @example * From d966e9ad7fdc9270fded62abb9e32923ced79d61 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 18:31:05 +0100 Subject: [PATCH 078/129] Improve html select test docs --- tests/phpunit/tests/html-api/wpHtmlProcessor-select.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index e70dedcfcd3c4..c3a1e4121ecab 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -1,6 +1,9 @@ Date: Wed, 4 Dec 2024 19:40:23 +0100 Subject: [PATCH 079/129] Add select support to tag processor Split up main CSS selector class and support more restricted selectors in the tag processor. --- .../class-wp-css-attribute-selector.php | 12 +- .../html-api/class-wp-css-class-selector.php | 4 +- .../class-wp-css-complex-selector-list.php | 165 ++++++++++++++++++ ...> class-wp-css-compound-selector-list.php} | 126 ++++--------- .../class-wp-css-compound-selector.php | 4 +- .../html-api/class-wp-css-id-selector.php | 7 +- .../html-api/class-wp-css-type-selector.php | 4 +- .../html-api/class-wp-html-processor.php | 11 +- .../html-api/class-wp-html-tag-processor.php | 69 ++++++++ ...face-wp-css-html-tag-processor-matcher.php | 8 + src/wp-settings.php | 4 +- .../html-api/wpCssComplexSelectorList.php | 107 ++++++++++++ ...sing.php => wpCssCompoundSelectorList.php} | 59 +------ .../tests/html-api/wpHtmlProcessor-select.php | 10 ++ .../html-api/wpHtmlTagProcessor-select.php | 92 ++++++++++ 15 files changed, 520 insertions(+), 162 deletions(-) create mode 100644 src/wp-includes/html-api/class-wp-css-complex-selector-list.php rename src/wp-includes/html-api/{class-wp-css-selector.php => class-wp-css-compound-selector-list.php} (87%) create mode 100644 src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php create mode 100644 
tests/phpunit/tests/html-api/wpCssComplexSelectorList.php rename tests/phpunit/tests/html-api/{wpCssSelector-parsing.php => wpCssCompoundSelectorList.php} (89%) create mode 100644 tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index be7332c85b72d..76ccdf3804b36 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -1,7 +1,9 @@ get_attribute( $this->name ); if ( null === $att_value ) { return false; @@ -76,17 +78,17 @@ public function matches( WP_HTML_Processor $processor ): bool { * @return Generator */ private function whitespace_delimited_list( string $input ): Generator { - $offset = strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS ); + $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); while ( $offset < strlen( $input ) ) { // Find the byte length until the next boundary. 
- $length = strcspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset ); + $length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); if ( 0 === $length ) { return; } $value = substr( $input, $offset, $length ); - $offset += $length + strspn( $input, WP_CSS_Selector::WHITESPACE_CHARACTERS, $offset + $length ); + $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); yield $value; } diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index c4f858d4a05d9..c3e7ced008a6e 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -1,7 +1,7 @@ has_class( $this->ident ); } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php new file mode 100644 index 0000000000000..f3769a035f6e5 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -0,0 +1,165 @@ + in the grammar. See {@see WP_CSS_Compound_Selector_List} for more details on the grammar. + * + * This class supports the same selector syntax as {@see WP_CSS_Compound_Selector_List} as well as: + * - The following combinators: + * - Next sibling (`el + el`) + * - Subsequent sibling (`el ~ el`) + * + * @since TBD + * + * @access private + */ +class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher { + /** + * Takes a CSS selector string and returns an instance of itself or `null` if the selector + * string is invalid or unsupported. + * + * @since TBD + * + * @param string $input CSS selectors. 
+ * @return static|null + */ + public static function from_selectors( string $input ) { + // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… + $input = trim( $input, " \t\r\n\r" ); + + if ( '' === $input ) { + return null; + } + + /* + * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. + * > + * > To filter code points from a stream of (unfiltered) code points input: + * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). + * + * https://www.w3.org/TR/css-syntax-3/#input-preprocessing + */ + $input = str_replace( array( "\r\n" ), "\n", $input ); + $input = str_replace( array( "\r", "\f" ), "\n", $input ); + $input = str_replace( "\0", "\u{FFFD}", $input ); + + $offset = 0; + + $selector = self::parse_complex_selector( $input, $offset ); + if ( null === $selector ) { + return null; + } + self::parse_whitespace( $input, $offset ); + + $selectors = array( $selector ); + while ( $offset < strlen( $input ) ) { + // Each loop should stop on a `,` selector list delimiter. + if ( ',' !== $input[ $offset ] ) { + return null; + } + ++$offset; + self::parse_whitespace( $input, $offset ); + $selector = self::parse_complex_selector( $input, $offset ); + if ( null === $selector ) { + return null; + } + $selectors[] = $selector; + self::parse_whitespace( $input, $offset ); + } + + return new self( $selectors ); + } + + /* + * ------------------------------ + * Selector parsing functionality + * ------------------------------ + */ + + /** + * Parses a complex selector. + * + * > = [ ? 
]* + * + * @return WP_CSS_Complex_Selector|null + */ + final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $selector ) { + return null; + } + + $selectors = array( $selector ); + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + while ( $updated_offset < strlen( $input ) ) { + if ( + WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] + ) { + $combinator = $input[ $updated_offset ]; + ++$updated_offset; + self::parse_whitespace( $input, $updated_offset ); + + // Failure to find a selector here is a parse error + $selector = self::parse_compound_selector( $input, $updated_offset ); + } elseif ( $found_whitespace ) { + /* + * Whitespace is ambiguous, it could be a descendant combinator or + * insignificant whitespace. + */ + $selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $selector ) { + break; + } + $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; + } else { + break; + } + + if ( null === $selector ) { + return null; + } + + // `div > .className` is valid, but `.className > div` is not. + if ( $has_preceding_subclass_selector ) { + throw new Exception( 'Unsupported non-final subclass selector.' 
); + } + $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + + $selectors[] = $combinator; + $selectors[] = $selector; + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + } + $offset = $updated_offset; + return new WP_CSS_Complex_Selector( $selectors ); + } +} diff --git a/src/wp-includes/html-api/class-wp-css-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php similarity index 87% rename from src/wp-includes/html-api/class-wp-css-selector.php rename to src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 487c100ab47e4..2aae51d671f6b 100644 --- a/src/wp-includes/html-api/class-wp-css-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -1,6 +1,6 @@ in the grammar. The supported grammar is: + * This class is analogous to in the grammar. The supported grammar is: * * = * = # @@ -38,6 +40,10 @@ * * @link https://www.w3.org/TR/selectors/#grammar Refer to the grammar for more details. * + * This class of selectors does not support "complex" selectors. That is any selector with a + * combinator such as descendent (`.ancestor .descendant`) or child (`.parent > .child`). + * See {@see WP_CSS_Complex_Selector_List} for support of some combinators. + * * Note that this grammar has been adapted and does not support the full CSS selector grammar. * Supported selector syntax: * - Type selectors (tag names, e.g. `div`) @@ -50,12 +56,10 @@ * - child (`el > .child`) * * Unsupported selector syntax: - * - Pseudo-element selectors (e.g. `::before`) - * - Pseudo-class selectors (e.g. `:hover` or `:nth-child(2)`) - * - Namespace prefixes (e.g. 
`svg|title` or `[xlink|href]`) - * - The following combinators: - * - Next sibling (`el + el`) - * - Subsequent sibling (`el ~ el`) + * - Pseudo-element selectors (`::before`) + * - Pseudo-class selectors (`:hover` or `:nth-child(2)`) + * - Namespace prefixes (`svg|title` or `[xlink|href]`) + * - No combinators are supported (descendant, child, next sibling, subsequent sibling) * * Future ideas: * - Namespace type selectors could be implemented with select namespaces in order to @@ -72,8 +76,12 @@ * @link https://www.w3.org/TR/selectors-api2/ * @link https://www.w3.org/TR/selectors-4/ */ -class WP_CSS_Selector implements WP_CSS_HTML_Processor_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { +class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher { + /** + * @param WP_HTML_Tag_Processor $processor + * @return bool + */ + public function matches( $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { return false; } @@ -87,14 +95,16 @@ public function matches( WP_HTML_Processor $processor ): bool { } /** - * @var array + * Array of selectors. + * + * @var array */ private $selectors; /** * Constructor. * - * @param array $selectors + * @param array $selectors Array of selectors. */ protected function __construct( array $selectors ) { $this->selectors = $selectors; @@ -107,10 +117,9 @@ protected function __construct( array $selectors ) { * @since TBD * * @param string $input CSS selectors. 
- * @return self|null + * @return static|null */ - public static function from_selectors( string $input ): ?self { - // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… + public static function from_selectors( string $input ) { $input = trim( $input, " \t\r\n\r" ); if ( '' === $input ) { @@ -132,7 +141,7 @@ public static function from_selectors( string $input ): ?self { $offset = 0; - $selector = self::parse_complex_selector( $input, $offset ); + $selector = self::parse_compound_selector( $input, $offset ); if ( null === $selector ) { return null; } @@ -146,7 +155,7 @@ public static function from_selectors( string $input ): ?self { } ++$offset; self::parse_whitespace( $input, $offset ); - $selector = self::parse_complex_selector( $input, $offset ); + $selector = self::parse_compound_selector( $input, $offset ); if ( null === $selector ) { return null; } @@ -391,73 +400,6 @@ final protected static function parse_compound_selector( string $input, int &$of return null; } - /** - * Parses a complex selector. - * - * > = [ ? 
]* - * - * @return WP_CSS_Complex_Selector|null - */ - final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - $selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $selector ) { - return null; - } - - $selectors = array( $selector ); - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - while ( $updated_offset < strlen( $input ) ) { - if ( - WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || - WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || - WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] - ) { - $combinator = $input[ $updated_offset ]; - ++$updated_offset; - self::parse_whitespace( $input, $updated_offset ); - - // Failure to find a selector here is a parse error - $selector = self::parse_compound_selector( $input, $updated_offset ); - } elseif ( $found_whitespace ) { - /* - * Whitespace is ambiguous, it could be a descendant combinator or - * insignificant whitespace. - */ - $selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $selector ) { - break; - } - $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; - } else { - break; - } - - if ( null === $selector ) { - return null; - } - - // `div > .className` is valid, but `.className > div` is not. - if ( $has_preceding_subclass_selector ) { - throw new Exception( 'Unsupported non-final subclass selector.' 
); - } - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; - - $selectors[] = $combinator; - $selectors[] = $selector; - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - } - $offset = $updated_offset; - return new WP_CSS_Complex_Selector( $selectors ); - } - /** * Parses a subclass selector. * @@ -496,7 +438,7 @@ private static function parse_subclass_selector( string $input, int &$offset ) { const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; const WHITESPACE_CHARACTERS = " \t\r\n\f"; - public static function parse_whitespace( string $input, int &$offset ): bool { + final public static function parse_whitespace( string $input, int &$offset ): bool { $length = strspn( $input, self::WHITESPACE_CHARACTERS, $offset ); $advanced = $length > 0; $offset += $length; @@ -692,9 +634,9 @@ final protected static function parse_string( string $input, int &$offset ): ?st * * @param string $input * @param int $offset - * @return string|null + * @return string */ - final protected static function consume_escaped_codepoint( $input, &$offset ): ?string { + final protected static function consume_escaped_codepoint( $input, &$offset ): string { $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); if ( $hex_length > 0 ) { /** @@ -771,7 +713,7 @@ final protected static function consume_escaped_codepoint( $input, &$offset ): ? * @param int $offset The byte offset in the string. * @return bool True if the next two codepoints are a valid escape, otherwise false. */ - private static function next_two_are_valid_escape( string $input, int $offset ): bool { + final protected static function next_two_are_valid_escape( string $input, int $offset ): bool { if ( $offset + 1 >= strlen( $input ) ) { return false; } @@ -858,7 +800,7 @@ final protected static function is_ident_codepoint( string $input, int $offset ) * @param int $offset The byte offset in the string. 
* @return bool True if the next three codepoints would start an ident sequence, otherwise false. */ - private static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { + final protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { if ( $offset >= strlen( $input ) ) { return false; } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 1162aaef78c1e..e64695abe9ab3 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -5,8 +5,8 @@ * * > = [ ? * ]! */ -final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Processor_Matcher { - public function matches( WP_HTML_Processor $processor ): bool { +final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { + public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! $this->type_selector->matches( $processor ) ) { return false; diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index cc0589327c829..83339ff839317 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -1,6 +1,6 @@ ident = $ident; } - public function matches( WP_HTML_Processor $processor ): bool { + public function matches( WP_HTML_Tag_Processor $processor ): bool { $id = $processor->get_attribute( 'id' ); if ( ! is_string( $id ) ) { return false; } - $case_insensitive = method_exists( $processor, 'is_quirks_mode' ) && $processor->is_quirks_mode(); + $case_insensitive = $processor->is_quirks_mode(); + return $case_insensitive ? 
0 === strcasecmp( $id, $this->ident ) : $processor->get_attribute( 'id' ) === $this->ident; diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index a2dcd16521cb5..c65adce14047d 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -1,7 +1,7 @@ get_tag(); if ( null === $tag_name ) { return false; diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9f7a43acaebbd..bbca730279876 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -657,9 +657,14 @@ public function get_unsupported_exception() { * @param string $selector_string Selector string. * @return Generator A generator pausing on each tag matching the selector. */ - public function select_all( string $selector_string ): Generator { - $selector = WP_CSS_Selector::from_selectors( $selector_string ); + public function select_all( $selector_string ): Generator { + $selector = WP_CSS_Complex_Selector_List::from_selectors( $selector_string ); if ( null === $selector ) { + _doing_it_wrong( + __METHOD__, + sprintf( 'Received unsupported or invalid selector "%s".', $selector_string ), + '6.8' + ); return; } @@ -692,7 +697,7 @@ public function select_all( string $selector_string ): Generator { * @param string $selector_string * @return bool True if a matching tag was found, otherwise false. 
*/ - public function select( string $selector_string ) { + public function select( string $selector_string ): bool { foreach ( $this->select_all( $selector_string ) as $_ ) { return true; } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 7dadbc1bebdb2..a7633291b6bb2 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -860,6 +860,75 @@ public function change_parsing_namespace( string $new_namespace ): bool { return true; } + /** + * Progress through a document pausing on tags matching the provided CSS selector string. + * + * @example + * + * $processor = new WP_HTML_Tag_Processor( + * 'Example' + * ); + * foreach ( $processor->select_all( 'meta[property^="og:" i]' ) as $_ ) { + * // Loop is entered twice. + * var_dump( + * $processor->get_tag(), // string(4) "META" + * $processor->get_attribute( 'property' ), // string(7) "og:type" / string(14) "og:description" + * $processor->get_attribute( 'content' ), // string(7) "website" / string(11) "An example." + * ); + * } + * + * @since TBD + * + * @param string $selector_string Selector string. + * @return Generator A generator pausing on each tag matching the selector. + */ + public function select_all( $selector_string ): Generator { + $selector = WP_CSS_Compound_Selector_List::from_selectors( $selector_string ); + if ( null === $selector ) { + _doing_it_wrong( + __METHOD__, + sprintf( 'Received unsupported or invalid selector "%s".', $selector_string ), + '6.8' + ); + return; + } + + while ( $this->next_tag() ) { + if ( $selector->matches( $this ) ) { + yield; + } + } + } + + /** + * Move to the next tag matching the provided CSS selector string. + * + * This method will stop at the next match. To progress through all matches, use + * the {@see WP_HTML_Tag_Processor::select_all()} method. 
+ * + * @example + * + * $processor = new WP_HTML_Tag_Processor( + * 'Example' + * ); + * $processor->select( 'meta[charset]' ); + * var_dump( + * $processor->get_tag(), // string(4) "META" + * $processor->get_attribute( 'charset' ), // string(5) "utf-8" + * ); + * + * @since TBD + * + * @param string $selector_string + * @return bool True if a matching tag was found, otherwise false. + */ + public function select( string $selector_string ): bool { + foreach ( $this->select_all( $selector_string ) as $_ ) { + return true; + } + return false; + } + /** * Finds the next tag matching the $query. * diff --git a/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php new file mode 100644 index 0000000000000..73d108150bb95 --- /dev/null +++ b/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php @@ -0,0 +1,8 @@ +test_class = new class() extends WP_CSS_Complex_Selector_List { + public function __construct() { + parent::__construct( array() ); + } + + public static function test_parse_complex_selector( string $input, int &$offset ) { + return self::parse_complex_selector( $input, $offset ); + } + }; + } + + /** + * @ticket TBD + */ + public function test_parse_complex_selector() { + $input = 'el1 > .child#bar[baz=quux] , rest'; + $offset = 0; + $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); + + $this->assertSame( 3, count( $sel->selectors ) ); + + $this->assertSame( 'el1', $sel->selectors[2]->type_selector->ident ); + $this->assertNull( $sel->selectors[2]->subclass_selectors ); + + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->selectors[1] ); + + $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + $this->assertNull( $sel->selectors[0]->type_selector ); + $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); + $this->assertSame( 'child', 
$sel->selectors[0]->subclass_selectors[0]->ident ); + + $this->assertSame( ', rest', substr( $input, $offset ) ); + } + + /** + * @ticket TBD + */ + public function test_parse_invalid_complex_selector() { + $input = 'el.foo#bar[baz=quux] > , rest'; + $offset = 0; + $result = $this->test_class::test_parse_complex_selector( $input, $offset ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_empty_complex_selector() { + $input = ''; + $offset = 0; + $result = $this->test_class::test_parse_complex_selector( $input, $offset ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_complex_selector_list() { + $input = 'el1 el2 el.foo#bar[baz=quux], second > selector'; + $result = WP_CSS_Complex_Selector_List::from_selectors( $input ); + $this->assertNotNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_invalid_selector_list() { + $input = 'el,,'; + $result = WP_CSS_Complex_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_invalid_selector_list2() { + $input = 'el!'; + $result = WP_CSS_Complex_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } + + /** + * @ticket TBD + */ + public function test_parse_empty_selector_list() { + $input = " \t \t\n\r\f"; + $result = WP_CSS_Complex_Selector_List::from_selectors( $input ); + $this->assertNull( $result ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssSelector-parsing.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php similarity index 89% rename from tests/phpunit/tests/html-api/wpCssSelector-parsing.php rename to tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 4caa186158149..d94b61d49c14e 100644 --- a/tests/phpunit/tests/html-api/wpCssSelector-parsing.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -10,12 +10,12 @@ * * @group html-api */ -class 
Tests_HtmlApi_WpCssSelector_Parsing extends WP_UnitTestCase { +class Tests_HtmlApi_WpCssCompoundSelectorList extends WP_UnitTestCase { private $test_class; public function set_up(): void { parent::set_up(); - $this->test_class = new class() extends WP_CSS_Selector { + $this->test_class = new class() extends WP_CSS_Compound_Selector_List { public function __construct() { parent::__construct( array() ); } @@ -51,10 +51,6 @@ public static function test_parse_compound_selector( string $input, int &$offset return self::parse_compound_selector( $input, $offset ); } - public static function test_parse_complex_selector( string $input, int &$offset ) { - return self::parse_complex_selector( $input, $offset ); - } - /* * Utilities */ @@ -402,53 +398,12 @@ public function test_parse_empty_selector() { $this->assertSame( 0, $offset ); } - /** - * @ticket TBD - */ - public function test_parse_complex_selector() { - $input = 'el1 > .child#bar[baz=quux] , rest'; - $offset = 0; - $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); - - $this->assertSame( 3, count( $sel->selectors ) ); - - $this->assertSame( 'el1', $sel->selectors[2]->type_selector->ident ); - $this->assertNull( $sel->selectors[2]->subclass_selectors ); - - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->selectors[1] ); - - $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); - $this->assertNull( $sel->selectors[0]->type_selector ); - $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); - $this->assertSame( 'child', $sel->selectors[0]->subclass_selectors[0]->ident ); - - $this->assertSame( ', rest', substr( $input, $offset ) ); - } - - /** - * @ticket TBD - */ - public function test_parse_invalid_complex_selector() { - $input = 'el.foo#bar[baz=quux] > , rest'; - $offset = 0; - $result = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertNull( $result ); - } - - public function 
test_parse_empty_complex_selector() { - $input = ''; - $offset = 0; - $result = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertNull( $result ); - } - - /** * @ticket TBD */ public function test_parse_selector_list() { - $input = 'el1 el2 el.foo#bar[baz=quux], rest'; - $result = WP_CSS_Selector::from_selectors( $input ); + $input = 'el1, el2, el.foo#bar[baz=quux]'; + $result = WP_CSS_Compound_Selector_List::from_selectors( $input ); $this->assertNotNull( $result ); } @@ -457,7 +412,7 @@ public function test_parse_selector_list() { */ public function test_parse_invalid_selector_list() { $input = 'el,,'; - $result = WP_CSS_Selector::from_selectors( $input ); + $result = WP_CSS_Compound_Selector_List::from_selectors( $input ); $this->assertNull( $result ); } @@ -466,7 +421,7 @@ public function test_parse_invalid_selector_list() { */ public function test_parse_invalid_selector_list2() { $input = 'el!'; - $result = WP_CSS_Selector::from_selectors( $input ); + $result = WP_CSS_Compound_Selector_List::from_selectors( $input ); $this->assertNull( $result ); } @@ -475,7 +430,7 @@ public function test_parse_invalid_selector_list2() { */ public function test_parse_empty_selector_list() { $input = " \t \t\n\r\f"; - $result = WP_CSS_Selector::from_selectors( $input ); + $result = WP_CSS_Compound_Selector_List::from_selectors( $input ); $this->assertNull( $result ); } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index c3a1e4121ecab..733a7135f1b17 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -66,4 +66,14 @@ public function test_select_all() { } $this->assertSame( 4, $count ); } + + /** + * @ticket TBD + * + * @expectedIncorrectUsage WP_HTML_Processor::select_all + */ + public function test_invalid_selector() { + $processor = WP_HTML_Processor::create_fragment( 'irrelevant' ); 
+ $this->assertFalse( $processor->select( '[invalid!selector]' ) ); + } } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php new file mode 100644 index 0000000000000..c42c69ff0a095 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php @@ -0,0 +1,92 @@ +' ); + $this->assertFalse( $processor->select( 'div' ) ); + } + + /** + * @ticket TBD + * + * @dataProvider data_selectors + */ + public function test_select( string $html, string $selector ) { + $processor = new WP_HTML_Tag_Processor( $html ); + $this->assertTrue( $processor->select( $selector ) ); + $this->assertTrue( $processor->get_attribute( 'match' ) ); + } + + /** + * Data provider. + * + * @return array + */ + public static function data_selectors(): array { + return array( + 'simple type' => array( '

', 'div' ), + 'any type' => array( '', '*' ), + 'simple class' => array( '
', '.x' ), + 'simple id' => array( '
', '#x' ), + 'simple attribute' => array( '
', '[att]' ), + 'attribute value' => array( '
', '[att=val]' ), + 'attribute quoted value' => array( '
', '[att="::"]' ), + + 'list' => array( '

', 'a, p' ), + 'compound' => array( '

', 'section[att~="bar"]' ), + ); + } + + /** + * @ticket TBD + */ + public function test_select_all() { + $processor = new WP_HTML_Tag_Processor( '

' ); + $count = 0; + foreach ( $processor->select_all( 'div, .x, rect, #y' ) as $_ ) { + ++$count; + $this->assertTrue( $processor->get_attribute( 'match' ) ); + } + $this->assertSame( 4, $count ); + } + + /** + * @ticket TBD + * + * @expectedIncorrectUsage WP_HTML_Tag_Processor::select_all + * + * @dataProvider data_invalid_selectors + */ + public function test_invalid_selector( string $selector ) { + $processor = new WP_HTML_Tag_Processor( 'irrelevant' ); + $this->assertFalse( $processor->select( $selector ) ); + } + + /** + * Data provider. + * + * @return array + */ + public static function data_invalid_selectors(): array { + return array( + 'complex descendant' => array( 'div *' ), + 'complex child' => array( 'div > *' ), + 'invalid selector' => array( '[invalid!selector]' ), + ); + } +} From 2036a83f77a419fd1f3df89c7c7a316d4a42d5bb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:36:19 +0100 Subject: [PATCH 080/129] Simplify whitespace splitting function --- .../html-api/class-wp-css-attribute-selector.php | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 76ccdf3804b36..1a7a9ffb37716 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -78,16 +78,15 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { * @return Generator */ private function whitespace_delimited_list( string $input ): Generator { + // Start by skipping whitespace. $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); while ( $offset < strlen( $input ) ) { // Find the byte length until the next boundary. 
$length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); - if ( 0 === $length ) { - return; - } + $value = substr( $input, $offset, $length ); - $value = substr( $input, $offset, $length ); + // Move past trailing whitespace. $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); yield $value; From 3421a4e0d634686fd820db906eb6077503985fe8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:41:15 +0100 Subject: [PATCH 081/129] Remove unreachable code --- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 1a7a9ffb37716..17787dd70815b 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -68,8 +68,6 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { : strpos( $att_value, $this->value ) ); } - - throw new Exception( 'Unreachable' ); } /** From 784b2d913cbf469a3847b93a46c9c202f19091b7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:41:25 +0100 Subject: [PATCH 082/129] Add a lot of selector integration tests --- .../html-api/wpHtmlTagProcessor-select.php | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php index c42c69ff0a095..66f32f905c04f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php @@ -39,16 +39,42 @@ public function test_select( string $html, string $selector ) { */ public static function data_selectors(): array { return array( - 'simple type' => array( '

', 'div' ), - 'any type' => array( '', '*' ), - 'simple class' => array( '
', '.x' ), - 'simple id' => array( '
', '#x' ), - 'simple attribute' => array( '
', '[att]' ), - 'attribute value' => array( '
', '[att=val]' ), - 'attribute quoted value' => array( '
', '[att="::"]' ), + 'simple type' => array( '

', 'div' ), + 'any type' => array( '
', '*' ), + 'simple class' => array( '
', '.x' ), + 'simple id' => array( '
', '#x' ), + 'boolean attribute' => array( '
', '[att]' ), + 'boolean attribute with string match' => array( '
', '[att=""]' ), - 'list' => array( '

', 'a, p' ), - 'compound' => array( '

', 'section[att~="bar"]' ), + 'attribute value' => array( '
', '[att=val]' ), + 'attribute quoted value' => array( '
', '[att="::"]' ), + 'attribute case insensitive' => array( '
', '[att="VAL"i]' ), + 'attribute case sensitive mod' => array( '
', '[att="val"s]' ), + + 'attribute one of' => array( '
', '[att~="b"]' ), + 'attribute one of insensitive' => array( '
', '[att~="b"i]' ), + 'attribute one of mod sensitive' => array( '
', '[att~="b"s]' ), + 'attribute one of whitespace cases' => array( "
", '[att~="b"]' ), + + 'attribute with-hyphen (no hyphen)' => array( '

', '[att|="special"]' ), + 'attribute with-hyphen (hyphen prefix)' => array( '

', '[att|="special"]' ), + 'attribute with-hyphen insensitive' => array( '

', '[att|="special"i]' ), + 'attribute with-hyphen sensitive mod' => array( '

', '[att|="special"s]' ), + + 'attribute prefixed' => array( '

', '[att^="p"]' ), + 'attribute prefixed insensitive' => array( '

', '[att^="p"i]' ), + 'attribute prefixed sensitive mod' => array( '

', '[att^="p"s]' ), + + 'attribute suffixed' => array( '

', '[att$="x"]' ), + 'attribute suffixed insensitive' => array( '

', '[att$="x"i]' ), + 'attribute suffixed sensitive mod' => array( '

', '[att$="x"s]' ), + + 'attribute contains' => array( '

', '[att*="x"]' ), + 'attribute contains insensitive' => array( '

', '[att*="x"i]' ), + 'attribute contains sensitive mod' => array( '

', '[att*="x"s]' ), + + 'list' => array( '

', 'a, p' ), + 'compound' => array( '

', 'section[att="bar"]' ), ); } From 4d4c5fe2db713a4a85a8c4073e3e39f44731d140 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:48:39 +0100 Subject: [PATCH 083/129] Extract normalize input method --- .../class-wp-css-complex-selector-list.php | 16 +------ .../class-wp-css-compound-selector-list.php | 43 +++++++++++++------ 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index f3769a035f6e5..59b08532868a8 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -43,26 +43,12 @@ class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List impleme * @return static|null */ public static function from_selectors( string $input ) { - // > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… - $input = trim( $input, " \t\r\n\r" ); + $input = self::normalize_selector_input( $input ); if ( '' === $input ) { return null; } - /* - * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. - * > - * > To filter code points from a stream of (unfiltered) code points input: - * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. - * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). 
- * - * https://www.w3.org/TR/css-syntax-3/#input-preprocessing - */ - $input = str_replace( array( "\r\n" ), "\n", $input ); - $input = str_replace( array( "\r", "\f" ), "\n", $input ); - $input = str_replace( "\0", "\u{FFFD}", $input ); - $offset = 0; $selector = self::parse_complex_selector( $input, $offset ); diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 2aae51d671f6b..a41b0ac9cd530 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -120,25 +120,12 @@ protected function __construct( array $selectors ) { * @return static|null */ public static function from_selectors( string $input ) { - $input = trim( $input, " \t\r\n\r" ); + $input = self::normalize_selector_input( $input ); if ( '' === $input ) { return null; } - /* - * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. - * > - * > To filter code points from a stream of (unfiltered) code points input: - * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. - * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - * - * https://www.w3.org/TR/css-syntax-3/#input-preprocessing - */ - $input = str_replace( array( "\r\n" ), "\n", $input ); - $input = str_replace( array( "\r", "\f" ), "\n", $input ); - $input = str_replace( "\0", "\u{FFFD}", $input ); - $offset = 0; $selector = self::parse_compound_selector( $input, $offset ); @@ -842,4 +829,32 @@ final protected static function check_if_three_code_points_would_start_an_ident_ // > Return false. 
return self::is_ident_start_codepoint( $input, $offset ); } + + /** + * @todo doc… + */ + final protected static function normalize_selector_input( string $input ): string { + /* + * > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… + * + * This list includes \f. + * A later step would normalize it to a known whitespace character, but it can be trimmed here as well. + */ + $input = trim( $input, " \t\r\n\r\f" ); + + /* + * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. + * > + * > To filter code points from a stream of (unfiltered) code points input: + * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). 
+ * + * https://www.w3.org/TR/css-syntax-3/#input-preprocessing + */ + $input = str_replace( array( "\r\n" ), "\n", $input ); + $input = str_replace( array( "\r", "\f" ), "\n", $input ); + $input = str_replace( "\0", "\u{FFFD}", $input ); + + return $input; + } } From dbc37fc2d819057c9678364021d1d14ee8f91292 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:52:54 +0100 Subject: [PATCH 084/129] tests --- tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index d94b61d49c14e..2a20e317338bd 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -366,6 +366,7 @@ public static function data_attribute_selectors(): array { 'Invalid: [att s]' => array( '[att s]' ), "Invalid: [att='val\\n']" => array( "[att='val\n']" ), 'Invalid: [att=val i ' => array( '[att=val i ' ), + 'Invalid: [att="val"ix' => array( '[att="val"ix' ), ); } From d241f31643a14f70ed3469121d6f45ce0db143d0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 4 Dec 2024 21:57:08 +0100 Subject: [PATCH 085/129] Add nonfinal subclass selector test --- .../html-api/class-wp-css-complex-selector-list.php | 8 ++++++-- .../tests/html-api/wpCssComplexSelectorList.php | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 59b08532868a8..0413b8dea426a 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -134,9 +134,13 @@ final protected static function parse_complex_selector( string $input, int &$off return null; } - // `div > .className` is valid, but `.className > div` is not. 
+ /* + * Subclass selectors in non-final position are not supported: + * - `div > .className` is valid + * - `.className > div` is not + */ if ( $has_preceding_subclass_selector ) { - throw new Exception( 'Unsupported non-final subclass selector.' ); + return null; } $has_preceding_subclass_selector = null !== $selector->subclass_selectors; diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 5b485a5029db5..5cceddbdddd30 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -59,6 +59,16 @@ public function test_parse_invalid_complex_selector() { $this->assertNull( $result ); } + /** + * @ticket TBD + */ + public function test_parse_invalid_complex_selector_nonfinal_subclass() { + $input = 'el.foo#bar[baz=quux] > final, rest'; + $offset = 0; + $result = $this->test_class::test_parse_complex_selector( $input, $offset ); + $this->assertNull( $result ); + } + /** * @ticket TBD */ From 663070b34b7b9b04413a6d8b7cf0f20645d7eadb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 12:38:54 +0100 Subject: [PATCH 086/129] Fix logic bug in child selector exploration --- src/wp-includes/html-api/class-wp-css-complex-selector.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 520f3bf3d8fde..ed4d2e7a6e662 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -46,7 +46,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[0], $selector->type_selector->ident ) === 0 ) { return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); } - return 
$this->explore_matches( $selectors, array_slice( $breadcrumbs, 1 ) ); + return false; case self::COMBINATOR_DESCENDANT: // Find _all_ the breadcrumbs that match and recurse from each of them. From 5478af99a8ecbbff54503f3230f247bc06f56fdf Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 12:54:58 +0100 Subject: [PATCH 087/129] Improve selector integration tests --- .../tests/html-api/wpHtmlProcessor-select.php | 62 +++++++------- .../html-api/wpHtmlTagProcessor-select.php | 83 +++++++++---------- 2 files changed, 72 insertions(+), 73 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index 733a7135f1b17..8515be63d83f8 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -26,54 +26,60 @@ public function test_select_miss() { * * @dataProvider data_selectors */ - public function test_select( string $html, string $selector ) { + public function test_select_all( string $html, string $selector, int $match_count ) { $processor = WP_HTML_Processor::create_full_parser( $html ); - $this->assertTrue( $processor->select( $selector ) ); - $this->assertTrue( $processor->get_attribute( 'match' ) ); + $count = 0; + foreach ( $processor->select_all( $selector ) as $_ ) { + $breadcrumb_string = implode( ', ', $processor->get_breadcrumbs() ); + $this->assertTrue( + $processor->get_attribute( 'match' ), + "Matched unexpected tag {$processor->get_tag()} @ {$breadcrumb_string}" + ); + ++$count; + } + $this->assertSame( $match_count, $count, 'Did not match expected number of tags.' ); } /** * Data provider. * + * Most selectors are covered by the tag processor selector tests. + * This suite should focus on complex selectors. + * * @return array */ public static function data_selectors(): array { return array( - 'simple type' => array( '
', 'div' ), - 'any type' => array( '', '*' ), - 'simple class' => array( '
', '.x' ), - 'simple id' => array( '
', '#x' ), - 'simple attribute' => array( '
', '[att]' ), - 'attribute value' => array( '
', '[att=val]' ), - 'attribute quoted value' => array( '
', '[att="::"]' ), - 'complex any descendant' => array( '
', 'section *' ), - 'complex any child' => array( '
', 'section > *' ), - - 'list' => array( '

', 'a, p' ), - 'compound' => array( '

', 'section[att~="bar"]' ), + 'any descendant' => array( '

', 'section *', 4 ), + 'any child 1' => array( '

', 'section > *', 2 ), + 'any child 2' => array( '

', 'div > *', 1 ), ); } /** * @ticket TBD + * + * @expectedIncorrectUsage WP_HTML_Processor::select_all + * + * @dataProvider data_invalid_selectors */ - public function test_select_all() { - $processor = WP_HTML_Processor::create_full_parser( '

' ); - $count = 0; - foreach ( $processor->select_all( 'div, .x, svg>rect, #y' ) as $_ ) { - ++$count; - $this->assertTrue( $processor->get_attribute( 'match' ) ); - } - $this->assertSame( 4, $count ); + public function test_invalid_selector( string $selector ) { + $processor = WP_HTML_Processor::create_fragment( 'irrelevant' ); + $this->assertFalse( $processor->select( $selector ) ); } /** - * @ticket TBD + * Data provider. * - * @expectedIncorrectUsage WP_HTML_Processor::select_all + * @return array */ - public function test_invalid_selector() { - $processor = WP_HTML_Processor::create_fragment( 'irrelevant' ); - $this->assertFalse( $processor->select( '[invalid!selector]' ) ); + public static function data_invalid_selectors(): array { + return array( + 'invalid selector' => array( '[invalid!selector]' ), + + // The class selectors below are not allowed in non-final position. + 'unsupported child selector' => array( '.parent > .child' ), + 'unsupported descendant selector' => array( '.ancestor .descendant' ), + ); } } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php index 66f32f905c04f..6bc6ba1e6edbc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php @@ -26,10 +26,17 @@ public function test_select_miss() { * * @dataProvider data_selectors */ - public function test_select( string $html, string $selector ) { + public function test_select( string $html, string $selector, int $match_count ) { $processor = new WP_HTML_Tag_Processor( $html ); - $this->assertTrue( $processor->select( $selector ) ); - $this->assertTrue( $processor->get_attribute( 'match' ) ); + $count = 0; + foreach ( $processor->select_all( $selector ) as $_ ) { + $this->assertTrue( + $processor->get_attribute( 'match' ), + "Matched unexpected tag {$processor->get_tag()}" + ); + ++$count; + } + $this->assertSame( $match_count, $count, 
'Did not match expected number of tags.' ); } /** @@ -39,58 +46,44 @@ public function test_select( string $html, string $selector ) { */ public static function data_selectors(): array { return array( - 'simple type' => array( '

', 'div' ), - 'any type' => array( '
', '*' ), - 'simple class' => array( '
', '.x' ), - 'simple id' => array( '
', '#x' ), - 'boolean attribute' => array( '
', '[att]' ), - 'boolean attribute with string match' => array( '
', '[att=""]' ), + 'simple type' => array( '
', 'div', 2 ), + 'any type' => array( '
', '*', 2 ), + 'simple class' => array( '
', '.x', 2 ), + 'simple id' => array( '
', '#x', 2 ), - 'attribute value' => array( '
', '[att=val]' ), - 'attribute quoted value' => array( '
', '[att="::"]' ), - 'attribute case insensitive' => array( '
', '[att="VAL"i]' ), - 'attribute case sensitive mod' => array( '
', '[att="val"s]' ), + 'attribute presence' => array( '
', '[att]', 2 ), + 'attribute empty string match' => array( '
', '[att=""]', 2 ), + 'attribute value' => array( '

', '[att=val]', 2 ), + 'attribute quoted value' => array( '

', '[att="::"]', 2 ), + 'attribute case insensitive' => array( '

', '[att="VAL"i]', 2 ), + 'attribute case sensitive mod' => array( '

', '[att="val"s]', 2 ), - 'attribute one of' => array( '

', '[att~="b"]' ), - 'attribute one of insensitive' => array( '
', '[att~="b"i]' ), - 'attribute one of mod sensitive' => array( '
', '[att~="b"s]' ), - 'attribute one of whitespace cases' => array( "
", '[att~="b"]' ), + 'attribute one of' => array( '

', '[att~="b"]', 3 ), + 'attribute one of insensitive' => array( '

', '[att~="b"i]', 1 ), + 'attribute one of mod sensitive' => array( '
', '[att~="b"s]', 1 ), + 'attribute one of whitespace cases' => array( "
", '[att~="b"]', 1 ), - 'attribute with-hyphen (no hyphen)' => array( '

', '[att|="special"]' ), - 'attribute with-hyphen (hyphen prefix)' => array( '

', '[att|="special"]' ), - 'attribute with-hyphen insensitive' => array( '

', '[att|="special"i]' ), - 'attribute with-hyphen sensitive mod' => array( '

', '[att|="special"s]' ), + 'attribute with-hyphen' => array( '

', '[att|="special"]', 2 ), + 'attribute with-hyphen insensitive' => array( '

', '[att|="special" i]', 2 ), + 'attribute with-hyphen sensitive mod' => array( '

', '[att|="special"s]', 1 ), - 'attribute prefixed' => array( '

', '[att^="p"]' ), - 'attribute prefixed insensitive' => array( '

', '[att^="p"i]' ), - 'attribute prefixed sensitive mod' => array( '

', '[att^="p"s]' ), + 'attribute prefixed' => array( '

', '[att^="p"]', 2 ), + 'attribute prefixed insensitive' => array( '

', '[att^="p"i]', 1 ), + 'attribute prefixed sensitive mod' => array( '

', '[att^="p"s]', 1 ), - 'attribute suffixed' => array( '

', '[att$="x"]' ), - 'attribute suffixed insensitive' => array( '

', '[att$="x"i]' ), - 'attribute suffixed sensitive mod' => array( '

', '[att$="x"s]' ), + 'attribute suffixed' => array( '

', '[att$="x"]', 2 ), + 'attribute suffixed insensitive' => array( '

', '[att$="x"i]', 1 ), + 'attribute suffixed sensitive mod' => array( '

', '[att$="x"s]', 1 ), - 'attribute contains' => array( '

', '[att*="x"]' ), - 'attribute contains insensitive' => array( '

', '[att*="x"i]' ), - 'attribute contains sensitive mod' => array( '

', '[att*="x"s]' ), + 'attribute contains' => array( '

', '[att*="x"]', 2 ), + 'attribute contains insensitive' => array( '

', '[att*="x"i]', 1 ), + 'attribute contains sensitive mod' => array( '

', '[att*="x"s]', 1 ), - 'list' => array( '

', 'a, p' ), - 'compound' => array( '

', 'section[att="bar"]' ), + 'list' => array( '

', 'a, p, .class, #id, [att]', 2 ), + 'compound' => array( '

', 'custom-el[att="bar"][ fruit ~= "banana" i]', 1 ), ); } - /** - * @ticket TBD - */ - public function test_select_all() { - $processor = new WP_HTML_Tag_Processor( '

' ); - $count = 0; - foreach ( $processor->select_all( 'div, .x, rect, #y' ) as $_ ) { - ++$count; - $this->assertTrue( $processor->get_attribute( 'match' ) ); - } - $this->assertSame( 4, $count ); - } - /** * @ticket TBD * From 4f6bf948404cae07425b676048109be3a52d8853 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 13:13:03 +0100 Subject: [PATCH 088/129] Try abstract class instead of interface --- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-class-selector.php | 2 +- .../html-api/class-wp-css-complex-selector-list.php | 2 +- src/wp-includes/html-api/class-wp-css-complex-selector.php | 2 +- .../html-api/class-wp-css-compound-selector-list.php | 2 +- src/wp-includes/html-api/class-wp-css-compound-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-id-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-type-selector.php | 2 +- .../html-api/interface-wp-css-html-processor-matcher.php | 4 ++-- .../html-api/interface-wp-css-html-tag-processor-matcher.php | 4 ++-- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 17787dd70815b..4cf554c10eca9 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -1,6 +1,6 @@ has_class( $this->ident ); } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 0413b8dea426a..669139097fa75 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -32,7 +32,7 @@ * * @access private */ -class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher { +class WP_CSS_Complex_Selector_List extends 
WP_CSS_Compound_Selector_List { /** * Takes a CSS selector string and returns an instance of itself or `null` if the selector * string is invalid or unsupported. diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index ed4d2e7a6e662..4f83476898ec0 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -5,7 +5,7 @@ * * > = [ ? ] * */ -final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { +final class WP_CSS_Complex_Selector extends WP_CSS_HTML_Processor_Matcher { public function matches( WP_HTML_Processor $processor ): bool { // First selector must match this location. if ( ! $this->selectors[0]->matches( $processor ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index a41b0ac9cd530..0095b22977b0a 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -76,7 +76,7 @@ * @link https://www.w3.org/TR/selectors-api2/ * @link https://www.w3.org/TR/selectors-4/ */ -class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher { +class WP_CSS_Compound_Selector_List extends WP_CSS_HTML_Tag_Processor_Matcher { /** * @param WP_HTML_Tag_Processor $processor * @return bool diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index e64695abe9ab3..3340515569bdd 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -5,7 +5,7 @@ * * > = [ ? * ]! 
*/ -final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Compound_Selector extends WP_CSS_HTML_Tag_Processor_Matcher { public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! $this->type_selector->matches( $processor ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 83339ff839317..15cb2745ede9e 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -1,6 +1,6 @@ get_tag(); if ( null === $tag_name ) { diff --git a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php index 2ae29413b35d2..aa280ddefa696 100644 --- a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php +++ b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php @@ -1,8 +1,8 @@ Date: Thu, 5 Dec 2024 13:13:06 +0100 Subject: [PATCH 089/129] Revert "Try abstract class instead of interface" This reverts commit 74881651faf991eabceb090707ce8b43c2a25316. 
--- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-class-selector.php | 2 +- .../html-api/class-wp-css-complex-selector-list.php | 2 +- src/wp-includes/html-api/class-wp-css-complex-selector.php | 2 +- .../html-api/class-wp-css-compound-selector-list.php | 2 +- src/wp-includes/html-api/class-wp-css-compound-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-id-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-type-selector.php | 2 +- .../html-api/interface-wp-css-html-processor-matcher.php | 4 ++-- .../html-api/interface-wp-css-html-tag-processor-matcher.php | 4 ++-- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 4cf554c10eca9..17787dd70815b 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -1,6 +1,6 @@ has_class( $this->ident ); } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 669139097fa75..0413b8dea426a 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -32,7 +32,7 @@ * * @access private */ -class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List { +class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher { /** * Takes a CSS selector string and returns an instance of itself or `null` if the selector * string is invalid or unsupported. 
diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 4f83476898ec0..ed4d2e7a6e662 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -5,7 +5,7 @@ * * > = [ ? ] * */ -final class WP_CSS_Complex_Selector extends WP_CSS_HTML_Processor_Matcher { +final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { public function matches( WP_HTML_Processor $processor ): bool { // First selector must match this location. if ( ! $this->selectors[0]->matches( $processor ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 0095b22977b0a..a41b0ac9cd530 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -76,7 +76,7 @@ * @link https://www.w3.org/TR/selectors-api2/ * @link https://www.w3.org/TR/selectors-4/ */ -class WP_CSS_Compound_Selector_List extends WP_CSS_HTML_Tag_Processor_Matcher { +class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher { /** * @param WP_HTML_Tag_Processor $processor * @return bool diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 3340515569bdd..e64695abe9ab3 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -5,7 +5,7 @@ * * > = [ ? * ]! */ -final class WP_CSS_Compound_Selector extends WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! 
$this->type_selector->matches( $processor ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 15cb2745ede9e..83339ff839317 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -1,6 +1,6 @@ get_tag(); if ( null === $tag_name ) { diff --git a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php index aa280ddefa696..2ae29413b35d2 100644 --- a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php +++ b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php @@ -1,8 +1,8 @@ Date: Thu, 5 Dec 2024 14:51:39 +0100 Subject: [PATCH 090/129] Clean up and document attribute selector --- .../class-wp-css-attribute-selector.php | 214 ++++++++++-------- 1 file changed, 122 insertions(+), 92 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 17787dd70815b..7036dd3775cc1 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -1,96 +1,23 @@ get_attribute( $this->name ); - if ( null === $att_value ) { - return false; - } - - if ( null === $this->value ) { - return true; - } - - if ( true === $att_value ) { - $att_value = ''; - } - - $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; - - switch ( $this->matcher ) { - case self::MATCH_EXACT: - return $case_insensitive - ? 0 === strcasecmp( $att_value, $this->value ) - : $att_value === $this->value; - - case self::MATCH_ONE_OF_EXACT: - foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) { - if ( - $case_insensitive - ? 
0 === strcasecmp( $val, $this->value ) - : $val === $this->value - ) { - return true; - } - } - return false; - - case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: - // Attempt the full match first - if ( - $case_insensitive - ? 0 === strcasecmp( $att_value, $this->value ) - : $att_value === $this->value - ) { - return true; - } - - // Partial match - if ( strlen( $att_value ) < strlen( $this->value ) + 1 ) { - return false; - } - - $starts_with = "{$this->value}-"; - return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive ); - - case self::MATCH_PREFIXED_BY: - return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive ); - - case self::MATCH_SUFFIXED_BY: - return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive ); - - case self::MATCH_CONTAINS: - return false !== ( - $case_insensitive - ? stripos( $att_value, $this->value ) - : strpos( $att_value, $this->value ) - ); - } - } - - /** - * @param string $input - * - * @return Generator - */ - private function whitespace_delimited_list( string $input ): Generator { - // Start by skipping whitespace. - $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); - - while ( $offset < strlen( $input ) ) { - // Find the byte length until the next boundary. - $length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); - $value = substr( $input, $offset, $length ); - - // Move past trailing whitespace. - $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); - - yield $value; - } - } - /** * [att=val] * Represents an element with the att attribute whose value is exactly "val". @@ -145,11 +72,11 @@ private function whitespace_delimited_list( string $input ): Generator { */ const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; - /** * The attribute name. 
* * @var string + * @readonly */ public $name; @@ -157,6 +84,7 @@ private function whitespace_delimited_list( string $input ): Generator { * The attribute matcher. * * @var null|self::MATCH_* + * @readonly */ public $matcher; @@ -164,6 +92,7 @@ private function whitespace_delimited_list( string $input ): Generator { * The attribute value. * * @var string|null + * @readonly */ public $value; @@ -171,10 +100,13 @@ private function whitespace_delimited_list( string $input ): Generator { * The attribute modifier. * * @var null|self::MODIFIER_* + * @readonly */ public $modifier; /** + * Constructor. + * * @param string $name * @param null|self::MATCH_* $matcher * @param null|string $value @@ -186,4 +118,102 @@ public function __construct( string $name, ?string $matcher = null, ?string $val $this->value = $value; $this->modifier = $modifier; } + + /** + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Tag_Processor $processor + * @return bool True if the processor's current position matches the selector. + */ + public function matches( WP_HTML_Tag_Processor $processor ): bool { + $att_value = $processor->get_attribute( $this->name ); + if ( null === $att_value ) { + return false; + } + + if ( null === $this->value ) { + return true; + } + + if ( true === $att_value ) { + $att_value = ''; + } + + $case_insensitive = self::MODIFIER_CASE_INSENSITIVE === $this->modifier; + + switch ( $this->matcher ) { + case self::MATCH_EXACT: + return $case_insensitive + ? 0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value; + + case self::MATCH_ONE_OF_EXACT: + foreach ( $this->whitespace_delimited_list( $att_value ) as $val ) { + if ( + $case_insensitive + ? 0 === strcasecmp( $val, $this->value ) + : $val === $this->value + ) { + return true; + } + } + return false; + + case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: + // Attempt the full match first + if ( + $case_insensitive + ? 
0 === strcasecmp( $att_value, $this->value ) + : $att_value === $this->value + ) { + return true; + } + + // Partial match + if ( strlen( $att_value ) < strlen( $this->value ) + 1 ) { + return false; + } + + $starts_with = "{$this->value}-"; + return 0 === substr_compare( $att_value, $starts_with, 0, strlen( $starts_with ), $case_insensitive ); + + case self::MATCH_PREFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, 0, strlen( $this->value ), $case_insensitive ); + + case self::MATCH_SUFFIXED_BY: + return 0 === substr_compare( $att_value, $this->value, -strlen( $this->value ), null, $case_insensitive ); + + case self::MATCH_CONTAINS: + return false !== ( + $case_insensitive + ? stripos( $att_value, $this->value ) + : strpos( $att_value, $this->value ) + ); + } + } + + /** + * Splits a string into a list of whitespace delimited values. + * + * This is useful for the {@see WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT} matcher. + * + * @param string $input + * + * @return Generator + */ + private function whitespace_delimited_list( string $input ): Generator { + // Start by skipping whitespace. + $offset = strspn( $input, " \t\r\n\f" ); + + while ( $offset < strlen( $input ) ) { + // Find the byte length until the next boundary. + $length = strcspn( $input, " \t\r\n\f", $offset ); + $value = substr( $input, $offset, $length ); + + // Move past trailing whitespace. 
+ $offset += $length + strspn( $input, " \t\r\n\f", $offset + $length ); + + yield $value; + } + } } From 32ee2a71197572ea713b6a9a3ee1a9e6b53c0d09 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 19:43:08 +0100 Subject: [PATCH 091/129] Update ticket number in tests --- .../html-api/wpCssComplexSelectorList.php | 16 ++++++------ .../html-api/wpCssCompoundSelectorList.php | 26 +++++++++---------- .../tests/html-api/wpHtmlProcessor-select.php | 6 ++--- .../html-api/wpHtmlTagProcessor-select.php | 6 ++--- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 5cceddbdddd30..0b17e57847662 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -27,7 +27,7 @@ public static function test_parse_complex_selector( string $input, int &$offset } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_complex_selector() { $input = 'el1 > .child#bar[baz=quux] , rest'; @@ -50,7 +50,7 @@ public function test_parse_complex_selector() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_complex_selector() { $input = 'el.foo#bar[baz=quux] > , rest'; @@ -60,7 +60,7 @@ public function test_parse_invalid_complex_selector() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_complex_selector_nonfinal_subclass() { $input = 'el.foo#bar[baz=quux] > final, rest'; @@ -70,7 +70,7 @@ public function test_parse_invalid_complex_selector_nonfinal_subclass() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_empty_complex_selector() { $input = ''; @@ -80,7 +80,7 @@ public function test_parse_empty_complex_selector() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_complex_selector_list() { $input = 'el1 el2 el.foo#bar[baz=quux], second > selector'; @@ -89,7 +89,7 
@@ public function test_parse_complex_selector_list() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_selector_list() { $input = 'el,,'; @@ -98,7 +98,7 @@ public function test_parse_invalid_selector_list() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_selector_list2() { $input = 'el!'; @@ -107,7 +107,7 @@ public function test_parse_invalid_selector_list2() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_empty_selector_list() { $input = " \t \t\n\r\f"; diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 2a20e317338bd..b5a2d9956679d 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -114,7 +114,7 @@ public static function data_idents(): array { } /** - * @ticket TBD + * @ticket 62653 */ public function test_is_ident_and_is_ident_start() { $this->assertFalse( $this->test_class::test_is_ident_codepoint( '[', 0 ) ); @@ -124,7 +124,7 @@ public function test_is_ident_and_is_ident_start() { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_idents */ @@ -141,7 +141,7 @@ public function test_parse_ident( string $input, ?string $expected = null, ?stri } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_strings */ @@ -192,7 +192,7 @@ public static function data_strings(): array { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_id_selectors */ @@ -226,7 +226,7 @@ public static function data_id_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_class_selectors */ @@ -260,7 +260,7 @@ public static function data_class_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_type_selectors */ @@ -296,7 +296,7 @@ public static function data_type_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider 
data_attribute_selectors */ @@ -371,7 +371,7 @@ public static function data_attribute_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_selector() { $input = 'el.foo#bar[baz=quux] > .child'; @@ -389,7 +389,7 @@ public function test_parse_selector() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_empty_selector() { $input = ''; @@ -400,7 +400,7 @@ public function test_parse_empty_selector() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_selector_list() { $input = 'el1, el2, el.foo#bar[baz=quux]'; @@ -409,7 +409,7 @@ public function test_parse_selector_list() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_selector_list() { $input = 'el,,'; @@ -418,7 +418,7 @@ public function test_parse_invalid_selector_list() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_invalid_selector_list2() { $input = 'el!'; @@ -427,7 +427,7 @@ public function test_parse_invalid_selector_list2() { } /** - * @ticket TBD + * @ticket 62653 */ public function test_parse_empty_selector_list() { $input = " \t \t\n\r\f"; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index 8515be63d83f8..40e1d96978afe 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -14,7 +14,7 @@ */ class Tests_HtmlApi_WpHtmlProcessor_Select extends WP_UnitTestCase { /** - * @ticket TBD + * @ticket 62653 */ public function test_select_miss() { $processor = WP_HTML_Processor::create_full_parser( '' ); @@ -22,7 +22,7 @@ public function test_select_miss() { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_selectors */ @@ -57,7 +57,7 @@ public static function data_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 * * @expectedIncorrectUsage WP_HTML_Processor::select_all * diff --git 
a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php index 6bc6ba1e6edbc..586e38b4bafb2 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php @@ -14,7 +14,7 @@ */ class Tests_HtmlApi_WpHtmlTagProcessor_Select extends WP_UnitTestCase { /** - * @ticket TBD + * @ticket 62653 */ public function test_select_miss() { $processor = new WP_HTML_Tag_Processor( '' ); @@ -22,7 +22,7 @@ public function test_select_miss() { } /** - * @ticket TBD + * @ticket 62653 * * @dataProvider data_selectors */ @@ -85,7 +85,7 @@ public static function data_selectors(): array { } /** - * @ticket TBD + * @ticket 62653 * * @expectedIncorrectUsage WP_HTML_Tag_Processor::select_all * From 5922494030b000bf4d229975a5fd1968c14b20fc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 21:28:24 +0100 Subject: [PATCH 092/129] Improve some types --- .../html-api/class-wp-css-complex-selector.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index ed4d2e7a6e662..a4cfd46622560 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -16,7 +16,7 @@ public function matches( WP_HTML_Processor $processor ): bool { return true; } - /** @var array $breadcrumbs */ + /** @var string[] */ $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); $selectors = array_slice( $this->selectors, 1 ); return $this->explore_matches( $selectors, $breadcrumbs ); @@ -26,7 +26,7 @@ public function matches( WP_HTML_Processor $processor ): bool { * This only looks at breadcrumbs and can therefore only support type selectors. 
* * @param array $selectors - * @param array $breadcrumbs + * @param string[] $breadcrumbs */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { if ( array() === $selectors ) { @@ -36,9 +36,9 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; } - /** @var self::COMBINATOR_* $combinator */ + /** @var self::COMBINATOR_* */ $combinator = $selectors[0]; - /** @var WP_CSS_Compound_Selector $selector */ + /** @var WP_CSS_Compound_Selector */ $selector = $selectors[1]; switch ( $combinator ) { From e492aa60e2db167ec87a048f64fab13378ec4694 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 22:16:57 +0100 Subject: [PATCH 093/129] Fix and improve string token parsing --- .../class-wp-css-compound-selector-list.php | 19 +++++++++++++------ .../html-api/wpCssCompoundSelectorList.php | 9 ++++++--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index a41b0ac9cd530..8cca2e27c9ec3 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -548,7 +548,7 @@ final protected static function parse_ident( string $input, int &$offset ): ?str * @return string|null */ final protected static function parse_string( string $input, int &$offset ): ?string { - if ( $offset + 1 >= strlen( $input ) ) { + if ( $offset >= strlen( $input ) ) { return null; } @@ -559,8 +559,19 @@ final protected static function parse_string( string $input, int &$offset ): ?st $string_token = ''; - $updated_offset = $offset + 1; + $updated_offset = $offset + 1; + $anything_else_mask = "\\\n{$ending_code_point}"; while ( $updated_offset < strlen( $input ) ) { + $anything_else_length = strcspn( $input, $anything_else_mask, $updated_offset ); + if ( $anything_else_length > 0 ) { + $string_token 
.= substr( $input, $updated_offset, $anything_else_length ); + $updated_offset += $anything_else_length; + + if ( $updated_offset >= strlen( $input ) ) { + break; + } + } + switch ( $input[ $updated_offset ] ) { case '\\': ++$updated_offset; @@ -587,10 +598,6 @@ final protected static function parse_string( string $input, int &$offset ): ?st case $ending_code_point: ++$updated_offset; break 2; - - default: - $string_token .= $input[ $updated_offset ]; - ++$updated_offset; } } diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index b5a2d9956679d..715e0e26bc9cd 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -181,13 +181,16 @@ public static function data_strings(): array { "'foo\\" => array( "'foo\\", 'foo', '' ), + '"' => array( '"', '', '' ), + '"\\"' => array( '"\\"', '"', '' ), + '"missing close' => array( '"missing close', 'missing close', '' ), + // Invalid 'Invalid: (empty string)' => array( '' ), - "Invalid: 'newline\\n'" => array( "'newline\n'" ), - 'Invalid: foo' => array( 'foo' ), - 'Invalid: \\"' => array( '\\"' ), 'Invalid: .foo' => array( '.foo' ), 'Invalid: #foo' => array( '#foo' ), + "Invalid: 'newline\\n'" => array( "'newline\n'" ), + 'Invalid: foo' => array( 'foo' ), ); } From 81c67582deef44766e188482586538cdbe84272d Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 22:17:11 +0100 Subject: [PATCH 094/129] Update attribute selector tests --- .../html-api/wpCssCompoundSelectorList.php | 82 ++++++++++--------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 715e0e26bc9cd..6d1b142c17ea9 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ 
-331,45 +331,53 @@ public function test_parse_attribute( */ public static function data_attribute_selectors(): array { return array( - '[href]' => array( '[href]', 'href', null, null, null, '' ), - '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), - '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), - '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), - '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), - '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), - '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), - '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), - '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), - - '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), - "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), - "[match*='quoted's]" => array( "[match*='quoted's]", 'match', 
WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), - '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), + '[href]' => array( '[href]', 'href', null, null, null, '' ), + '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), + '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), + '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), + '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), + '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), + '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), + '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), + + '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', 
WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), + "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), + '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), + + 'Unterminated: [att' => array( '[att', 'att', null, null, null, '' ), + 'Unterminated: [att="' => array( '[att="', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, '', null, '' ), + 'Unterminated: [att="\\"' => array( '[att="\\"', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, '"', null, '' ), + 'Unterminated: [att="x"' => array( '[att="x"', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', null, '' ), + 'Unterminated: [att="x\\"i]' => array( '[att="x\\"i]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x"i]', null, '' ), + 'Unterminated: [att="x" i' => array( '[att="x" i', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + 'Unterminated: [att = x i' => array( '[att = x i', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), // Invalid - 'Invalid: (empty string)' => array( '' ), - 'Invalid: foo' => array( 'foo' ), - 'Invalid: [foo' => array( '[foo' ), - 'Invalid: [#foo]' => array( '[#foo]' ), - 'Invalid: [*|*]' => array( '[*|*]' ), - 'Invalid: [ns|*]' => array( '[ns|*]' ), - 'Invalid: [* |att]' => array( '[* |att]' ), - 'Invalid: [*| att]' => array( '[*| att]' ), - 'Invalid: [att * =]' => array( '[att * =]' ), - 'Invalid: [att+=val]' => array( '[att+=val]' ), - 'Invalid: [att=val ' => 
array( '[att=val ' ), - 'Invalid: [att i]' => array( '[att i]' ), - 'Invalid: [att s]' => array( '[att s]' ), - "Invalid: [att='val\\n']" => array( "[att='val\n']" ), - 'Invalid: [att=val i ' => array( '[att=val i ' ), - 'Invalid: [att="val"ix' => array( '[att="val"ix' ), + 'Invalid: (empty string)' => array( '' ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: [foo' => array( '[foo' ), + 'Invalid: [#foo]' => array( '[#foo]' ), + 'Invalid: [*|*]' => array( '[*|*]' ), + 'Invalid: [ns|*]' => array( '[ns|*]' ), + 'Invalid: [* |att]' => array( '[* |att]' ), + 'Invalid: [*| att]' => array( '[*| att]' ), + 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att+=val]' => array( '[att+=val]' ), + 'Invalid: [att=val ' => array( '[att=val ' ), + 'Invalid: [att i]' => array( '[att i]' ), + 'Invalid: [att s]' => array( '[att s]' ), + "Invalid: [att='val\\n']" => array( "[att='val\n']" ), + 'Invalid: [att=val i ' => array( '[att=val i ' ), + 'Invalid: [att="val"ix' => array( '[att="val"ix' ), ); } From 7bccf3eada582c8b66ec24781dc151a1afbfe9b6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 22:36:26 +0100 Subject: [PATCH 095/129] Revert "Update attribute selector tests" This reverts commit 7df9ed91a1360d80c1dcb87980af941010b926ba. 
--- .../html-api/wpCssCompoundSelectorList.php | 82 +++++++++---------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 6d1b142c17ea9..715e0e26bc9cd 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -331,53 +331,45 @@ public function test_parse_attribute( */ public static function data_attribute_selectors(): array { return array( - '[href]' => array( '[href]', 'href', null, null, null, '' ), - '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), - '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), - '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), - '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), - '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), - '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), - '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), - '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), - - '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[att=val S]' => array( '[att=val S]', 'att', 
WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), - "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), - "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), - '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), - - 'Unterminated: [att' => array( '[att', 'att', null, null, null, '' ), - 'Unterminated: [att="' => array( '[att="', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, '', null, '' ), - 'Unterminated: [att="\\"' => array( '[att="\\"', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, '"', null, '' ), - 'Unterminated: [att="x"' => array( '[att="x"', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', null, '' ), - 'Unterminated: [att="x\\"i]' => array( '[att="x\\"i]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x"i]', null, '' ), - 'Unterminated: [att="x" i' => array( '[att="x" i', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - 'Unterminated: [att = x i' => array( '[att = x i', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'x', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[href]' => array( '[href]', 'href', null, null, null, '' ), + '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), + '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), + '[href].class' => array( '[href].class', 'href', null, null, null, '.class' 
), + '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), + '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), + '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), + '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), + + '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), + "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), + '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), // Invalid - 'Invalid: (empty string)' 
=> array( '' ), - 'Invalid: foo' => array( 'foo' ), - 'Invalid: [foo' => array( '[foo' ), - 'Invalid: [#foo]' => array( '[#foo]' ), - 'Invalid: [*|*]' => array( '[*|*]' ), - 'Invalid: [ns|*]' => array( '[ns|*]' ), - 'Invalid: [* |att]' => array( '[* |att]' ), - 'Invalid: [*| att]' => array( '[*| att]' ), - 'Invalid: [att * =]' => array( '[att * =]' ), - 'Invalid: [att+=val]' => array( '[att+=val]' ), - 'Invalid: [att=val ' => array( '[att=val ' ), - 'Invalid: [att i]' => array( '[att i]' ), - 'Invalid: [att s]' => array( '[att s]' ), - "Invalid: [att='val\\n']" => array( "[att='val\n']" ), - 'Invalid: [att=val i ' => array( '[att=val i ' ), - 'Invalid: [att="val"ix' => array( '[att="val"ix' ), + 'Invalid: (empty string)' => array( '' ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: [foo' => array( '[foo' ), + 'Invalid: [#foo]' => array( '[#foo]' ), + 'Invalid: [*|*]' => array( '[*|*]' ), + 'Invalid: [ns|*]' => array( '[ns|*]' ), + 'Invalid: [* |att]' => array( '[* |att]' ), + 'Invalid: [*| att]' => array( '[*| att]' ), + 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att+=val]' => array( '[att+=val]' ), + 'Invalid: [att=val ' => array( '[att=val ' ), + 'Invalid: [att i]' => array( '[att i]' ), + 'Invalid: [att s]' => array( '[att s]' ), + "Invalid: [att='val\\n']" => array( "[att='val\n']" ), + 'Invalid: [att=val i ' => array( '[att=val i ' ), + 'Invalid: [att="val"ix' => array( '[att="val"ix' ), ); } From 3949cc53b4bebdc8324a07a8ce49bd6ede291e53 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 5 Dec 2024 22:51:04 +0100 Subject: [PATCH 096/129] Improve some complex selector match tests --- .../tests/html-api/wpHtmlProcessor-select.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index 40e1d96978afe..d94190ff91077 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ 
b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -43,16 +43,18 @@ public function test_select_all( string $html, string $selector, int $match_coun /** * Data provider. * - * Most selectors are covered by the tag processor selector tests. - * This suite should focus on complex selectors. - * * @return array */ public static function data_selectors(): array { return array( - 'any descendant' => array( '

', 'section *', 4 ), - 'any child 1' => array( '

', 'section > *', 2 ), - 'any child 2' => array( '

', 'div > *', 1 ), + 'any' => array( '

', '*', 5 ), + 'quirks mode ID' => array( '

In quirks mode, ID matching is case-insensitive.', '#id', 2 ), + 'quirks mode class' => array( '

In quirks mode, class matching is case-insensitive.', '.c', 2 ), + 'no-quirks mode ID' => array( '

In no-quirks mode, ID matching is case-sensitive.', '#id', 1 ), + 'no-quirks mode class' => array( '

In no-quirks mode, class matching is case-sensitive.', '.c', 1 ), + 'any descendant' => array( '

', 'section *', 4 ), + 'any child 1' => array( '

', 'section > *', 2 ), + 'any child 2' => array( '

', 'div > *', 1 ), ); } From c696889197fab2308490cab7b47bf654eed63a61 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 15:45:55 +0100 Subject: [PATCH 097/129] Add and use matches_tag type selector method --- .../html-api/class-wp-css-complex-selector.php | 4 ++-- src/wp-includes/html-api/class-wp-css-type-selector.php | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index a4cfd46622560..a532e87ecc15d 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -43,7 +43,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { switch ( $combinator ) { case self::COMBINATOR_CHILD: - if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[0], $selector->type_selector->ident ) === 0 ) { + if ( $selector->type_selector->matches_tag( $breadcrumbs[0] ) ) { return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); } return false; @@ -51,7 +51,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { case self::COMBINATOR_DESCENDANT: // Find _all_ the breadcrumbs that match and recurse from each of them. 
for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { - if ( '*' === $selector->type_selector->ident || strcasecmp( $breadcrumbs[ $i ], $selector->type_selector->ident ) === 0 ) { + if ( $selector->type_selector->matches_tag( $breadcrumbs[ $i ] ) ) { $next_crumbs = array_slice( $breadcrumbs, $i + 1 ); if ( $this->explore_matches( array_slice( $selectors, 2 ), $next_crumbs ) ) { return true; diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index c65adce14047d..2a6bb952f5448 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -6,6 +6,14 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( null === $tag_name ) { return false; } + return $this->matches_tag( $tag_name ); + } + + /** + * @param string $tag_name + * @return bool + */ + public function matches_tag( string $tag_name ): bool { if ( '*' === $this->ident ) { return true; } From c19355151ee667b055d3414c9272907e37069b82 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 16:00:36 +0100 Subject: [PATCH 098/129] Improve complex selector structure Separate the self selector from relative selectors --- .../class-wp-css-complex-selector-list.php | 50 ++++---- .../class-wp-css-complex-selector.php | 110 ++++++++++++------ .../class-wp-css-compound-selector.php | 2 +- .../html-api/wpCssComplexSelectorList.php | 25 ++-- .../tests/html-api/wpHtmlProcessor-select.php | 17 +-- 5 files changed, 123 insertions(+), 81 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 0413b8dea426a..4a9fc03f582f8 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -95,16 +95,18 @@ final protected static function parse_complex_selector( string 
$input, int &$off } $updated_offset = $offset; - $selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $selector ) { + $self_selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $self_selector ) { return null; } - - $selectors = array( $selector ); - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; + /** @var array{WP_CSS_Compound_Selector, string}[] */ + $selectors = array(); $found_whitespace = self::parse_whitespace( $input, $updated_offset ); while ( $updated_offset < strlen( $input ) ) { + $combinator = null; + $next_selector = null; + if ( WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || @@ -114,42 +116,40 @@ final protected static function parse_complex_selector( string $input, int &$off ++$updated_offset; self::parse_whitespace( $input, $updated_offset ); - // Failure to find a selector here is a parse error - $selector = self::parse_compound_selector( $input, $updated_offset ); + // A combinator has been found, failure to find a selector here is a parse error. + $next_selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null === $next_selector ) { + return null; + } } elseif ( $found_whitespace ) { /* * Whitespace is ambiguous, it could be a descendant combinator or * insignificant whitespace. 
*/ - $selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $selector ) { - break; + $next_selector = self::parse_compound_selector( $input, $updated_offset ); + if ( null !== $next_selector ) { + $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; } - $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; - } else { - break; } - if ( null === $selector ) { - return null; + if ( null === $next_selector ) { + break; } - /* - * Subclass selectors in non-final position is not supported: - * - `div > .className` is valid - * - `.className > div` is not - */ - if ( $has_preceding_subclass_selector ) { + // $self_selector will pass to a relative selector where only the type selector is allowed. + if ( null !== $self_selector->subclass_selectors || null === $self_selector->type_selector ) { return null; } - $has_preceding_subclass_selector = null !== $selector->subclass_selectors; - $selectors[] = $combinator; - $selectors[] = $selector; + /** @var array{WP_CSS_Compound_Selector, string} */ + $selector_pair = array( $self_selector->type_selector, $combinator ); + $selectors[] = $selector_pair; + $self_selector = $next_selector; $found_whitespace = self::parse_whitespace( $input, $updated_offset ); } $offset = $updated_offset; - return new WP_CSS_Complex_Selector( $selectors ); + + return new WP_CSS_Complex_Selector( $self_selector, array_reverse( $selectors ) ); } } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index a532e87ecc15d..9db2912d3ac16 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -6,26 +6,87 @@ * > = [ ? 
] * */ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { + const COMBINATOR_CHILD = '>'; + const COMBINATOR_DESCENDANT = ' '; + const COMBINATOR_NEXT_SIBLING = '+'; + const COMBINATOR_SUBSEQUENT_SIBLING = '~'; + + /** + * This is the selector in the final position of the complex selector. This corresponds to the + * selected element. + * + * @example + * + * $self_selector + * ┏━━━━┻━━━━┓ + * .heading h1 > el.selected + * + * @readonly + * @var WP_CSS_Compound_Selector + */ + public $self_selector; + + /** + * This is the selector in the final position of the complex selector. This corresponds to the + * selected element. + * + * @example + * + * $relative_selectors + * ┏━━━━━━┻━━━━┓ + * .heading h1 > el.selected + * + * The example would have the following relative selectors (note that the order is reversed): + * + * @example + * + * array ( + * array( + * WP_CSS_Type_Selector( 'ident' => 'h1' ), + * '>', // WP_CSS_Complex_Selector::COMBINATOR_CHILD + * ), + * array( + * new WP_CSS_Type_Selector( 'header' ), + * ' ', // WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT + * ), + * ) + * + * @readonly + * @var array{WP_CSS_Type_Selector, string}[] + */ + public $relative_selectors; + + /** + * @param WP_CSS_Compound_Selector $self_selector + * @param array{WP_CSS_Type_Selector, string}[] $selectors + */ + public function __construct( + WP_CSS_Compound_Selector $self_selector, + ?array $relative_selectors + ) { + $this->self_selector = $self_selector; + $this->relative_selectors = $relative_selectors; + } + public function matches( WP_HTML_Processor $processor ): bool { // First selector must match this location. - if ( ! $this->selectors[0]->matches( $processor ) ) { + if ( ! 
$this->self_selector->matches( $processor ) ) { return false; } - if ( count( $this->selectors ) === 1 ) { + if ( null === $this->relative_selectors || array() === $this->relative_selectors ) { return true; } /** @var string[] */ $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); - $selectors = array_slice( $this->selectors, 1 ); - return $this->explore_matches( $selectors, $breadcrumbs ); + return $this->explore_matches( $this->relative_selectors, $breadcrumbs ); } /** * This only looks at breadcrumbs and can therefore only support type selectors. * - * @param array $selectors + * @param array{WP_CSS_Type_Selector, string}[] $selectors * @param string[] $breadcrumbs */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { @@ -36,24 +97,22 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; } - /** @var self::COMBINATOR_* */ - $combinator = $selectors[0]; - /** @var WP_CSS_Compound_Selector */ - $selector = $selectors[1]; + $selector = $selectors[0][0]; + $combinator = $selectors[0][1]; switch ( $combinator ) { case self::COMBINATOR_CHILD: - if ( $selector->type_selector->matches_tag( $breadcrumbs[0] ) ) { - return $this->explore_matches( array_slice( $selectors, 2 ), array_slice( $breadcrumbs, 1 ) ); + if ( $selector->matches_tag( $breadcrumbs[0] ) ) { + return $this->explore_matches( array_slice( $selectors, 1 ), array_slice( $breadcrumbs, 1 ) ); } return false; case self::COMBINATOR_DESCENDANT: // Find _all_ the breadcrumbs that match and recurse from each of them. 
for ( $i = 0; $i < count( $breadcrumbs ); $i++ ) { - if ( $selector->type_selector->matches_tag( $breadcrumbs[ $i ] ) ) { - $next_crumbs = array_slice( $breadcrumbs, $i + 1 ); - if ( $this->explore_matches( array_slice( $selectors, 2 ), $next_crumbs ) ) { + if ( $selector->matches_tag( $breadcrumbs[ $i ] ) ) { + $next_breadcrumbs = array_slice( $breadcrumbs, $i + 1 ); + if ( $this->explore_matches( array_slice( $selectors, 1 ), $next_breadcrumbs ) ) { return true; } } @@ -61,28 +120,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; default: - throw new Exception( "Combinator '{$combinator}' is not supported yet." ); + throw new Exception( "Unsupported combinator '{$combinator}' found." ); } } - - const COMBINATOR_CHILD = '>'; - const COMBINATOR_DESCENDANT = ' '; - const COMBINATOR_NEXT_SIBLING = '+'; - const COMBINATOR_SUBSEQUENT_SIBLING = '~'; - - /** - * even indexes are WP_CSS_Compound_Selector, odd indexes are string combinators. - * In reverse order to match the current element and then work up the tree. - * Any non-final selector is a type selector. 
- * - * @var array - */ - public $selectors = array(); - - /** - * @param array $selectors - */ - public function __construct( array $selectors ) { - $this->selectors = array_reverse( $selectors ); - } } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index e64695abe9ab3..2ef2051880936 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -25,7 +25,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { /** @var WP_CSS_Type_Selector|null */ public $type_selector; - /** @var array|null */ + /** @var (WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector)[]|null */ public $subclass_selectors; /** diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 0b17e57847662..795e230033cdb 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -20,7 +20,7 @@ public function __construct() { parent::__construct( array() ); } - public static function test_parse_complex_selector( string $input, int &$offset ) { + public static function test_parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { return self::parse_complex_selector( $input, $offset ); } }; @@ -30,21 +30,24 @@ public static function test_parse_complex_selector( string $input, int &$offset * @ticket 62653 */ public function test_parse_complex_selector() { - $input = 'el1 > .child#bar[baz=quux] , rest'; + $input = 'el1 el2 > .child#bar[baz=quux] , rest'; $offset = 0; - $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertSame( 3, count( $sel->selectors ) ); + /** @var WP_CSS_Complex_Selector|null */ + $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertSame( 
'el1', $sel->selectors[2]->type_selector->ident ); - $this->assertNull( $sel->selectors[2]->subclass_selectors ); + $this->assertSame( 2, count( $sel->relative_selectors ) ); - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->selectors[1] ); + // Relative selectors should be reverse ordered. + $this->assertSame( 'el2', $sel->relative_selectors[0][0]->ident ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->relative_selectors[0][1] ); - $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); - $this->assertNull( $sel->selectors[0]->type_selector ); - $this->assertSame( 3, count( $sel->selectors[0]->subclass_selectors ) ); - $this->assertSame( 'child', $sel->selectors[0]->subclass_selectors[0]->ident ); + $this->assertSame( 'el1', $sel->relative_selectors[1][0]->ident ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->relative_selectors[1][1] ); + + $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); + $this->assertNull( $sel->self_selector->type_selector ); + $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->ident ); $this->assertSame( ', rest', substr( $input, $offset ) ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index d94190ff91077..21828faf42e80 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -47,14 +47,15 @@ public function test_select_all( string $html, string $selector, int $match_coun */ public static function data_selectors(): array { return array( - 'any' => array( '

', '*', 5 ), - 'quirks mode ID' => array( '

In quirks mode, ID matching is case-insensitive.', '#id', 2 ), - 'quirks mode class' => array( '

In quirks mode, class matching is case-insensitive.', '.c', 2 ), - 'no-quirks mode ID' => array( '

In no-quirks mode, ID matching is case-sensitive.', '#id', 1 ), - 'no-quirks mode class' => array( '

In no-quirks mode, class matching is case-sensitive.', '.c', 1 ), - 'any descendant' => array( '

', 'section *', 4 ), - 'any child 1' => array( '

', 'section > *', 2 ), - 'any child 2' => array( '

', 'div > *', 1 ), + 'any' => array( '

', '*', 5 ), + 'quirks mode ID' => array( '

In quirks mode, ID matching is case-insensitive.', '#id', 2 ), + 'quirks mode class' => array( '

In quirks mode, class matching is case-insensitive.', '.c', 2 ), + 'no-quirks mode ID' => array( '

In no-quirks mode, ID matching is case-sensitive.', '#id', 1 ), + 'no-quirks mode class' => array( '

In no-quirks mode, class matching is case-sensitive.', '.c', 1 ), + 'any descendant' => array( '

', 'section *', 4 ), + 'any child matches all children' => array( '

', 'section > *', 2 ), + + 'multiple complex selectors' => array( '

', 'section > div p > i', 1 ), ); } From 9dd811432a685b4efa15692a9d5d5dae43b475c0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 16:10:05 +0100 Subject: [PATCH 099/129] Rework structure of complex_selector class --- .../class-wp-css-complex-selector.php | 32 ++++++++++++------- .../html-api/wpCssComplexSelectorList.php | 10 +++--- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 9db2912d3ac16..1f03f133c8806 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -12,7 +12,7 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { const COMBINATOR_SUBSEQUENT_SIBLING = '~'; /** - * This is the selector in the final position of the complex selector. This corresponds to the + * The "self selector" is the last element in a complex selector, it corresponds to the * selected element. * * @example @@ -27,12 +27,20 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { public $self_selector; /** - * This is the selector in the final position of the complex selector. This corresponds to the - * selected element. + * The "context selectors" are zero or more elements that provide additional constraints for + * the "self selector." + * + * In this example selector, and element like `` is selected iff: + * - it is a child of an `H1` element + * - *and* that `H1` element is a descendant of a `HEADING` element. + * + * The `H1` and `HEADING` parts of this selector are the "context selectors." Note that this + * terminology is used for purposes of this class but does not correspond to language in the + * CSS or selector specifications. 
 * * @example * - * $relative_selectors + * $context_selectors * ┏━━━━━━┻━━━━┓ * .heading h1 > el.selected * @@ -52,20 +60,20 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { * ) * * @readonly - * @var array{WP_CSS_Type_Selector, string}[] + * @var array{WP_CSS_Type_Selector, string}[]|null */ - public $relative_selectors; + public $context_selectors; /** * @param WP_CSS_Compound_Selector $self_selector - * @param array{WP_CSS_Type_Selector, string}[] $selectors + * @param array{WP_CSS_Type_Selector, string}[]|null $context_selectors */ public function __construct( WP_CSS_Compound_Selector $self_selector, - ?array $relative_selectors + ?array $context_selectors ) { - $this->self_selector = $self_selector; - $this->relative_selectors = $relative_selectors; + $this->self_selector = $self_selector; + $this->context_selectors = $context_selectors; } public function matches( WP_HTML_Processor $processor ): bool { @@ -74,13 +82,13 @@ public function matches( WP_HTML_Processor $processor ): bool { return false; } - if ( null === $this->relative_selectors || array() === $this->relative_selectors ) { + if ( null === $this->context_selectors || array() === $this->context_selectors ) { return true; } /** @var string[] */ $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); - return $this->explore_matches( $this->relative_selectors, $breadcrumbs ); + return $this->explore_matches( $this->context_selectors, $breadcrumbs ); } /** diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 795e230033cdb..dc89869ea2e66 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -36,14 +36,14 @@ public function test_parse_complex_selector() { /** @var WP_CSS_Complex_Selector|null */ $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertSame( 2, 
count( $sel->relative_selectors ) ); + $this->assertSame( 2, count( $sel->context_selectors ) ); // Relative selectors should be reverse ordered. - $this->assertSame( 'el2', $sel->relative_selectors[0][0]->ident ); - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->relative_selectors[0][1] ); + $this->assertSame( 'el2', $sel->context_selectors[0][0]->ident ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->context_selectors[0][1] ); - $this->assertSame( 'el1', $sel->relative_selectors[1][0]->ident ); - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->relative_selectors[1][1] ); + $this->assertSame( 'el1', $sel->context_selectors[1][0]->ident ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->context_selectors[1][1] ); $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); $this->assertNull( $sel->self_selector->type_selector ); From b134308e4017f53d40d55df4aa0b03842d51974f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 16:54:08 +0100 Subject: [PATCH 100/129] Improve documentation --- .../html-api/class-wp-css-complex-selector.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 1f03f133c8806..2d4d0212b24f2 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -30,7 +30,11 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { * The "context selectors" are zero or more elements that provide additional constraints for * the "self selector." 
 * - * In this example selector, and element like `` is selected iff: + * These selectors are represented as 2-tuples where the element at index 0 is the selector and + * the element at index 1 is the combinator string constant from this class, + * e.g. `WP_CSS_Complex_Selector::COMBINATOR_CHILD`. + * + * In the example selector below, an element like `` is selected iff: * - it is a child of an `H1` element * - *and* that `H1` element is a descendant of a `HEADING` element. * @@ -44,7 +48,7 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { * ┏━━━━━━┻━━━━┓ * .heading h1 > el.selected * - * The example would have the following relative selectors (note that the order is reversed): + * The example would have the following context selectors: * * @example * @@ -59,6 +63,10 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { * ), * ) * + * Note that the order of context selectors is reversed. This is to match the self selector + * first and then match the context selectors beginning with the selector closest to the self + * selector. + * * @readonly * @var array{WP_CSS_Type_Selector, string}[]|null */ From 94c06ef32fd69eecd7ccddd40714edef1a79f493 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:11:55 +0100 Subject: [PATCH 101/129] Document complex selector class --- .../class-wp-css-complex-selector.php | 52 ++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 2d4d0212b24f2..bd51884901d93 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -1,14 +1,47 @@ in the grammar. + * CSS complex selector. + * + * This class implements a CSS complex selector and is used to test for matching HTML tags + * in a {@see WP_HTML_Tag_Processor}. 
+ * + * A complex selector is a selector with zero or more combinator-selector pairs. + * + * @since TBD * - * > = [ ? ] * + * @access private */ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { - const COMBINATOR_CHILD = '>'; - const COMBINATOR_DESCENDANT = ' '; - const COMBINATOR_NEXT_SIBLING = '+'; + /** + * Child combinator. + */ + const COMBINATOR_CHILD = '>'; + + /** + * Descendant combinator. + */ + const COMBINATOR_DESCENDANT = ' '; + + /** + * Next sibling combinator. + * + * This combinator is not currently supported. + */ + const COMBINATOR_NEXT_SIBLING = '+'; + + /** + * Subsequent sibling combinator. + * + * This combinator is not currently supported. + */ const COMBINATOR_SUBSEQUENT_SIBLING = '~'; /** @@ -84,6 +117,12 @@ public function __construct( $this->context_selectors = $context_selectors; } + /** + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. + */ public function matches( WP_HTML_Processor $processor ): bool { // First selector must match this location. if ( ! $this->self_selector->matches( $processor ) ) { @@ -100,10 +139,11 @@ public function matches( WP_HTML_Processor $processor ): bool { } /** - * This only looks at breadcrumbs and can therefore only support type selectors. + * Checks for matches recursively comparing context selectors with breadcrumbs. * * @param array{WP_CSS_Type_Selector, string}[] $selectors * @param string[] $breadcrumbs + * @return bool True if a match is found, otherwise false. 
*/ private function explore_matches( array $selectors, array $breadcrumbs ): bool { if ( array() === $selectors ) { From f46fceda45dd38676191126761fb3c4c4439d0be Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:17:39 +0100 Subject: [PATCH 102/129] Document matches functions --- .../html-api/class-wp-css-attribute-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-class-selector.php | 6 ++++++ .../html-api/class-wp-css-compound-selector-list.php | 6 ++++-- src/wp-includes/html-api/class-wp-css-compound-selector.php | 6 ++++++ src/wp-includes/html-api/class-wp-css-id-selector.php | 6 ++++++ src/wp-includes/html-api/class-wp-css-type-selector.php | 6 ++++++ .../html-api/interface-wp-css-html-processor-matcher.php | 5 ++++- .../interface-wp-css-html-tag-processor-matcher.php | 5 ++++- 8 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 7036dd3775cc1..dae71c4295348 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -122,7 +122,7 @@ public function __construct( string $name, ?string $matcher = null, ?string $val /** * Determines if the processor's current position matches the selector. * - * @param WP_HTML_Tag_Processor $processor + * @param WP_HTML_Tag_Processor $processor The processor. * @return bool True if the processor's current position matches the selector. 
*/ public function matches( WP_HTML_Tag_Processor $processor ): bool { diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index c3e7ced008a6e..c9ab061578025 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -1,6 +1,12 @@ has_class( $this->ident ); } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 8cca2e27c9ec3..ce116a236e171 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -78,8 +78,10 @@ */ class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher { /** - * @param WP_HTML_Tag_Processor $processor - * @return bool + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Tag_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. */ public function matches( $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 2ef2051880936..0ae507803c42f 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -6,6 +6,12 @@ * > = [ ? * ]! */ final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { + /** + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Tag_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. + */ public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( $this->type_selector ) { if ( ! 
$this->type_selector->matches( $processor ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 83339ff839317..7e64432430409 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -8,6 +8,12 @@ public function __construct( string $ident ) { $this->ident = $ident; } + /** + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Tag_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. + */ public function matches( WP_HTML_Tag_Processor $processor ): bool { $id = $processor->get_attribute( 'id' ); if ( ! is_string( $id ) ) { diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index 2a6bb952f5448..6bba9f7e2450e 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -1,6 +1,12 @@ get_tag(); if ( null === $tag_name ) { diff --git a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php index 2ae29413b35d2..b77ef40931d83 100644 --- a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php +++ b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php @@ -2,7 +2,10 @@ interface WP_CSS_HTML_Processor_Matcher { /** - * @return bool + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. 
*/ public function matches( WP_HTML_Processor $processor ): bool; } diff --git a/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php index 73d108150bb95..302ee8972a162 100644 --- a/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php +++ b/src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php @@ -2,7 +2,10 @@ interface WP_CSS_HTML_Tag_Processor_Matcher { /** - * @return bool + * Determines if the processor's current position matches the selector. + * + * @param WP_HTML_Tag_Processor $processor The processor. + * @return bool True if the processor's current position matches the selector. */ public function matches( WP_HTML_Tag_Processor $processor ): bool; } From 1bacfd71810f4e39bcb5fd0eb83688c82878ea4a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:17:58 +0100 Subject: [PATCH 103/129] Simplify condition in compound::matches --- src/wp-includes/html-api/class-wp-css-compound-selector.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 0ae507803c42f..f281146110f30 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -13,10 +13,8 @@ final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matche * @return bool True if the processor's current position matches the selector. */ public function matches( WP_HTML_Tag_Processor $processor ): bool { - if ( $this->type_selector ) { - if ( ! $this->type_selector->matches( $processor ) ) { - return false; - } + if ( $this->type_selector && ! 
$this->type_selector->matches( $processor ) ) { + return false; } if ( null !== $this->subclass_selectors ) { foreach ( $this->subclass_selectors as $subclass_selector ) { From a274ea0ffaed3785d12909c657b91594b76b13f4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:20:19 +0100 Subject: [PATCH 104/129] Change class require order --- src/wp-settings.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-settings.php b/src/wp-settings.php index b1f25042aa7d6..b52fe8ab6181c 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -267,10 +267,10 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; require ABSPATH . WPINC . '/html-api/interface-wp-css-html-tag-processor-matcher.php'; require ABSPATH . WPINC . '/html-api/interface-wp-css-html-processor-matcher.php'; +require ABSPATH . WPINC . '/html-api/class-wp-css-attribute-selector.php'; +require ABSPATH . WPINC . '/html-api/class-wp-css-class-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-id-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-type-selector.php'; -require ABSPATH . WPINC . '/html-api/class-wp-css-class-selector.php'; -require ABSPATH . WPINC . '/html-api/class-wp-css-attribute-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-compound-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-complex-selector.php'; require ABSPATH . WPINC . 
'/html-api/class-wp-css-compound-selector-list.php'; From 12a0a99d4c4e7e51fcb18cae4b384dfe41f137a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:21:12 +0100 Subject: [PATCH 105/129] Annotate matches processor argument type --- .../html-api/class-wp-css-compound-selector-list.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index ce116a236e171..27900d40a238c 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -83,7 +83,7 @@ class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher * @param WP_HTML_Tag_Processor $processor The processor. * @return bool True if the processor's current position matches the selector. */ - public function matches( $processor ): bool { + public function matches( WP_HTML_Tag_Processor $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { return false; } From 0e2b34aba90e3dad4354c362efe363ec8bb63532 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:28:06 +0100 Subject: [PATCH 106/129] Document class selector and update class_name property --- .../html-api/class-wp-css-class-selector.php | 42 +++++++++++++++---- .../html-api/wpCssComplexSelectorList.php | 2 +- .../html-api/wpCssCompoundSelectorList.php | 4 +- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index c9ab061578025..cdd38d951e45c 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -1,6 +1,39 @@ class_name = $class_name; + } + /** * Determines if the processor's current position matches the selector. 
* @@ -8,13 +41,6 @@ final class WP_CSS_Class_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { * @return bool True if the processor's current position matches the selector. */ public function matches( WP_HTML_Tag_Processor $processor ): bool { - return (bool) $processor->has_class( $this->ident ); - } - - /** @var string */ - public $ident; - - public function __construct( string $ident ) { - $this->ident = $ident; + return (bool) $processor->has_class( $this->class_name ); } } diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index dc89869ea2e66..1bf77f8c60317 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -47,7 +47,7 @@ public function test_parse_complex_selector() { $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); $this->assertNull( $sel->self_selector->type_selector ); - $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->ident ); + $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->class_name ); $this->assertSame( ', rest', substr( $input, $offset ) ); } diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 715e0e26bc9cd..fa45ed767d5ca 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -239,7 +239,7 @@ public function test_parse_class( string $input, ?string $expected = null, ?stri if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $expected, $result->ident ); + $this->assertSame( $expected, $result->class_name ); $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -383,7 +383,7 @@ public function test_parse_selector() { $this->assertSame( 'el', $sel->type_selector->ident ); $this->assertSame( 3, count( 
$sel->subclass_selectors ) ); - $this->assertSame( 'foo', $sel->subclass_selectors[0]->ident, 'foo' ); + $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' ); $this->assertSame( 'bar', $sel->subclass_selectors[1]->ident, 'bar' ); $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' ); $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher ); From dea10291c67b55512658af04bc483385965acc08 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:33:24 +0100 Subject: [PATCH 107/129] Document ID selector class, rename id property --- .../html-api/class-wp-css-id-selector.php | 38 ++++++++++++++++--- .../html-api/wpCssCompoundSelectorList.php | 4 +- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 7e64432430409..5bb6438df6eb3 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -1,11 +1,37 @@ ident = $ident; + /** + * Constructor. + * + * @param string $id The ID to match. + */ + public function __construct( string $id ) { + $this->id = $id; } /** @@ -23,7 +49,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { $case_insensitive = $processor->is_quirks_mode(); return $case_insensitive - ? 0 === strcasecmp( $id, $this->ident ) - : $processor->get_attribute( 'id' ) === $this->ident; + ? 
0 === strcasecmp( $id, $this->id ) + : $processor->get_attribute( 'id' ) === $this->id; } } diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index fa45ed767d5ca..8334ebd5a3a75 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -205,7 +205,7 @@ public function test_parse_id( string $input, ?string $expected = null, ?string if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $expected, $result->ident ); + $this->assertSame( $expected, $result->id ); $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -384,7 +384,7 @@ public function test_parse_selector() { $this->assertSame( 'el', $sel->type_selector->ident ); $this->assertSame( 3, count( $sel->subclass_selectors ) ); $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' ); - $this->assertSame( 'bar', $sel->subclass_selectors[1]->ident, 'bar' ); + $this->assertSame( 'bar', $sel->subclass_selectors[1]->id, 'bar' ); $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' ); $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher ); $this->assertSame( 'quux', $sel->subclass_selectors[2]->value ); From d268f4cfe03a3872865c94b5e03c2e815f106576 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 18:38:47 +0100 Subject: [PATCH 108/129] Document type selector class and rename type property --- .../html-api/class-wp-css-type-selector.php | 49 ++++++++++++++----- .../html-api/wpCssComplexSelectorList.php | 4 +- .../html-api/wpCssCompoundSelectorList.php | 4 +- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index 6bba9f7e2450e..66d6a1f2db48f 100644 --- 
a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -1,6 +1,38 @@ type = $type; + } + /** * Determines if the processor's current position matches the selector. * @@ -16,24 +48,15 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { } /** + * Checks whether the selector matches the provided tag name. + * * @param string $tag_name * @return bool */ public function matches_tag( string $tag_name ): bool { - if ( '*' === $this->ident ) { + if ( '*' === $this->type ) { return true; } - return 0 === strcasecmp( $tag_name, $this->ident ); - } - - /** - * @var string - * - * The type identifier string or '*'. - */ - public $ident; - - public function __construct( string $ident ) { - $this->ident = $ident; + return 0 === strcasecmp( $tag_name, $this->type ); } } diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 1bf77f8c60317..076d5b6f65ee6 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -39,10 +39,10 @@ public function test_parse_complex_selector() { $this->assertSame( 2, count( $sel->context_selectors ) ); // Relative selectors should be reverse ordered. 
- $this->assertSame( 'el2', $sel->context_selectors[0][0]->ident ); + $this->assertSame( 'el2', $sel->context_selectors[0][0]->type ); $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->context_selectors[0][1] ); - $this->assertSame( 'el1', $sel->context_selectors[1][0]->ident ); + $this->assertSame( 'el1', $sel->context_selectors[1][0]->type ); $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->context_selectors[1][1] ); $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 8334ebd5a3a75..1dfdc79714e2c 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -273,7 +273,7 @@ public function test_parse_type( string $input, ?string $expected = null, ?strin if ( null === $expected ) { $this->assertNull( $result ); } else { - $this->assertSame( $expected, $result->ident ); + $this->assertSame( $expected, $result->type ); $this->assertSame( $rest, substr( $input, $offset ) ); } } @@ -381,7 +381,7 @@ public function test_parse_selector() { $offset = 0; $sel = $this->test_class::test_parse_compound_selector( $input, $offset ); - $this->assertSame( 'el', $sel->type_selector->ident ); + $this->assertSame( 'el', $sel->type_selector->type ); $this->assertSame( 3, count( $sel->subclass_selectors ) ); $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' ); $this->assertSame( 'bar', $sel->subclass_selectors[1]->id, 'bar' ); From d89fbd989d86fb16f2d34eb896031d32db817055 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:03:35 +0100 Subject: [PATCH 109/129] Document compound selector --- .../class-wp-css-compound-selector.php | 63 ++++++++++++++----- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git 
a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index f281146110f30..19aad862db7e2 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -1,11 +1,55 @@ in the grammar. + * CSS compound selector. + * + * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. + * + * A compound selector is a combination of: + * - An optional type selector. + * - Zero or more subclass selectors (ID, class, or attribute selectors). + * - At least one of the above. * - * > = [ ? * ]! + * @since TBD + * + * @access private */ final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { + /** + * The type selector. + * + * @var WP_CSS_Type_Selector|null + */ + public $type_selector; + + /** + * The subclass selectors. + * + * Subclass selectors are ID, class, or attribute selectors. + * + * @var (WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector)[]|null + */ + public $subclass_selectors; + + /** + * Constructor. + * + * @param WP_CSS_Type_Selector|null $type_selector The type selector or null. + * @param (WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector)[]|null $subclass_selectors + * The array of subclass selectors or null. + */ + public function __construct( ?WP_CSS_Type_Selector $type_selector, ?array $subclass_selectors ) { + $this->type_selector = $type_selector; + $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; + } + /** * Determines if the processor's current position matches the selector. 
* @@ -25,19 +69,4 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { } return true; } - - /** @var WP_CSS_Type_Selector|null */ - public $type_selector; - - /** @var (WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector)[]|null */ - public $subclass_selectors; - - /** - * @param WP_CSS_Type_Selector|null $type_selector - * @param array $subclass_selectors - */ - public function __construct( ?WP_CSS_Type_Selector $type_selector, array $subclass_selectors ) { - $this->type_selector = $type_selector; - $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; - } } From 8ced3aa2da7f1c77a12b344516c9dae8eaad9be5 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:03:59 +0100 Subject: [PATCH 110/129] Improve attribute selector docs and types --- .../class-wp-css-attribute-selector.php | 111 +++++++++++------- .../class-wp-css-compound-selector-list.php | 2 +- .../class-wp-css-compound-selector.php | 2 +- .../html-api/wpCssCompoundSelectorList.php | 2 +- 4 files changed, 73 insertions(+), 44 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index dae71c4295348..b64efea0bb45c 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -19,98 +19,127 @@ */ final class WP_CSS_Attribute_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { /** - * [att=val] - * Represents an element with the att attribute whose value is exactly "val". + * The attribute value is matched exactly. + * + * @example + * + * [att=val] */ - const MATCH_EXACT = 'MATCH_EXACT'; + const MATCH_EXACT = 'exact'; /** - * [attr~=value] - * Represents elements with an attribute name of attr whose value is a - * whitespace-separated list of words, one of which is exactly value. 
+ * The attribute value matches any value in a whitespace separated list of words exactly. + * + * @example + * + * [attr~=value] */ - const MATCH_ONE_OF_EXACT = 'MATCH_ONE_OF_EXACT'; + const MATCH_ONE_OF_EXACT = 'one-of'; /** - * [attr|=value] - * Represents elements with an attribute name of attr whose value can be exactly value or - * can begin with value immediately followed by a hyphen, - (U+002D). It is often used for - * language subcode matches. + * The attribute value is matched exactly or matches the beginning of the attribute + * immediately followed by a hyphen. + * + * @example + * + * [attr|=value] */ - const MATCH_EXACT_OR_EXACT_WITH_HYPHEN = 'MATCH_EXACT_OR_EXACT_WITH_HYPHEN'; + const MATCH_EXACT_OR_HYPHEN_PREFIXED = 'exact-or-hyphen-prefixed'; /** - * [attr^=value] - * Represents elements with an attribute name of attr whose value is prefixed (preceded) - * by value. + * The attribute value matches the start of the attribute. + * + * @example + * + * [attr^=value] */ - const MATCH_PREFIXED_BY = 'MATCH_PREFIXED_BY'; + const MATCH_PREFIXED_BY = 'prefixed'; /** - * [attr$=value] - * Represents elements with an attribute name of attr whose value is suffixed (followed) - * by value. + * The attribute value matches the end of the attribute. + * + * @example + * + * [attr$=value] */ - const MATCH_SUFFIXED_BY = 'MATCH_SUFFIXED_BY'; + const MATCH_SUFFIXED_BY = 'suffixed'; /** - * [attr*=value] - * Represents elements with an attribute name of attr whose value contains at least one - * occurrence of value within the string. + * The attribute value is contained in the attribute. + * + * @example + * + * [attr*=value] */ - const MATCH_CONTAINS = 'MATCH_CONTAINS'; + const MATCH_CONTAINS = 'contains'; /** - * Modifier for case sensitive matching - * [attr=value s] + * Modifier for case sensitive matching. 
+ * + * @example + * + * [attr=value s] */ const MODIFIER_CASE_SENSITIVE = 'case-sensitive'; /** - * Modifier for case insensitive matching - * [attr=value i] + * Modifier for case insensitive matching. + * + * @example + * + * [attr=value i] */ const MODIFIER_CASE_INSENSITIVE = 'case-insensitive'; /** - * The attribute name. + * The name of the attribute to match. * * @var string - * @readonly */ public $name; /** * The attribute matcher. * - * @var null|self::MATCH_* - * @readonly + * Allowed string values are the class constants: + * - {@see WP_CSS_Attribute_Selector::MATCH_EXACT} + * - {@see WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT} + * - {@see WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED} + * - {@see WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY} + * - {@see WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY} + * - {@see WP_CSS_Attribute_Selector::MATCH_CONTAINS} + * + * @var string|null */ public $matcher; /** - * The attribute value. + * The attribute value to match. * * @var string|null - * @readonly */ public $value; /** * The attribute modifier. * - * @var null|self::MODIFIER_* - * @readonly + * Allowed string values are the class constants: + * - {@see WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE} + * - {@see WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE} + * + * @var string|null */ public $modifier; /** * Constructor. * - * @param string $name - * @param null|self::MATCH_* $matcher - * @param null|string $value - * @param null|self::MODIFIER_* $modifier + * @param string $name The attribute name. + * @param string|null $matcher The attribute matcher. + * Must be one of the class MATCH_* constants or null. + * @param string|null $value The attribute value to match. + * @param string|null $modifier The attribute case modifier. + * Must be one of the class MODIFIER_* constants or null. 
*/ public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { $this->name = $name; @@ -159,7 +188,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { } return false; - case self::MATCH_EXACT_OR_EXACT_WITH_HYPHEN: + case self::MATCH_EXACT_OR_HYPHEN_PREFIXED: // Attempt the full match first if ( $case_insensitive diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 27900d40a238c..02a958e647ef1 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -294,7 +294,7 @@ final protected static function parse_attribute_selector( string $input, int &$o $updated_offset += 2; break; case '|': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN; + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED; $updated_offset += 2; break; case '^': diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 19aad862db7e2..414d36301ec5d 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -1,6 +1,6 @@ array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_EXACT_WITH_HYPHEN, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', 
WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), From ca1a12973819a7d7b3ea05b4bff160ced33532dd Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:04:11 +0100 Subject: [PATCH 111/129] Update matches docs --- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 3 +-- src/wp-includes/html-api/class-wp-css-class-selector.php | 3 +-- src/wp-includes/html-api/class-wp-css-id-selector.php | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index b64efea0bb45c..fbdef5ce930be 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -10,8 +10,7 @@ /** * CSS attribute selector. * - * This class implements a CSS attribute selector and is used to test for matching HTML tags - * in a {@see WP_HTML_Tag_Processor}. + * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * * @since TBD * diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index cdd38d951e45c..02410546a4b52 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -10,8 +10,7 @@ /** * CSS class selector. * - * This class implements a CSS class selector and is used to test for matching HTML tags - * in a {@see WP_HTML_Tag_Processor}. 
+ * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * * @since TBD * diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 5bb6438df6eb3..ca61f00bb7e67 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -10,8 +10,7 @@ /** * CSS ID selector. * - * This class implements a CSS ID selector and is used to test for matching HTML tags - * in a {@see WP_HTML_Tag_Processor}. + * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * * @since TBD * From 71fd62aa9d5f8342ed221ec674c420753b838e14 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:15:27 +0100 Subject: [PATCH 112/129] Document complex selector class --- .../class-wp-css-complex-selector.php | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index bd51884901d93..c6795254ea7b8 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -10,10 +10,10 @@ /** * CSS complex selector. * - * This class implements a CSS complex selector and is used to test for matching HTML tags - * in a {@see WP_HTML_Tag_Processor}. + * This class is used to test for matching HTML tags in a {@see WP_HTML_Processor}. * - * A complex selector is a selector with zero or more combinator-selector pairs. + * A compound selector is at least a single compound selector. There may be additional selectors + * with combinators. 
* * @since TBD * @@ -106,8 +106,10 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { public $context_selectors; /** - * @param WP_CSS_Compound_Selector $self_selector - * @param array{WP_CSS_Type_Selector, string}[]|null $selectors + * Constructor. + * + * @param WP_CSS_Compound_Selector $self_selector The selector in the final position. + * @param array{WP_CSS_Type_Selector, string}[]|null $selectors The context selectors. */ public function __construct( WP_CSS_Compound_Selector $self_selector, @@ -133,16 +135,15 @@ public function matches( WP_HTML_Processor $processor ): bool { return true; } - /** @var string[] */ $breadcrumbs = array_slice( array_reverse( $processor->get_breadcrumbs() ), 1 ); return $this->explore_matches( $this->context_selectors, $breadcrumbs ); } /** - * Checks for matches recursively comparing context selectors with breadcrumbs. + * Checks for matches by recursively comparing context selectors with breadcrumbs. * - * @param array{WP_CSS_Type_Selector, string}[] $selectors - * @param string[] $breadcrumbs + * @param array{WP_CSS_Type_Selector, string}[] $selectors Selectors to match. + * @param string[] $breadcrumbs Breadcrumbs. * @return bool True if a match is found, otherwise false. */ private function explore_matches( array $selectors, array $breadcrumbs ): bool { @@ -176,7 +177,16 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; default: - throw new Exception( "Unsupported combinator '{$combinator}' found." ); + _doing_it_wrong( + __METHOD__, + sprintf( + // translators: %s: A CSS selector combinator like ">" or "+". + __( 'Unsupported combinator "%s" found.' 
), + $combinator + ), + '6.8.0' + ); + return false; } } } From 25dbb198cbfa081931b9a60dbda7e5f682d4b8d4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:23:28 +0100 Subject: [PATCH 113/129] PHP < 7.4 does not like this annotation --- .../html-api/class-wp-css-compound-selector-list.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 02a958e647ef1..a74ffe0f45fc9 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -83,7 +83,7 @@ class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher * @param WP_HTML_Tag_Processor $processor The processor. * @return bool True if the processor's current position matches the selector. */ - public function matches( WP_HTML_Tag_Processor $processor ): bool { + public function matches( $processor ): bool { if ( $processor->get_token_type() !== '#tag' ) { return false; } From 70cf7f7584b15ac2934dc90a86b030a21e2ad2d4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:47:52 +0100 Subject: [PATCH 114/129] Update since annotations to 6.8.0 --- .../html-api/class-wp-css-attribute-selector.php | 4 ++-- src/wp-includes/html-api/class-wp-css-class-selector.php | 4 ++-- .../html-api/class-wp-css-complex-selector-list.php | 6 +++--- src/wp-includes/html-api/class-wp-css-complex-selector.php | 4 ++-- .../html-api/class-wp-css-compound-selector-list.php | 6 +++--- src/wp-includes/html-api/class-wp-css-compound-selector.php | 4 ++-- src/wp-includes/html-api/class-wp-css-id-selector.php | 4 ++-- src/wp-includes/html-api/class-wp-css-type-selector.php | 4 ++-- src/wp-includes/html-api/class-wp-html-processor.php | 4 ++-- src/wp-includes/html-api/class-wp-html-tag-processor.php | 4 ++-- tests/phpunit/tests/html-api/wpCssComplexSelectorList.php 
| 2 +- tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php | 2 +- tests/phpunit/tests/html-api/wpHtmlProcessor-select.php | 2 +- tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php | 2 +- 14 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index fbdef5ce930be..7543cb834e820 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -12,7 +12,7 @@ * * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index 02410546a4b52..fa287cdf5c580 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -12,7 +12,7 @@ * * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. 
* - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 4a9fc03f582f8..fcc6032589584 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -28,7 +28,7 @@ * - Next sibling (`el + el`) * - Subsequent sibling (`el ~ el`) * - * @since TBD + * @since 6.8.0 * * @access private */ @@ -37,7 +37,7 @@ class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List impleme * Takes a CSS selector string and returns an instance of itself or `null` if the selector * string is invalid or unsupported. * - * @since TBD + * @since 6.8.0 * * @param string $input CSS selectors. * @return static|null diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index c6795254ea7b8..4461e4d7d92f3 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -15,7 +15,7 @@ * A compound selector is at least a single compound selector. There may be additional selectors * with combinators. 
* - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index a74ffe0f45fc9..a2ff48e089f5d 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -67,7 +67,7 @@ * - `svg|*` to select all SVG elements * - `html|title` to select only HTML TITLE elements. * - * @since TBD + * @since 6.8.0 * * @access private * @@ -116,7 +116,7 @@ protected function __construct( array $selectors ) { * Takes a CSS selector string and returns an instance of itself or `null` if the selector * string is invalid or unsupported. * - * @since TBD + * @since 6.8.0 * * @param string $input CSS selectors. * @return static|null diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 414d36301ec5d..9596876685212 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -17,7 +17,7 @@ * - Zero or more subclass selectors (ID, class, or attribute selectors). * - At least one of the above. 
* - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index ca61f00bb7e67..2a600923fa2a2 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -12,7 +12,7 @@ * * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index 66d6a1f2db48f..3f7671851c375 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -4,7 +4,7 @@ * * @package WordPress * @subpackage HTML-API - * @since TBD + * @since 6.8.0 */ /** @@ -12,7 +12,7 @@ * * This class is used to test for matching HTML tags in a {@see WP_HTML_Tag_Processor}. * - * @since TBD + * @since 6.8.0 * * @access private */ diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 7ad65cb9d03d4..6685eaaf79aea 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -652,7 +652,7 @@ public function get_unsupported_exception() { * ); * } * - * @since TBD + * @since 6.8.0 * * @param string $selector_string Selector string. * @return Generator A generator pausing on each tag matching the selector. @@ -692,7 +692,7 @@ public function select_all( $selector_string ): Generator { * $processor->get_attribute( 'charset' ), // string(5) "utf-8" * ); * - * @since TBD + * @since 6.8.0 * * @param string $selector_string * @return bool True if a matching tag was found, otherwise false. 
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a7633291b6bb2..8ea6e930f5b91 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -877,7 +877,7 @@ public function change_parsing_namespace( string $new_namespace ): bool { * ); * } * - * @since TBD + * @since 6.8.0 * * @param string $selector_string Selector string. * @return Generator A generator pausing on each tag matching the selector. @@ -917,7 +917,7 @@ public function select_all( $selector_string ): Generator { * $processor->get_attribute( 'charset' ), // string(5) "utf-8" * ); * - * @since TBD + * @since 6.8.0 * * @param string $selector_string * @return bool True if a matching tag was found, otherwise false. diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 076d5b6f65ee6..829af95a55d5f 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -6,7 +6,7 @@ * * @subpackage HTML-API * - * @since TBD + * @since 6.8.0 * * @group html-api */ diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index 2c7a4695f679e..c112585e622c8 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -6,7 +6,7 @@ * * @subpackage HTML-API * - * @since TBD + * @since 6.8.0 * * @group html-api */ diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php index 21828faf42e80..a8f6a7c949080 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-select.php @@ -8,7 +8,7 @@ * @package WordPress * @subpackage HTML-API 
* - * @since TBD + * @since 6.8.0 * * @group html-api */ diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php index 586e38b4bafb2..28f88778629ce 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-select.php @@ -8,7 +8,7 @@ * @package WordPress * @subpackage HTML-API * - * @since TBD + * @since 6.8.0 * * @group html-api */ From 355c9a24e0d983813ae73e8cacc59287833d2846 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 9 Dec 2024 19:53:48 +0100 Subject: [PATCH 115/129] Update attr-modifier to match selectors grammar --- .../html-api/class-wp-css-compound-selector-list.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index a2ff48e089f5d..fa12519540cc5 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -36,7 +36,7 @@ * = '[' ']' | * '[' [ | ] ? ']' * = [ '~' | '|' | '^' | '$' | '*' ]? '=' - * = i | I | s | S + * = i | s * * @link https://www.w3.org/TR/selectors/#grammar Refer to the grammar for more details. 
* From 3206e0b02f2d7f77cc52a0a3710f0d18ec73b9c3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 18:25:36 +0100 Subject: [PATCH 116/129] Move parsing back to selector classes --- .../class-wp-css-attribute-selector.php | 122 ++- .../html-api/class-wp-css-class-selector.php | 28 +- .../class-wp-css-complex-selector-list.php | 96 +-- .../class-wp-css-complex-selector.php | 80 +- .../class-wp-css-compound-selector-list.php | 735 +----------------- .../class-wp-css-compound-selector.php | 60 +- .../html-api/class-wp-css-id-selector.php | 24 +- .../class-wp-css-selector-parser-matcher.php | 476 ++++++++++++ .../html-api/class-wp-css-type-selector.php | 30 +- ...nterface-wp-css-html-processor-matcher.php | 11 - ...face-wp-css-html-tag-processor-matcher.php | 11 - 11 files changed, 839 insertions(+), 834 deletions(-) create mode 100644 src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php delete mode 100644 src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php delete mode 100644 src/wp-includes/html-api/interface-wp-css-html-tag-processor-matcher.php diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 7543cb834e820..700a8cba9bb0c 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -16,7 +16,7 @@ * * @access private */ -final class WP_CSS_Attribute_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser_Matcher { /** * The attribute value is matched exactly. * @@ -244,4 +244,124 @@ private function whitespace_delimited_list( string $input ): Generator { yield $value; } } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. 
+ * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. + */ + public static function parse( string $input, int &$offset ): ?static { + // Need at least 3 bytes [x] + if ( $offset + 2 >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + + if ( '[' !== $input[ $updated_offset ] ) { + return null; + } + ++$updated_offset; + + self::parse_whitespace( $input, $updated_offset ); + $attr_name = self::parse_ident( $input, $updated_offset ); + if ( null === $attr_name ) { + return null; + } + self::parse_whitespace( $input, $updated_offset ); + + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new WP_CSS_Attribute_Selector( $attr_name ); + } + + // need to match at least `=x]` at this point + if ( $updated_offset + 3 >= strlen( $input ) ) { + return null; + } + + if ( '=' === $input[ $updated_offset ] ) { + ++$updated_offset; + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT; + } elseif ( '=' === $input[ $updated_offset + 1 ] ) { + switch ( $input[ $updated_offset ] ) { + case '~': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT; + $updated_offset += 2; + break; + case '|': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED; + $updated_offset += 2; + break; + case '^': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY; + $updated_offset += 2; + break; + case '$': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY; + $updated_offset += 2; + break; + case '*': + $attr_matcher = WP_CSS_Attribute_Selector::MATCH_CONTAINS; + $updated_offset += 2; + break; + default: + return null; + } + } else { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); 
+ $attr_val = + self::parse_string( $input, $updated_offset ) ?? + self::parse_ident( $input, $updated_offset ); + + if ( null === $attr_val ) { + return null; + } + + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + + $attr_modifier = null; + switch ( $input[ $updated_offset ] ) { + case 'i': + case 'I': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; + ++$updated_offset; + break; + + case 's': + case 'S': + $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; + ++$updated_offset; + break; + } + + if ( null !== $attr_modifier ) { + self::parse_whitespace( $input, $updated_offset ); + if ( $updated_offset >= strlen( $input ) ) { + return null; + } + } + + if ( ']' === $input[ $updated_offset ] ) { + $offset = $updated_offset + 1; + return new self( $attr_name, $attr_matcher, $attr_val, $attr_modifier ); + } + + return null; + } } diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index fa287cdf5c580..9abcb881ace49 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -16,7 +16,7 @@ * * @access private */ -final class WP_CSS_Class_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser_Matcher { /** * The class name to match. * @@ -42,4 +42,30 @@ public function __construct( string $class_name ) { public function matches( WP_HTML_Tag_Processor $processor ): bool { return (bool) $processor->has_class( $this->class_name ); } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. 
The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. + */ + public static function parse( string $input, int &$offset ): ?static { + if ( $offset + 1 >= strlen( $input ) || '.' !== $input[ $offset ] ) { + return null; + } + + $updated_offset = $offset + 1; + $result = self::parse_ident( $input, $updated_offset ); + + if ( null === $result ) { + return null; + } + + $offset = $updated_offset; + return new self( $result ); + } } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index fcc6032589584..10af613174a35 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -32,17 +32,18 @@ * * @access private */ -class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Processor_Matcher { +class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List { /** - * Takes a CSS selector string and returns an instance of itself or `null` if the selector - * string is invalid or unsupported. + * Parses a selector string to create a selector instance. * - * @since 6.8.0 + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. * - * @param string $input CSS selectors. - * @return static|null + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function from_selectors( string $input ) { + public static function parse( string $input, int &$offset ): ?static { $input = self::normalize_selector_input( $input ); if ( '' === $input ) { @@ -51,7 +52,7 @@ public static function from_selectors( string $input ) { $offset = 0; - $selector = self::parse_complex_selector( $input, $offset ); + $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; } @@ -65,7 +66,7 @@ public static function from_selectors( string $input ) { } ++$offset; self::parse_whitespace( $input, $offset ); - $selector = self::parse_complex_selector( $input, $offset ); + $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; } @@ -75,81 +76,4 @@ public static function from_selectors( string $input ) { return new self( $selectors ); } - - /* - * ------------------------------ - * Selector parsing functionality - * ------------------------------ - */ - - /** - * Parses a complex selector. - * - * > = [ ? 
]* - * - * @return WP_CSS_Complex_Selector|null - */ - final protected static function parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - $self_selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $self_selector ) { - return null; - } - /** @var array{WP_CSS_Compound_Selector, string}[] */ - $selectors = array(); - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - while ( $updated_offset < strlen( $input ) ) { - $combinator = null; - $next_selector = null; - - if ( - WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || - WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || - WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] - ) { - $combinator = $input[ $updated_offset ]; - ++$updated_offset; - self::parse_whitespace( $input, $updated_offset ); - - // A combinator has been found, failure to find a selector here is a parse error. - $next_selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null === $next_selector ) { - return null; - } - } elseif ( $found_whitespace ) { - /* - * Whitespace is ambiguous, it could be a descendant combinator or - * insignificant whitespace. - */ - $next_selector = self::parse_compound_selector( $input, $updated_offset ); - if ( null !== $next_selector ) { - $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; - } - } - - if ( null === $next_selector ) { - break; - } - - // $self_selector will pass to a relative selector where only the type selector is allowed. 
- if ( null !== $self_selector->subclass_selectors || null === $self_selector->type_selector ) { - return null; - } - - /** @var array{WP_CSS_Compound_Selector, string} */ - $selector_pair = array( $self_selector->type_selector, $combinator ); - $selectors[] = $selector_pair; - $self_selector = $next_selector; - - $found_whitespace = self::parse_whitespace( $input, $updated_offset ); - } - $offset = $updated_offset; - - return new WP_CSS_Complex_Selector( $self_selector, array_reverse( $selectors ) ); - } } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 4461e4d7d92f3..7c997c62a80f7 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -19,7 +19,7 @@ * * @access private */ -final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { +final class WP_CSS_Complex_Selector extends WP_CSS_Selector_Parser_Matcher { /** * Child combinator. */ @@ -111,7 +111,7 @@ final class WP_CSS_Complex_Selector implements WP_CSS_HTML_Processor_Matcher { * @param WP_CSS_Compound_Selector $self_selector The selector in the final position. * @param array{WP_CSS_Type_Selector, string}[]|null $selectors The context selectors. */ - public function __construct( + private function __construct( WP_CSS_Compound_Selector $self_selector, ?array $context_selectors ) { @@ -125,7 +125,7 @@ public function __construct( * @param WP_HTML_Processor $processor The processor. * @return bool True if the processor's current position matches the selector. */ - public function matches( WP_HTML_Processor $processor ): bool { + public function matches( $processor ): bool { // First selector must match this location. if ( ! 
$this->self_selector->matches( $processor ) ) { return false; @@ -189,4 +189,78 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { return false; } } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. + */ + public static function parse( string $input, int &$offset ): ?static { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $self_selector = WP_CSS_Compound_Selector::parse( $input, $updated_offset ); + if ( null === $self_selector ) { + return null; + } + /** @var array{WP_CSS_Compound_Selector, string}[] */ + $selectors = array(); + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + while ( $updated_offset < strlen( $input ) ) { + $combinator = null; + $next_selector = null; + + if ( + WP_CSS_Complex_Selector::COMBINATOR_CHILD === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_NEXT_SIBLING === $input[ $updated_offset ] || + WP_CSS_Complex_Selector::COMBINATOR_SUBSEQUENT_SIBLING === $input[ $updated_offset ] + ) { + $combinator = $input[ $updated_offset ]; + ++$updated_offset; + self::parse_whitespace( $input, $updated_offset ); + + // A combinator has been found, failure to find a selector here is a parse error. + $next_selector = WP_CSS_Compound_Selector::parse( $input, $updated_offset ); + if ( null === $next_selector ) { + return null; + } + } elseif ( $found_whitespace ) { + /* + * Whitespace is ambiguous, it could be a descendant combinator or + * insignificant whitespace. 
+ */ + $next_selector = WP_CSS_Compound_Selector::parse( $input, $updated_offset ); + if ( null !== $next_selector ) { + $combinator = WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT; + } + } + + if ( null === $next_selector ) { + break; + } + + // $self_selector will pass to a relative selector where only the type selector is allowed. + if ( null !== $self_selector->subclass_selectors || null === $self_selector->type_selector ) { + return null; + } + + /** @var array{WP_CSS_Compound_Selector, string} */ + $selector_pair = array( $self_selector->type_selector, $combinator ); + $selectors[] = $selector_pair; + $self_selector = $next_selector; + + $found_whitespace = self::parse_whitespace( $input, $updated_offset ); + } + $offset = $updated_offset; + + return new self( $self_selector, array_reverse( $selectors ) ); + } } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index fa12519540cc5..a6f3b87409ff6 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -76,7 +76,7 @@ * @link https://www.w3.org/TR/selectors-api2/ * @link https://www.w3.org/TR/selectors-4/ */ -class WP_CSS_Compound_Selector_List implements WP_CSS_HTML_Tag_Processor_Matcher { +class WP_CSS_Compound_Selector_List extends WP_CSS_Selector_Parser_Matcher { /** * Determines if the processor's current position matches the selector. * @@ -121,7 +121,22 @@ protected function __construct( array $selectors ) { * @param string $input CSS selectors. * @return static|null */ - public static function from_selectors( string $input ) { + public static function from_selectors( string $input ): ?static { + $offset = 0; + return static::parse( $input, $offset ); + } + + /** + * Parses a selector string to create a selector instance. 
+ * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. + */ + public static function parse( string $input, int &$offset ): ?static { $input = self::normalize_selector_input( $input ); if ( '' === $input ) { @@ -130,7 +145,7 @@ public static function from_selectors( string $input ) { $offset = 0; - $selector = self::parse_compound_selector( $input, $offset ); + $selector = WP_CSS_Compound_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; } @@ -144,7 +159,7 @@ public static function from_selectors( string $input ) { } ++$offset; self::parse_whitespace( $input, $offset ); - $selector = self::parse_compound_selector( $input, $offset ); + $selector = WP_CSS_Compound_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; } @@ -154,716 +169,4 @@ public static function from_selectors( string $input ) { return new self( $selectors ); } - - /* - * ------------------------------ - * Selector parsing functionality - * ------------------------------ - */ - - /** - * Parse an ID selector - * - * > = - * - * https://www.w3.org/TR/selectors/#grammar - * - * @return WP_CSS_ID_Selector|null - */ - final protected static function parse_id_selector( string $input, int &$offset ): ?WP_CSS_ID_Selector { - $ident = self::parse_hash_token( $input, $offset ); - if ( null === $ident ) { - return null; - } - return new WP_CSS_ID_Selector( $ident ); - } - - /** - * Parse a class selector - * - * > = '.' 
- * - * https://www.w3.org/TR/selectors/#grammar - * - * @return WP_CSS_Class_Selector|null - */ - final protected static function parse_class_selector( string $input, int &$offset ): ?WP_CSS_Class_Selector { - if ( $offset + 1 >= strlen( $input ) || '.' !== $input[ $offset ] ) { - return null; - } - - $updated_offset = $offset + 1; - $result = self::parse_ident( $input, $updated_offset ); - - if ( null === $result ) { - return null; - } - - $offset = $updated_offset; - return new WP_CSS_Class_Selector( $result ); - } - - /** - * Parse a type selector - * - * > = | ? '*' - * > = [ | '*' ]? '|' - * > = ? - * - * Namespaces (e.g. |div, *|div, or namespace|div) are not supported, - * so this selector effectively matches * or ident. - * - * https://www.w3.org/TR/selectors/#grammar - * - * @return WP_CSS_Type_Selector|null - */ - final protected static function parse_type_selector( string $input, int &$offset ): ?WP_CSS_Type_Selector { - if ( $offset >= strlen( $input ) ) { - return null; - } - - if ( '*' === $input[ $offset ] ) { - ++$offset; - return new WP_CSS_Type_Selector( '*' ); - } - - $result = self::parse_ident( $input, $offset ); - if ( null === $result ) { - return null; - } - - return new WP_CSS_Type_Selector( $result ); - } - - /** - * Parse an attribute selector - * - * > = '[' ']' | - * > '[' [ | ] ? ']' - * > = [ '~' | '|' | '^' | '$' | '*' ]? '=' - * > = i | s - * > = ? - * - * Namespaces are not supported, so attribute names are effectively identifiers. 
- * - * https://www.w3.org/TR/selectors/#grammar - * - * @return WP_CSS_Attribute_Selector|null - */ - final protected static function parse_attribute_selector( string $input, int &$offset ): ?WP_CSS_Attribute_Selector { - // Need at least 3 bytes [x] - if ( $offset + 2 >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - - if ( '[' !== $input[ $updated_offset ] ) { - return null; - } - ++$updated_offset; - - self::parse_whitespace( $input, $updated_offset ); - $attr_name = self::parse_ident( $input, $updated_offset ); - if ( null === $attr_name ) { - return null; - } - self::parse_whitespace( $input, $updated_offset ); - - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - - if ( ']' === $input[ $updated_offset ] ) { - $offset = $updated_offset + 1; - return new WP_CSS_Attribute_Selector( $attr_name ); - } - - // need to match at least `=x]` at this point - if ( $updated_offset + 3 >= strlen( $input ) ) { - return null; - } - - if ( '=' === $input[ $updated_offset ] ) { - ++$updated_offset; - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT; - } elseif ( '=' === $input[ $updated_offset + 1 ] ) { - switch ( $input[ $updated_offset ] ) { - case '~': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT; - $updated_offset += 2; - break; - case '|': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED; - $updated_offset += 2; - break; - case '^': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY; - $updated_offset += 2; - break; - case '$': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY; - $updated_offset += 2; - break; - case '*': - $attr_matcher = WP_CSS_Attribute_Selector::MATCH_CONTAINS; - $updated_offset += 2; - break; - default: - return null; - } - } else { - return null; - } - - self::parse_whitespace( $input, $updated_offset ); - $attr_val = - self::parse_string( $input, $updated_offset ) ?? 
- self::parse_ident( $input, $updated_offset ); - - if ( null === $attr_val ) { - return null; - } - - self::parse_whitespace( $input, $updated_offset ); - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - - $attr_modifier = null; - switch ( $input[ $updated_offset ] ) { - case 'i': - case 'I': - $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE; - ++$updated_offset; - break; - - case 's': - case 'S': - $attr_modifier = WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE; - ++$updated_offset; - break; - } - - if ( null !== $attr_modifier ) { - self::parse_whitespace( $input, $updated_offset ); - if ( $updated_offset >= strlen( $input ) ) { - return null; - } - } - - if ( ']' === $input[ $updated_offset ] ) { - $offset = $updated_offset + 1; - return new WP_CSS_Attribute_Selector( $attr_name, $attr_matcher, $attr_val, $attr_modifier ); - } - - return null; - } - - /** - * Parses a compound selector. - * - * > = [ ? * ]! - * - * @return WP_CSS_Compound_Selector|null - */ - final protected static function parse_compound_selector( string $input, int &$offset ): ?WP_CSS_Compound_Selector { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $updated_offset = $offset; - $type_selector = self::parse_type_selector( $input, $updated_offset ); - - $subclass_selectors = array(); - $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); - while ( null !== $last_parsed_subclass_selector ) { - $subclass_selectors[] = $last_parsed_subclass_selector; - $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); - } - - if ( null !== $type_selector || array() !== $subclass_selectors ) { - $offset = $updated_offset; - return new WP_CSS_Compound_Selector( $type_selector, $subclass_selectors ); - } - return null; - } - - /** - * Parses a subclass selector. 
- * - * > = | | - * - * @return WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector|null - */ - private static function parse_subclass_selector( string $input, int &$offset ) { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $next_char = $input[ $offset ]; - return '.' === $next_char - ? self::parse_class_selector( $input, $offset ) - : ( - '#' === $next_char - ? self::parse_id_selector( $input, $offset ) - : ( '[' === $next_char - ? self::parse_attribute_selector( $input, $offset ) - : null - ) - ); - } - - - /* - * ------------------------ - * Selector partial parsing - * ------------------------ - * - * These functions consume parts of a selector string input when successful - * and return meaningful values to be used by selectors. - */ - - const UTF8_MAX_CODEPOINT_VALUE = 0x10FFFF; - const WHITESPACE_CHARACTERS = " \t\r\n\f"; - - final public static function parse_whitespace( string $input, int &$offset ): bool { - $length = strspn( $input, self::WHITESPACE_CHARACTERS, $offset ); - $advanced = $length > 0; - $offset += $length; - return $advanced; - } - - /** - * Tokenization of hash tokens - * - * > U+0023 NUMBER SIGN (#) - * > If the next input code point is an ident code point or the next two input code points are a valid escape, then: - * > 1. Create a . - * > 2. If the next 3 input code points would start an ident sequence, set the - * > ’s type flag to "id". - * > 3. Consume an ident sequence, and set the ’s value to the - * > returned string. - * > 4. Return the . - * > Otherwise, return a with its value set to the current input code point. - * - * This implementation is not interested in the , a '#' delim token is not relevant for selectors. 
- */ - final protected static function parse_hash_token( string $input, int &$offset ): ?string { - if ( $offset + 1 >= strlen( $input ) || '#' !== $input[ $offset ] ) { - return null; - } - - $updated_offset = $offset + 1; - $result = self::parse_ident( $input, $updated_offset ); - - if ( null === $result ) { - return null; - } - - $offset = $updated_offset; - return $result; - } - - /** - * Parse an ident token - * - * CAUTION: This method is _not_ for parsing and ID selector! - * - * > 4.3.11. Consume an ident sequence - * > This section describes how to consume an ident sequence from a stream of code points. It returns a string containing the largest name that can be formed from adjacent code points in the stream, starting from the first. - * > - * > Note: This algorithm does not do the verification of the first few code points that are necessary to ensure the returned code points would constitute an . If that is the intended use, ensure that the stream starts with an ident sequence before calling this algorithm. - * > - * > Let result initially be an empty string. - * > - * > Repeatedly consume the next input code point from the stream: - * > - * > ident code point - * > Append the code point to result. - * > the stream starts with a valid escape - * > Consume an escaped code point. Append the returned code point to result. - * > anything else - * > Reconsume the current input code point. Return result. - * - * https://www.w3.org/TR/css-syntax-3/#consume-name - * - * @return string|null - */ - final protected static function parse_ident( string $input, int &$offset ): ?string { - if ( ! self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { - return null; - } - - $ident = ''; - - while ( $offset < strlen( $input ) ) { - if ( self::next_two_are_valid_escape( $input, $offset ) ) { - // Move past the `\` character. 
- ++$offset; - $ident .= self::consume_escaped_codepoint( $input, $offset ); - continue; - } elseif ( self::is_ident_codepoint( $input, $offset ) ) { - // @todo this should append and advance the correct number of bytes. - $ident .= $input[ $offset ]; - ++$offset; - continue; - } - break; - } - - return $ident; - } - - /** - * Parse a string token - * - * > 4.3.5. Consume a string token - * > This section describes how to consume a string token from a stream of code points. It returns either a or . - * > - * > This algorithm may be called with an ending code point, which denotes the code point that ends the string. If an ending code point is not specified, the current input code point is used. - * > - * > Initially create a with its value set to the empty string. - * > - * > Repeatedly consume the next input code point from the stream: - * > - * > ending code point - * > Return the . - * > EOF - * > This is a parse error. Return the . - * > newline - * > This is a parse error. Reconsume the current input code point, create a , and return it. - * > U+005C REVERSE SOLIDUS (\) - * > If the next input code point is EOF, do nothing. - * > Otherwise, if the next input code point is a newline, consume it. - * > Otherwise, (the stream starts with a valid escape) consume an escaped code point and append the returned code point to the ’s value. - * > - * > anything else - * > Append the current input code point to the ’s value. - * - * https://www.w3.org/TR/css-syntax-3/#consume-string-token - * - * This implementation will never return a because - * the is not a part of the selector grammar. That - * case is treated as failure to parse and null is returned. 
- * - * @return string|null - */ - final protected static function parse_string( string $input, int &$offset ): ?string { - if ( $offset >= strlen( $input ) ) { - return null; - } - - $ending_code_point = $input[ $offset ]; - if ( '"' !== $ending_code_point && "'" !== $ending_code_point ) { - return null; - } - - $string_token = ''; - - $updated_offset = $offset + 1; - $anything_else_mask = "\\\n{$ending_code_point}"; - while ( $updated_offset < strlen( $input ) ) { - $anything_else_length = strcspn( $input, $anything_else_mask, $updated_offset ); - if ( $anything_else_length > 0 ) { - $string_token .= substr( $input, $updated_offset, $anything_else_length ); - $updated_offset += $anything_else_length; - - if ( $updated_offset >= strlen( $input ) ) { - break; - } - } - - switch ( $input[ $updated_offset ] ) { - case '\\': - ++$updated_offset; - if ( $updated_offset >= strlen( $input ) ) { - break; - } - if ( "\n" === $input[ $updated_offset ] ) { - ++$updated_offset; - break; - } else { - $string_token .= self::consume_escaped_codepoint( $input, $updated_offset ); - } - break; - - /* - * This case would return a . - * The is not a part of the selector grammar - * so we do not return it and instead treat this as a - * failure to parse a string token. - */ - case "\n": - return null; - - case $ending_code_point: - ++$updated_offset; - break 2; - } - } - - $offset = $updated_offset; - return $string_token; - } - - /** - * Consume an escaped code point. - * - * > 4.3.7. Consume an escaped code point - * > This section describes how to consume an escaped code point. It assumes that the U+005C - * > REVERSE SOLIDUS (\) has already been consumed and that the next input code point has - * > already been verified to be part of a valid escape. It will return a code point. - * > - * > Consume the next input code point. - * > - * > hex digit - * > Consume as many hex digits as possible, but no more than 5. Note that this means 1-6 - * > hex digits have been consumed in total. 
If the next input code point is whitespace, - * > consume it as well. Interpret the hex digits as a hexadecimal number. If this number is - * > zero, or is for a surrogate, or is greater than the maximum allowed code point, return - * > U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the code point with that value. - * > EOF - * > This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�). - * > anything else - * > Return the current input code point. - * - * @param string $input - * @param int $offset - * @return string - */ - final protected static function consume_escaped_codepoint( $input, &$offset ): string { - $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); - if ( $hex_length > 0 ) { - /** - * The 6-character hex string has a maximum value of 0xFFFFFF. - * It is likely to fit in an int value and not be a float. - * - * @var int - */ - $codepoint_value = hexdec( substr( $input, $offset, $hex_length ) ); - - /* - * > A surrogate is a leading surrogate or a trailing surrogate. - * > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. - * > A trailing surrogate is a code point that is in the range U+DC00 to U+DFFF, inclusive. - * - * The surrogate ranges are adjacent, so the complete range is 0xD800 to 0xDFFF, inclusive. - */ - $codepoint_char = ( - 0 === $codepoint_value || - $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || - ( 0xD800 <= $codepoint_value && $codepoint_value <= 0xDFFF ) - ) - ? "\u{FFFD}" - : mb_chr( $codepoint_value, 'UTF-8' ); - - $offset += $hex_length; - - // If the next input code point is whitespace, consume it as well. 
- if ( - strlen( $input ) > $offset && - ( - "\n" === $input[ $offset ] || - "\t" === $input[ $offset ] || - ' ' === $input[ $offset ] - ) - ) { - ++$offset; - } - return $codepoint_char; - } - - $codepoint_char = mb_substr( $input, $offset, 1, 'UTF-8' ); - $offset += strlen( $codepoint_char ); - return $codepoint_char; - } - - /* - * --------------------------- - * Selector parsing utiltities - * --------------------------- - * - * The following functions are used for parsing but do not consume any input. - */ - - /** - * Checks for two valid escape codepoints. - * - * > 4.3.8. Check if two code points are a valid escape - * > This section describes how to check if two code points are a valid escape. The algorithm described here can be called explicitly with two code points, or can be called with the input stream itself. In the latter case, the two code points in question are the current input code point and the next input code point, in that order. - * > - * > Note: This algorithm will not consume any additional code point. - * > - * > If the first code point is not U+005C REVERSE SOLIDUS (\), return false. - * > - * > Otherwise, if the second code point is a newline, return false. - * > - * > Otherwise, return true. - * - * https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape - * - * @todo this does not check whether the second codepoint is valid. - * - * @param string $input The input string. - * @param int $offset The byte offset in the string. - * @return bool True if the next two codepoints are a valid escape, otherwise false. - */ - final protected static function next_two_are_valid_escape( string $input, int $offset ): bool { - if ( $offset + 1 >= strlen( $input ) ) { - return false; - } - return '\\' === $input[ $offset ] && "\n" !== $input[ $offset + 1 ]; - } - - /** - * Checks if the next code point is an "ident start code point". - * - * Caution! 
This method does not do any bounds checking, it should not be passed - * a string with an offset that is out of bounds. - * - * > ident-start code point - * > A letter, a non-ASCII code point, or U+005F LOW LINE (_). - * > uppercase letter - * > A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z) inclusive. - * > lowercase letter - * > A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z) inclusive. - * > letter - * > An uppercase letter or a lowercase letter. - * > non-ASCII code point - * > A code point with a value equal to or greater than U+0080 . - * - * @link https://www.w3.org/TR/css-syntax-3/#ident-start-code-point - * - * @param string $input The input string. - * @param int $offset The byte offset in the string. - * @return bool True if the next codepoint is an ident start code point, otherwise false. - */ - final protected static function is_ident_start_codepoint( string $input, int $offset ): bool { - return ( - '_' === $input[ $offset ] || - ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || - ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'Z' ) || - ord( $input[ $offset ] ) > 0x7F - ); - } - - /** - * Checks if the next code point is an "ident code point". - * - * Caution! This method does not do any bounds checking, it should not be passed - * a string with an offset that is out of bounds. - * - * > ident code point - * > An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-). - * > digit - * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. - * - * @link https://www.w3.org/TR/css-syntax-3/#ident-code-point - * - * @param string $input The input string. - * @param int $offset The byte offset in the string. - * @return bool True if the next codepoint is an ident code point, otherwise false. 
- */ - final protected static function is_ident_codepoint( string $input, int $offset ): bool { - return '-' === $input[ $offset ] || - ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || - self::is_ident_start_codepoint( $input, $offset ); - } - - /** - * Checks if three code points would start an ident sequence. - * - * > 4.3.9. Check if three code points would start an ident sequence - * > This section describes how to check if three code points would start an ident sequence. The algorithm described here can be called explicitly with three code points, or can be called with the input stream itself. In the latter case, the three code points in question are the current input code point and the next two input code points, in that order. - * > - * > Note: This algorithm will not consume any additional code points. - * > - * > Look at the first code point: - * > - * > U+002D HYPHEN-MINUS - * > If the second code point is an ident-start code point or a U+002D HYPHEN-MINUS, or the second and third code points are a valid escape, return true. Otherwise, return false. - * > ident-start code point - * > Return true. - * > U+005C REVERSE SOLIDUS (\) - * > If the first and second code points are a valid escape, return true. Otherwise, return false. - * > anything else - * > Return false. - * - * @link https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier - * - * @param string $input The input string. - * @param int $offset The byte offset in the string. - * @return bool True if the next three codepoints would start an ident sequence, otherwise false. 
- */ - final protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { - if ( $offset >= strlen( $input ) ) { - return false; - } - - // > U+005C REVERSE SOLIDUS (\) - if ( '\\' === $input[ $offset ] ) { - return self::next_two_are_valid_escape( $input, $offset ); - } - - // > U+002D HYPHEN-MINUS - if ( '-' === $input[ $offset ] ) { - $after_initial_hyphen_minus_offset = $offset + 1; - if ( $after_initial_hyphen_minus_offset >= strlen( $input ) ) { - return false; - } - - // > If the second code point is… U+002D HYPHEN-MINUS… return true - if ( '-' === $input[ $after_initial_hyphen_minus_offset ] ) { - return true; - } - - // > If the second and third code points are a valid escape… return true. - if ( self::next_two_are_valid_escape( $input, $after_initial_hyphen_minus_offset ) ) { - return true; - } - - // > If the second code point is an ident-start code point… return true. - if ( self::is_ident_start_codepoint( $input, $after_initial_hyphen_minus_offset ) ) { - return true; - } - - // > Otherwise, return false. - return false; - } - - // > ident-start code point - // > Return true. - // > anything else - // > Return false. - return self::is_ident_start_codepoint( $input, $offset ); - } - - /** - * @todo doc… - */ - final protected static function normalize_selector_input( string $input ): string { - /* - * > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… - * - * This list includes \f. - * A later step would normalize it to a known whitespace character, but it can be trimmed here as well. - */ - $input = trim( $input, " \t\r\n\r\f" ); - - /* - * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. 
- * > - * > To filter code points from a stream of (unfiltered) code points input: - * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. - * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). - * - * https://www.w3.org/TR/css-syntax-3/#input-preprocessing - */ - $input = str_replace( array( "\r\n" ), "\n", $input ); - $input = str_replace( array( "\r", "\f" ), "\n", $input ); - $input = str_replace( "\0", "\u{FFFD}", $input ); - - return $input; - } } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 9596876685212..68aca4d880e0d 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -21,7 +21,7 @@ * * @access private */ -final class WP_CSS_Compound_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Compound_Selector extends WP_CSS_Selector_Parser_Matcher { /** * The type selector. * @@ -69,4 +69,62 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { } return true; } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. 
+ */ + public static function parse( string $input, int &$offset ): ?static { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $updated_offset = $offset; + $type_selector = WP_CSS_Type_Selector::parse( $input, $updated_offset ); + + $subclass_selectors = array(); + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + while ( null !== $last_parsed_subclass_selector ) { + $subclass_selectors[] = $last_parsed_subclass_selector; + $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); + } + + // @todo invert this condition + if ( null !== $type_selector || array() !== $subclass_selectors ) { + $offset = $updated_offset; + return new self( $type_selector, $subclass_selectors ); + } + return null; + } + + /** + * Parses a subclass selector. + * + * > = | | + * + * @return WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector|null + */ + private static function parse_subclass_selector( string $input, int &$offset ) { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $next_char = $input[ $offset ]; + return '.' === $next_char + ? WP_CSS_Class_Selector::parse( $input, $offset ) + : ( + '#' === $next_char + ? WP_CSS_ID_Selector::parse( $input, $offset ) + : ( '[' === $next_char + ? WP_CSS_Attribute_Selector::parse( $input, $offset ) + : null + ) + ); + } } diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 2a600923fa2a2..de854c37eea9f 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -16,7 +16,7 @@ * * @access private */ -final class WP_CSS_ID_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser_Matcher { /** * The ID to match. 
* @@ -48,7 +48,25 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { $case_insensitive = $processor->is_quirks_mode(); return $case_insensitive - ? 0 === strcasecmp( $id, $this->id ) - : $processor->get_attribute( 'id' ) === $this->id; + ? 0 === strcasecmp( $id, $this->id ) + : $processor->get_attribute( 'id' ) === $this->id; + } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. + */ + public static function parse( string $input, int &$offset ): ?static { + $ident = self::parse_hash_token( $input, $offset ); + if ( null === $ident ) { + return null; + } + return new self( $ident ); } } diff --git a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php new file mode 100644 index 0000000000000..8820115f03cfb --- /dev/null +++ b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php @@ -0,0 +1,476 @@ + 0; + $offset += $length; + return $advanced; + } + + /** + * Tokenization of hash tokens + * + * > U+0023 NUMBER SIGN (#) + * > If the next input code point is an ident code point or the next two input code points are a valid escape, then: + * > 1. Create a . + * > 2. If the next 3 input code points would start an ident sequence, set the + * > ’s type flag to "id". + * > 3. Consume an ident sequence, and set the ’s value to the + * > returned string. + * > 4. Return the . + * > Otherwise, return a with its value set to the current input code point. + * + * This implementation is not interested in the , a '#' delim token is not relevant for selectors. 
+ */ + final protected static function parse_hash_token( string $input, int &$offset ): ?string { + if ( $offset + 1 >= strlen( $input ) || '#' !== $input[ $offset ] ) { + return null; + } + + $updated_offset = $offset + 1; + $result = self::parse_ident( $input, $updated_offset ); + + if ( null === $result ) { + return null; + } + + $offset = $updated_offset; + return $result; + } + + /** + * Parse a string token + * + * > 4.3.5. Consume a string token + * > This section describes how to consume a string token from a stream of code points. It returns either a or . + * > + * > This algorithm may be called with an ending code point, which denotes the code point that ends the string. If an ending code point is not specified, the current input code point is used. + * > + * > Initially create a with its value set to the empty string. + * > + * > Repeatedly consume the next input code point from the stream: + * > + * > ending code point + * > Return the . + * > EOF + * > This is a parse error. Return the . + * > newline + * > This is a parse error. Reconsume the current input code point, create a , and return it. + * > U+005C REVERSE SOLIDUS (\) + * > If the next input code point is EOF, do nothing. + * > Otherwise, if the next input code point is a newline, consume it. + * > Otherwise, (the stream starts with a valid escape) consume an escaped code point and append the returned code point to the ’s value. + * > + * > anything else + * > Append the current input code point to the ’s value. + * + * https://www.w3.org/TR/css-syntax-3/#consume-string-token + * + * This implementation will never return a because + * the is not a part of the selector grammar. That + * case is treated as failure to parse and null is returned. 
+ * + * @return string|null + */ + final protected static function parse_string( string $input, int &$offset ): ?string { + if ( $offset >= strlen( $input ) ) { + return null; + } + + $ending_code_point = $input[ $offset ]; + if ( '"' !== $ending_code_point && "'" !== $ending_code_point ) { + return null; + } + + $string_token = ''; + + $updated_offset = $offset + 1; + $anything_else_mask = "\\\n{$ending_code_point}"; + while ( $updated_offset < strlen( $input ) ) { + $anything_else_length = strcspn( $input, $anything_else_mask, $updated_offset ); + if ( $anything_else_length > 0 ) { + $string_token .= substr( $input, $updated_offset, $anything_else_length ); + $updated_offset += $anything_else_length; + + if ( $updated_offset >= strlen( $input ) ) { + break; + } + } + + switch ( $input[ $updated_offset ] ) { + case '\\': + ++$updated_offset; + if ( $updated_offset >= strlen( $input ) ) { + break; + } + if ( "\n" === $input[ $updated_offset ] ) { + ++$updated_offset; + break; + } else { + $string_token .= self::consume_escaped_codepoint( $input, $updated_offset ); + } + break; + + /* + * This case would return a . + * The is not a part of the selector grammar + * so we do not return it and instead treat this as a + * failure to parse a string token. + */ + case "\n": + return null; + + case $ending_code_point: + ++$updated_offset; + break 2; + } + } + + $offset = $updated_offset; + return $string_token; + } + + /** + * Consume an escaped code point. + * + * > 4.3.7. Consume an escaped code point + * > This section describes how to consume an escaped code point. It assumes that the U+005C + * > REVERSE SOLIDUS (\) has already been consumed and that the next input code point has + * > already been verified to be part of a valid escape. It will return a code point. + * > + * > Consume the next input code point. + * > + * > hex digit + * > Consume as many hex digits as possible, but no more than 5. Note that this means 1-6 + * > hex digits have been consumed in total. 
If the next input code point is whitespace, + * > consume it as well. Interpret the hex digits as a hexadecimal number. If this number is + * > zero, or is for a surrogate, or is greater than the maximum allowed code point, return + * > U+FFFD REPLACEMENT CHARACTER (�). Otherwise, return the code point with that value. + * > EOF + * > This is a parse error. Return U+FFFD REPLACEMENT CHARACTER (�). + * > anything else + * > Return the current input code point. + * + * @param string $input + * @param int $offset + * @return string + */ + final protected static function consume_escaped_codepoint( $input, &$offset ): string { + $hex_length = strspn( $input, '0123456789abcdefABCDEF', $offset, 6 ); + if ( $hex_length > 0 ) { + /** + * The 6-character hex string has a maximum value of 0xFFFFFF. + * It is likely to fit in an int value and not be a float. + * + * @var int + */ + $codepoint_value = hexdec( substr( $input, $offset, $hex_length ) ); + + /* + * > A surrogate is a leading surrogate or a trailing surrogate. + * > A leading surrogate is a code point that is in the range U+D800 to U+DBFF, inclusive. + * > A trailing surrogate is a code point that is in the range U+DC00 to U+DFFF, inclusive. + * + * The surrogate ranges are adjacent, so the complete range is 0xD800 to 0xDFFF, inclusive. + */ + $codepoint_char = ( + 0 === $codepoint_value || + $codepoint_value > self::UTF8_MAX_CODEPOINT_VALUE || + ( 0xD800 <= $codepoint_value && $codepoint_value <= 0xDFFF ) + ) + ? "\u{FFFD}" + : mb_chr( $codepoint_value, 'UTF-8' ); + + $offset += $hex_length; + + // If the next input code point is whitespace, consume it as well. 
+ if ( + strlen( $input ) > $offset && + ( + "\n" === $input[ $offset ] || + "\t" === $input[ $offset ] || + ' ' === $input[ $offset ] + ) + ) { + ++$offset; + } + return $codepoint_char; + } + + $codepoint_char = mb_substr( $input, $offset, 1, 'UTF-8' ); + $offset += strlen( $codepoint_char ); + return $codepoint_char; + } + + /** + * Parse an ident token + * + * CAUTION: This method is _not_ for parsing and ID selector! + * + * > 4.3.11. Consume an ident sequence + * > This section describes how to consume an ident sequence from a stream of code points. It returns a string containing the largest name that can be formed from adjacent code points in the stream, starting from the first. + * > + * > Note: This algorithm does not do the verification of the first few code points that are necessary to ensure the returned code points would constitute an . If that is the intended use, ensure that the stream starts with an ident sequence before calling this algorithm. + * > + * > Let result initially be an empty string. + * > + * > Repeatedly consume the next input code point from the stream: + * > + * > ident code point + * > Append the code point to result. + * > the stream starts with a valid escape + * > Consume an escaped code point. Append the returned code point to result. + * > anything else + * > Reconsume the current input code point. Return result. + * + * https://www.w3.org/TR/css-syntax-3/#consume-name + * + * @return string|null + */ + final protected static function parse_ident( string $input, int &$offset ): ?string { + if ( ! self::check_if_three_code_points_would_start_an_ident_sequence( $input, $offset ) ) { + return null; + } + + $ident = ''; + + while ( $offset < strlen( $input ) ) { + if ( self::next_two_are_valid_escape( $input, $offset ) ) { + // Move past the `\` character. 
+ ++$offset; + $ident .= self::consume_escaped_codepoint( $input, $offset ); + continue; + } elseif ( self::is_ident_codepoint( $input, $offset ) ) { + // @todo this should append and advance the correct number of bytes. + $ident .= $input[ $offset ]; + ++$offset; + continue; + } + break; + } + + return $ident; + } + + /* + * -------------------------- + * Selector parsing utilities + * -------------------------- + * + * The following functions are used for parsing but do not consume any input. + */ + + /** + * Checks for two valid escape codepoints. + * + * > 4.3.8. Check if two code points are a valid escape + * > This section describes how to check if two code points are a valid escape. The algorithm described here can be called explicitly with two code points, or can be called with the input stream itself. In the latter case, the two code points in question are the current input code point and the next input code point, in that order. + * > + * > Note: This algorithm will not consume any additional code point. + * > + * > If the first code point is not U+005C REVERSE SOLIDUS (\), return false. + * > + * > Otherwise, if the second code point is a newline, return false. + * > + * > Otherwise, return true. + * + * https://www.w3.org/TR/css-syntax-3/#starts-with-a-valid-escape + * + * @todo this does not check whether the second codepoint is valid. + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next two codepoints are a valid escape, otherwise false. + */ + final protected static function next_two_are_valid_escape( string $input, int $offset ): bool { + if ( $offset + 1 >= strlen( $input ) ) { + return false; + } + return '\\' === $input[ $offset ] && "\n" !== $input[ $offset + 1 ]; + } + + /** + * Checks if the next code point is an "ident start code point". + * + * Caution! 
This method does not do any bounds checking, it should not be passed + * a string with an offset that is out of bounds. + * + * > ident-start code point + * > A letter, a non-ASCII code point, or U+005F LOW LINE (_). + * > uppercase letter + * > A code point between U+0041 LATIN CAPITAL LETTER A (A) and U+005A LATIN CAPITAL LETTER Z (Z) inclusive. + * > lowercase letter + * > A code point between U+0061 LATIN SMALL LETTER A (a) and U+007A LATIN SMALL LETTER Z (z) inclusive. + * > letter + * > An uppercase letter or a lowercase letter. + * > non-ASCII code point + * > A code point with a value equal to or greater than U+0080 . + * + * @link https://www.w3.org/TR/css-syntax-3/#ident-start-code-point + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next codepoint is an ident start code point, otherwise false. + */ + final protected static function is_ident_start_codepoint( string $input, int $offset ): bool { + return ( + '_' === $input[ $offset ] || + ( 'a' <= $input[ $offset ] && $input[ $offset ] <= 'z' ) || + ( 'A' <= $input[ $offset ] && $input[ $offset ] <= 'Z' ) || + ord( $input[ $offset ] ) > 0x7F + ); + } + + /** + * Checks if the next code point is an "ident code point". + * + * Caution! This method does not do any bounds checking, it should not be passed + * a string with an offset that is out of bounds. + * + * > ident code point + * > An ident-start code point, a digit, or U+002D HYPHEN-MINUS (-). + * > digit + * > A code point between U+0030 DIGIT ZERO (0) and U+0039 DIGIT NINE (9) inclusive. + * + * @link https://www.w3.org/TR/css-syntax-3/#ident-code-point + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next codepoint is an ident code point, otherwise false. 
+ */ + final protected static function is_ident_codepoint( string $input, int $offset ): bool { + return '-' === $input[ $offset ] || + ( '0' <= $input[ $offset ] && $input[ $offset ] <= '9' ) || + self::is_ident_start_codepoint( $input, $offset ); + } + + /** + * Checks if three code points would start an ident sequence. + * + * > 4.3.9. Check if three code points would start an ident sequence + * > This section describes how to check if three code points would start an ident sequence. The algorithm described here can be called explicitly with three code points, or can be called with the input stream itself. In the latter case, the three code points in question are the current input code point and the next two input code points, in that order. + * > + * > Note: This algorithm will not consume any additional code points. + * > + * > Look at the first code point: + * > + * > U+002D HYPHEN-MINUS + * > If the second code point is an ident-start code point or a U+002D HYPHEN-MINUS, or the second and third code points are a valid escape, return true. Otherwise, return false. + * > ident-start code point + * > Return true. + * > U+005C REVERSE SOLIDUS (\) + * > If the first and second code points are a valid escape, return true. Otherwise, return false. + * > anything else + * > Return false. + * + * @link https://www.w3.org/TR/css-syntax-3/#would-start-an-identifier + * + * @param string $input The input string. + * @param int $offset The byte offset in the string. + * @return bool True if the next three codepoints would start an ident sequence, otherwise false. 
+ */ + final protected static function check_if_three_code_points_would_start_an_ident_sequence( string $input, int $offset ): bool { + if ( $offset >= strlen( $input ) ) { + return false; + } + + // > U+005C REVERSE SOLIDUS (\) + if ( '\\' === $input[ $offset ] ) { + return self::next_two_are_valid_escape( $input, $offset ); + } + + // > U+002D HYPHEN-MINUS + if ( '-' === $input[ $offset ] ) { + $after_initial_hyphen_minus_offset = $offset + 1; + if ( $after_initial_hyphen_minus_offset >= strlen( $input ) ) { + return false; + } + + // > If the second code point is… U+002D HYPHEN-MINUS… return true + if ( '-' === $input[ $after_initial_hyphen_minus_offset ] ) { + return true; + } + + // > If the second and third code points are a valid escape… return true. + if ( self::next_two_are_valid_escape( $input, $after_initial_hyphen_minus_offset ) ) { + return true; + } + + // > If the second code point is an ident-start code point… return true. + if ( self::is_ident_start_codepoint( $input, $after_initial_hyphen_minus_offset ) ) { + return true; + } + + // > Otherwise, return false. + return false; + } + + // > ident-start code point + // > Return true. + // > anything else + // > Return false. + return self::is_ident_start_codepoint( $input, $offset ); + } + + /** + * @todo doc… + */ + final protected static function normalize_selector_input( string $input ): string { + /* + * > A selector string is a list of one or more complex selectors ([SELECTORS4], section 3.1) that may be surrounded by whitespace… + * + * This list includes \f. + * A later step would normalize it to a known whitespace character, but it can be trimmed here as well. + */ + $input = trim( $input, " \t\r\n\r\f" ); + + /* + * > The input stream consists of the filtered code points pushed into it as the input byte stream is decoded. 
+ * > + * > To filter code points from a stream of (unfiltered) code points input: + * > Replace any U+000D CARRIAGE RETURN (CR) code points, U+000C FORM FEED (FF) code points, or pairs of U+000D CARRIAGE RETURN (CR) followed by U+000A LINE FEED (LF) in input by a single U+000A LINE FEED (LF) code point. + * > Replace any U+0000 NULL or surrogate code points in input with U+FFFD REPLACEMENT CHARACTER (�). + * + * https://www.w3.org/TR/css-syntax-3/#input-preprocessing + */ + $input = str_replace( array( "\r\n" ), "\n", $input ); + $input = str_replace( array( "\r", "\f" ), "\n", $input ); + $input = str_replace( "\0", "\u{FFFD}", $input ); + + return $input; + } +} diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index 3f7671851c375..492569ee51d65 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -16,7 +16,7 @@ * * @access private */ -final class WP_CSS_Type_Selector implements WP_CSS_HTML_Tag_Processor_Matcher { +final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser_Matcher { /** * The element type (tag name) to match or '*' to match any element. * @@ -59,4 +59,32 @@ public function matches_tag( string $tag_name ): bool { } return 0 === strcasecmp( $tag_name, $this->type ); } + + /** + * Parses a selector string to create a selector instance. + * + * To create an instance of this class, use the {@see WP_CSS_Compound_Selector_List::from_selectors()} method. + * + * @param string $input The selector string. + * @param int $offset The offset into the string. The offset is passed by reference and + * will be updated if the parse is successful. + * @return static|null The selector instance, or null if the parse was unsuccessful. 
+ */ + public static function parse( string $input, int &$offset ): ?static { + if ( $offset >= strlen( $input ) ) { + return null; + } + + if ( '*' === $input[ $offset ] ) { + ++$offset; + return new WP_CSS_Type_Selector( '*' ); + } + + $result = self::parse_ident( $input, $offset ); + if ( null === $result ) { + return null; + } + + return new self( $result ); + } } diff --git a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php b/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php deleted file mode 100644 index b77ef40931d83..0000000000000 --- a/src/wp-includes/html-api/interface-wp-css-html-processor-matcher.php +++ /dev/null @@ -1,11 +0,0 @@ - Date: Wed, 11 Dec 2024 18:44:24 +0100 Subject: [PATCH 117/129] Update tests for class parsing --- .../tests/html-api/wpCssAttributeSelector.php | 90 ++++ .../tests/html-api/wpCssClassSelector.php | 49 +++ .../tests/html-api/wpCssComplexSelector.php | 71 ++++ .../html-api/wpCssComplexSelectorList.php | 73 +--- .../tests/html-api/wpCssCompoundSelector.php | 44 ++ .../html-api/wpCssCompoundSelectorList.php | 395 +----------------- .../tests/html-api/wpCssIdSelector.php | 50 +++ .../html-api/wpCssSelectorParserMatcher.php | 172 ++++++++ .../tests/html-api/wpCssTypeSelector.php | 51 +++ 9 files changed, 532 insertions(+), 463 deletions(-) create mode 100644 tests/phpunit/tests/html-api/wpCssAttributeSelector.php create mode 100644 tests/phpunit/tests/html-api/wpCssClassSelector.php create mode 100644 tests/phpunit/tests/html-api/wpCssComplexSelector.php create mode 100644 tests/phpunit/tests/html-api/wpCssCompoundSelector.php create mode 100644 tests/phpunit/tests/html-api/wpCssIdSelector.php create mode 100644 tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php create mode 100644 tests/phpunit/tests/html-api/wpCssTypeSelector.php diff --git a/tests/phpunit/tests/html-api/wpCssAttributeSelector.php b/tests/phpunit/tests/html-api/wpCssAttributeSelector.php new file mode 100644 index 
0000000000000..d907ad7c07e5b --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssAttributeSelector.php @@ -0,0 +1,90 @@ +assertNull( $result ); + } else { + $this->assertSame( $expected_name, $result->name ); + $this->assertSame( $expected_matcher, $result->matcher ); + $this->assertSame( $expected_value, $result->value ); + $this->assertSame( $expected_modifier, $result->modifier ); + $this->assertSame( $rest, substr( $input, $offset ) ); + } + } + + /** + * Data provider. + * + * @return array + */ + public static function data_attribute_selectors(): array { + return array( + '[href]' => array( '[href]', 'href', null, null, null, '' ), + '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), + '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), + '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), + '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), + '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), + '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), + '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), + '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), + + '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), + '[att=val S]' => array( '[att=val S]', 'att', 
WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), + "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), + "[match*='quoted's]" => array( "[match*='quoted's]", 'match', WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), + + '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), + '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), + + // Invalid + 'Invalid: (empty string)' => array( '' ), + 'Invalid: foo' => array( 'foo' ), + 'Invalid: [foo' => array( '[foo' ), + 'Invalid: [#foo]' => array( '[#foo]' ), + 'Invalid: [*|*]' => array( '[*|*]' ), + 'Invalid: [ns|*]' => array( '[ns|*]' ), + 'Invalid: [* |att]' => array( '[* |att]' ), + 'Invalid: [*| att]' => array( '[*| att]' ), + 'Invalid: [att * =]' => array( '[att * =]' ), + 'Invalid: [att+=val]' => array( '[att+=val]' ), + 'Invalid: [att=val ' => array( '[att=val ' ), + 'Invalid: [att i]' => array( '[att i]' ), + 'Invalid: [att s]' => array( '[att s]' ), + "Invalid: [att='val\\n']" => array( "[att='val\n']" ), + 'Invalid: [att=val i ' => array( '[att=val i ' ), + 'Invalid: [att="val"ix' => array( '[att="val"ix' ), + ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssClassSelector.php b/tests/phpunit/tests/html-api/wpCssClassSelector.php new file mode 100644 index 0000000000000..fa1d097a5ad3d --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssClassSelector.php @@ -0,0 +1,49 @@ +assertNull( $result ); + } else { + $this->assertSame( $expected, $result->class_name ); + $this->assertSame( $rest, substr( $input, 
$offset ) ); + } + } + + /** + * Data provider. + * + * @return array + */ + public static function data_class_selectors(): array { + return array( + 'valid ._-foo123' => array( '._-foo123', '_-foo123', '' ), + 'valid .foo.bar' => array( '.foo.bar', 'foo', '.bar' ), + 'escaped .\31 23' => array( '.\\31 23', '123', '' ), + 'with descendant .\31 23 div' => array( '.\\31 23 div', '123', ' div' ), + + 'not class foo' => array( 'foo' ), + 'not class #bar' => array( '#bar' ), + 'not valid .1foo' => array( '.1foo' ), + ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelector.php b/tests/phpunit/tests/html-api/wpCssComplexSelector.php new file mode 100644 index 0000000000000..bb7b6e67e9d1a --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssComplexSelector.php @@ -0,0 +1,71 @@ + .child#bar[baz=quux] , rest'; + $offset = 0; + + /** @var WP_CSS_Complex_Selector|null */ + $sel = WP_CSS_Complex_Selector::parse( $input, $offset ); + + $this->assertSame( 2, count( $sel->context_selectors ) ); + + // Relative selectors should be reverse ordered. 
+ $this->assertSame( 'el2', $sel->context_selectors[0][0]->type ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->context_selectors[0][1] ); + + $this->assertSame( 'el1', $sel->context_selectors[1][0]->type ); + $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->context_selectors[1][1] ); + + $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); + $this->assertNull( $sel->self_selector->type_selector ); + $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->class_name ); + + $this->assertSame( ', rest', substr( $input, $offset ) ); + } + + /** + * @ticket 62653 + */ + public function test_parse_invalid_complex_selector() { + $input = 'el.foo#bar[baz=quux] > , rest'; + $offset = 0; + $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } + + /** + * @ticket 62653 + */ + public function test_parse_invalid_complex_selector_nonfinal_subclass() { + $input = 'el.foo#bar[baz=quux] > final, rest'; + $offset = 0; + $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } + + /** + * @ticket 62653 + */ + public function test_parse_empty_complex_selector() { + $input = ''; + $offset = 0; + $result = WP_CSS_Complex_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php index 829af95a55d5f..4e788860ff53f 100644 --- a/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssComplexSelectorList.php @@ -9,79 +9,10 @@ * @since 6.8.0 * * @group html-api + * + * @coversDefaultClass WP_CSS_Complex_Selector_List */ class Tests_HtmlApi_WpCssComplexSelectorList extends WP_UnitTestCase { - private $test_class; - - public function set_up(): void { - parent::set_up(); - $this->test_class = new class() extends WP_CSS_Complex_Selector_List { 
- public function __construct() { - parent::__construct( array() ); - } - - public static function test_parse_complex_selector( string $input, int &$offset ): ?WP_CSS_Complex_Selector { - return self::parse_complex_selector( $input, $offset ); - } - }; - } - - /** - * @ticket 62653 - */ - public function test_parse_complex_selector() { - $input = 'el1 el2 > .child#bar[baz=quux] , rest'; - $offset = 0; - - /** @var WP_CSS_Complex_Selector|null */ - $sel = $this->test_class::test_parse_complex_selector( $input, $offset ); - - $this->assertSame( 2, count( $sel->context_selectors ) ); - - // Relative selectors should be reverse ordered. - $this->assertSame( 'el2', $sel->context_selectors[0][0]->type ); - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_CHILD, $sel->context_selectors[0][1] ); - - $this->assertSame( 'el1', $sel->context_selectors[1][0]->type ); - $this->assertSame( WP_CSS_Complex_Selector::COMBINATOR_DESCENDANT, $sel->context_selectors[1][1] ); - - $this->assertSame( 3, count( $sel->self_selector->subclass_selectors ) ); - $this->assertNull( $sel->self_selector->type_selector ); - $this->assertSame( 'child', $sel->self_selector->subclass_selectors[0]->class_name ); - - $this->assertSame( ', rest', substr( $input, $offset ) ); - } - - /** - * @ticket 62653 - */ - public function test_parse_invalid_complex_selector() { - $input = 'el.foo#bar[baz=quux] > , rest'; - $offset = 0; - $result = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertNull( $result ); - } - - /** - * @ticket 62653 - */ - public function test_parse_invalid_complex_selector_nonfinal_subclass() { - $input = 'el.foo#bar[baz=quux] > final, rest'; - $offset = 0; - $result = $this->test_class::test_parse_complex_selector( $input, $offset ); - $this->assertNull( $result ); - } - - /** - * @ticket 62653 - */ - public function test_parse_empty_complex_selector() { - $input = ''; - $offset = 0; - $result = $this->test_class::test_parse_complex_selector( 
$input, $offset ); - $this->assertNull( $result ); - } - /** * @ticket 62653 */ diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelector.php b/tests/phpunit/tests/html-api/wpCssCompoundSelector.php new file mode 100644 index 0000000000000..8800c89d6ed36 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelector.php @@ -0,0 +1,44 @@ + .child'; + $offset = 0; + $sel = WP_CSS_Compound_Selector::parse( $input, $offset ); + + $this->assertSame( 'el', $sel->type_selector->type ); + $this->assertSame( 3, count( $sel->subclass_selectors ) ); + $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' ); + $this->assertSame( 'bar', $sel->subclass_selectors[1]->id, 'bar' ); + $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' ); + $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher ); + $this->assertSame( 'quux', $sel->subclass_selectors[2]->value ); + $this->assertSame( ' > .child', substr( $input, $offset ) ); + } + + /** + * @ticket 62653 + */ + public function test_parse_empty_selector() { + $input = ''; + $offset = 0; + $result = WP_CSS_Compound_Selector::parse( $input, $offset ); + $this->assertNull( $result ); + $this->assertSame( 0, $offset ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php index c112585e622c8..01eff118a87b0 100644 --- a/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php +++ b/tests/phpunit/tests/html-api/wpCssCompoundSelectorList.php @@ -1,6 +1,6 @@ test_class = new class() extends WP_CSS_Compound_Selector_List { - public function __construct() { - parent::__construct( array() ); - } - - /* - * Parsing - */ - public static function test_parse_ident( string $input, int &$offset ) { - return self::parse_ident( $input, $offset ); - } - - public static function test_parse_string( string $input, int &$offset ) { - return self::parse_string( $input, $offset ); - } 
- - public static function test_parse_type_selector( string $input, int &$offset ) { - return self::parse_type_selector( $input, $offset ); - } - - public static function test_parse_id_selector( string $input, int &$offset ) { - return self::parse_id_selector( $input, $offset ); - } - - public static function test_parse_class_selector( string $input, int &$offset ) { - return self::parse_class_selector( $input, $offset ); - } - - public static function test_parse_attribute_selector( string $input, int &$offset ) { - return self::parse_attribute_selector( $input, $offset ); - } - - public static function test_parse_compound_selector( string $input, int &$offset ) { - return self::parse_compound_selector( $input, $offset ); - } - - /* - * Utilities - */ - public static function test_is_ident_codepoint( string $input, int $offset ) { - return self::is_ident_codepoint( $input, $offset ); - } - - public static function test_is_ident_start_codepoint( string $input, int $offset ) { - return self::is_ident_start_codepoint( $input, $offset ); - } - }; - } - - /** - * Data provider. - * - * @return array - */ - public static function data_idents(): array { - return array( - 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), - 'trailing .' 
=> array( '😍foo123.xyz', '😍foo123', '.xyz' ), - 'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ), - 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), - 'escaped space' => array( '\\ x', ' x', '' ), - 'escaped emoji' => array( '\\😍', '😍', '' ), - 'hex unicode codepoint' => array( '\\1f0a1', '🂡', '' ), - 'HEX UNICODE CODEPOINT' => array( '\\1D4B2', '𝒲', '' ), - - 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), - 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), - 'hex space-suffixed 1' => array( "\\31 23", '123', '' ), - 'hex tab' => array( '\\9', "\t", '' ), - 'hex a' => array( '\\61 bc', 'abc', '' ), - 'hex a max escape length' => array( '\\000061bc', 'abc', '' ), - - 'out of range replacement min' => array( '\\110000 ', "\u{fffd}", '' ), - 'out of range replacement max' => array( '\\ffffff ', "\u{fffd}", '' ), - 'leading surrogate min replacement' => array( '\\d800 ', "\u{fffd}", '' ), - 'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ), - 'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ), - 'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ), - 'can start with -ident' => array( '-ident', '-ident', '' ), - 'can start with --anything' => array( '--anything', '--anything', '' ), - 'can start with ---anything' => array( '--_anything', '--_anything', '' ), - 'can start with --1anything' => array( '--1anything', '--1anything', '' ), - 'can start with -\31 23' => array( '-\31 23', '-123', '' ), - 'can start with --\31 23' => array( '--\31 23', '--123', '' ), - 'ident ends before ]' => array( 'ident]', 'ident', ']' ), - - // Invalid - 'Invalid: (empty string)' => array( '' ), - 'Invalid: bad start >' => array( '>ident' ), - 'Invalid: bad start [' => array( '[ident' ), - 'Invalid: bad start #' => array( '#ident' ), - 'Invalid: bad start " "' => array( ' ident' ), - 'Invalid: bad start 1' => array( '1ident' ), - 'Invalid: bad start -1' => array( '-1ident' ), - 
'Invalid: bad start -' => array( '-' ), - ); - } - - /** - * @ticket 62653 - */ - public function test_is_ident_and_is_ident_start() { - $this->assertFalse( $this->test_class::test_is_ident_codepoint( '[', 0 ) ); - $this->assertFalse( $this->test_class::test_is_ident_codepoint( ']', 0 ) ); - $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( '[', 0 ) ); - $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( ']', 0 ) ); - } - - /** - * @ticket 62653 - * - * @dataProvider data_idents - */ - public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { - - $offset = 0; - $result = $this->test_class::test_parse_ident( $input, $offset ); - if ( null === $expected ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected, $result, 'Ident did not match.' ); - $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' ); - } - } - - /** - * @ticket 62653 - * - * @dataProvider data_strings - */ - public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) { - $offset = 0; - $result = $this->test_class::test_parse_string( $input, $offset ); - if ( null === $expected ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected, $result, 'String did not match.' ); - $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' ); - } - } - - /** - * Data provider. 
- * - * @return array - */ - public static function data_strings(): array { - return array( - '"foo"' => array( '"foo"', 'foo', '' ), - '"foo"after' => array( '"foo"after', 'foo', 'after' ), - '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ), - '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ), - - "'foo'" => array( "'foo'", 'foo', '' ), - "'foo'after" => array( "'foo'after", 'foo', 'after' ), - "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ), - "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ), - - "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ), - "'foo\\31 23'" => array( "'foo\\31 23'", 'foo123', '' ), - "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ), - "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ), - "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ), - - "'foo\\" => array( "'foo\\", 'foo', '' ), - - '"' => array( '"', '', '' ), - '"\\"' => array( '"\\"', '"', '' ), - '"missing close' => array( '"missing close', 'missing close', '' ), - - // Invalid - 'Invalid: (empty string)' => array( '' ), - 'Invalid: .foo' => array( '.foo' ), - 'Invalid: #foo' => array( '#foo' ), - "Invalid: 'newline\\n'" => array( "'newline\n'" ), - 'Invalid: foo' => array( 'foo' ), - ); - } - - /** - * @ticket 62653 - * - * @dataProvider data_id_selectors - */ - public function test_parse_id( string $input, ?string $expected = null, ?string $rest = null ) { - $offset = 0; - $result = $this->test_class::test_parse_id_selector( $input, $offset ); - if ( null === $expected ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected, $result->id ); - $this->assertSame( $rest, substr( $input, $offset ) ); - } - } - - /** - * Data provider. 
- * - * @return array - */ - public static function data_id_selectors(): array { - return array( - 'valid #_-foo123' => array( '#_-foo123', '_-foo123', '' ), - 'valid #foo#bar' => array( '#foo#bar', 'foo', '#bar' ), - 'escaped #\31 23' => array( '#\\31 23', '123', '' ), - 'with descendant #\31 23 div' => array( '#\\31 23 div', '123', ' div' ), - - 'not ID foo' => array( 'foo' ), - 'not ID .bar' => array( '.bar' ), - 'not valid #1foo' => array( '#1foo' ), - ); - } - - /** - * @ticket 62653 - * - * @dataProvider data_class_selectors - */ - public function test_parse_class( string $input, ?string $expected = null, ?string $rest = null ) { - $offset = 0; - $result = $this->test_class::test_parse_class_selector( $input, $offset ); - if ( null === $expected ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected, $result->class_name ); - $this->assertSame( $rest, substr( $input, $offset ) ); - } - } - - /** - * Data provider. - * - * @return array - */ - public static function data_class_selectors(): array { - return array( - 'valid ._-foo123' => array( '._-foo123', '_-foo123', '' ), - 'valid .foo.bar' => array( '.foo.bar', 'foo', '.bar' ), - 'escaped .\31 23' => array( '.\\31 23', '123', '' ), - 'with descendant .\31 23 div' => array( '.\\31 23 div', '123', ' div' ), - - 'not class foo' => array( 'foo' ), - 'not class #bar' => array( '#bar' ), - 'not valid .1foo' => array( '.1foo' ), - ); - } - - /** - * @ticket 62653 - * - * @dataProvider data_type_selectors - */ - public function test_parse_type( string $input, ?string $expected = null, ?string $rest = null ) { - $offset = 0; - $result = $this->test_class::test_parse_type_selector( $input, $offset ); - if ( null === $expected ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected, $result->type ); - $this->assertSame( $rest, substr( $input, $offset ) ); - } - } - - /** - * Data provider. 
- * - * @return array - */ - public static function data_type_selectors(): array { - return array( - 'any *' => array( '* .class', '*', ' .class' ), - 'a' => array( 'a', 'a', '' ), - 'div.class' => array( 'div.class', 'div', '.class' ), - 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ), - - // Invalid - 'Invalid: (empty string)' => array( '' ), - 'Invalid: #id' => array( '#id' ), - 'Invalid: .class' => array( '.class' ), - 'Invalid: [attr]' => array( '[attr]' ), - ); - } - - /** - * @ticket 62653 - * - * @dataProvider data_attribute_selectors - */ - public function test_parse_attribute( - string $input, - ?string $expected_name = null, - ?string $expected_matcher = null, - ?string $expected_value = null, - ?string $expected_modifier = null, - ?string $rest = null - ) { - $offset = 0; - $result = $this->test_class::test_parse_attribute_selector( $input, $offset ); - if ( null === $expected_name ) { - $this->assertNull( $result ); - } else { - $this->assertSame( $expected_name, $result->name ); - $this->assertSame( $expected_matcher, $result->matcher ); - $this->assertSame( $expected_value, $result->value ); - $this->assertSame( $expected_modifier, $result->modifier ); - $this->assertSame( $rest, substr( $input, $offset ) ); - } - } - - /** - * Data provider. 
- * - * @return array - */ - public static function data_attribute_selectors(): array { - return array( - '[href]' => array( '[href]', 'href', null, null, null, '' ), - '[href] type' => array( '[href] type', 'href', null, null, null, ' type' ), - '[href]#id' => array( '[href]#id', 'href', null, null, null, '#id' ), - '[href].class' => array( '[href].class', 'href', null, null, null, '.class' ), - '[href][href2]' => array( '[href][href2]', 'href', null, null, null, '[href2]' ), - '[\n href\t\r]' => array( "[\n href\t\r]", 'href', null, null, null, '' ), - '[href=foo]' => array( '[href=foo]', 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foo', null, '' ), - '[href \n = bar ]' => array( "[href \n = bar ]", 'href', WP_CSS_Attribute_Selector::MATCH_EXACT, 'bar', null, '' ), - '[href \n ^= baz ]' => array( "[href \n ^= baz ]", 'href', WP_CSS_Attribute_Selector::MATCH_PREFIXED_BY, 'baz', null, '' ), - - '[match $= insensitive i]' => array( '[match $= insensitive i]', 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'insensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[match|=sensitive s]' => array( '[match|=sensitive s]', 'match', WP_CSS_Attribute_Selector::MATCH_EXACT_OR_HYPHEN_PREFIXED, 'sensitive', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - '[att=val I]' => array( '[att=val I]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_INSENSITIVE, '' ), - '[att=val S]' => array( '[att=val S]', 'att', WP_CSS_Attribute_Selector::MATCH_EXACT, 'val', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[match~="quoted[][]"]' => array( '[match~="quoted[][]"]', 'match', WP_CSS_Attribute_Selector::MATCH_ONE_OF_EXACT, 'quoted[][]', null, '' ), - "[match$='quoted!{}']" => array( "[match$='quoted!{}']", 'match', WP_CSS_Attribute_Selector::MATCH_SUFFIXED_BY, 'quoted!{}', null, '' ), - "[match*='quoted's]" => array( "[match*='quoted's]", 'match', 
WP_CSS_Attribute_Selector::MATCH_CONTAINS, 'quoted', WP_CSS_Attribute_Selector::MODIFIER_CASE_SENSITIVE, '' ), - - '[escape-nl="foo\\nbar"]' => array( "[escape-nl='foo\\\nbar']", 'escape-nl', WP_CSS_Attribute_Selector::MATCH_EXACT, 'foobar', null, '' ), - '[escape-seq="\\31 23"]' => array( "[escape-seq='\\31 23']", 'escape-seq', WP_CSS_Attribute_Selector::MATCH_EXACT, '123', null, '' ), - - // Invalid - 'Invalid: (empty string)' => array( '' ), - 'Invalid: foo' => array( 'foo' ), - 'Invalid: [foo' => array( '[foo' ), - 'Invalid: [#foo]' => array( '[#foo]' ), - 'Invalid: [*|*]' => array( '[*|*]' ), - 'Invalid: [ns|*]' => array( '[ns|*]' ), - 'Invalid: [* |att]' => array( '[* |att]' ), - 'Invalid: [*| att]' => array( '[*| att]' ), - 'Invalid: [att * =]' => array( '[att * =]' ), - 'Invalid: [att+=val]' => array( '[att+=val]' ), - 'Invalid: [att=val ' => array( '[att=val ' ), - 'Invalid: [att i]' => array( '[att i]' ), - 'Invalid: [att s]' => array( '[att s]' ), - "Invalid: [att='val\\n']" => array( "[att='val\n']" ), - 'Invalid: [att=val i ' => array( '[att=val i ' ), - 'Invalid: [att="val"ix' => array( '[att="val"ix' ), - ); - } - - /** - * @ticket 62653 - */ - public function test_parse_selector() { - $input = 'el.foo#bar[baz=quux] > .child'; - $offset = 0; - $sel = $this->test_class::test_parse_compound_selector( $input, $offset ); - - $this->assertSame( 'el', $sel->type_selector->type ); - $this->assertSame( 3, count( $sel->subclass_selectors ) ); - $this->assertSame( 'foo', $sel->subclass_selectors[0]->class_name, 'foo' ); - $this->assertSame( 'bar', $sel->subclass_selectors[1]->id, 'bar' ); - $this->assertSame( 'baz', $sel->subclass_selectors[2]->name, 'baz' ); - $this->assertSame( WP_CSS_Attribute_Selector::MATCH_EXACT, $sel->subclass_selectors[2]->matcher ); - $this->assertSame( 'quux', $sel->subclass_selectors[2]->value ); - $this->assertSame( ' > .child', substr( $input, $offset ) ); - } - - /** - * @ticket 62653 - */ - public function 
test_parse_empty_selector() { - $input = ''; - $offset = 0; - $result = $this->test_class::test_parse_compound_selector( $input, $offset ); - $this->assertNull( $result ); - $this->assertSame( 0, $offset ); - } - /** * @ticket 62653 */ diff --git a/tests/phpunit/tests/html-api/wpCssIdSelector.php b/tests/phpunit/tests/html-api/wpCssIdSelector.php new file mode 100644 index 0000000000000..6cd6b83a46b8d --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssIdSelector.php @@ -0,0 +1,50 @@ +assertNull( $result ); + } else { + $this->assertSame( $expected, $result->id ); + $this->assertSame( $rest, substr( $input, $offset ) ); + } + } + + /** + * Data provider. + * + * @return array + */ + public static function data_id_selectors(): array { + return array( + 'valid #_-foo123' => array( '#_-foo123', '_-foo123', '' ), + 'valid #foo#bar' => array( '#foo#bar', 'foo', '#bar' ), + 'escaped #\31 23' => array( '#\\31 23', '123', '' ), + 'with descendant #\31 23 div' => array( '#\\31 23 div', '123', ' div' ), + + // Invalid + 'not ID foo' => array( 'foo' ), + 'not ID .bar' => array( '.bar' ), + 'not valid #1foo' => array( '#1foo' ), + ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php new file mode 100644 index 0000000000000..4497334791c88 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php @@ -0,0 +1,172 @@ +test_class = new class() extends WP_CSS_Selector_Parser_Matcher { + /* + * Parsing + */ + public static function test_parse_ident( string $input, int &$offset ) { + return self::parse_ident( $input, $offset ); + } + + public static function test_parse_string( string $input, int &$offset ) { + return self::parse_string( $input, $offset ); + } + + /* + * Utilities + */ + public static function test_is_ident_codepoint( string $input, int $offset ) { + return self::is_ident_codepoint( $input, $offset ); + } + + public static function 
test_is_ident_start_codepoint( string $input, int $offset ) { + return self::is_ident_start_codepoint( $input, $offset ); + } + }; + } + + /** + * Data provider. + * + * @return array + */ + public static function data_idents(): array { + return array( + 'trailing #' => array( '_-foo123#xyz', '_-foo123', '#xyz' ), + 'trailing .' => array( '😍foo123.xyz', '😍foo123', '.xyz' ), + 'trailing " "' => array( '😍foo123 more', '😍foo123', ' more' ), + 'escaped ASCII character' => array( '\\xyz', 'xyz', '' ), + 'escaped space' => array( '\\ x', ' x', '' ), + 'escaped emoji' => array( '\\😍', '😍', '' ), + 'hex unicode codepoint' => array( '\\1f0a1', '🂡', '' ), + 'HEX UNICODE CODEPOINT' => array( '\\1D4B2', '𝒲', '' ), + + 'hex tab-suffixed 1' => array( "\\31\t23", '123', '' ), + 'hex newline-suffixed 1' => array( "\\31\n23", '123', '' ), + 'hex space-suffixed 1' => array( "\\31 23", '123', '' ), + 'hex tab' => array( '\\9', "\t", '' ), + 'hex a' => array( '\\61 bc', 'abc', '' ), + 'hex a max escape length' => array( '\\000061bc', 'abc', '' ), + + 'out of range replacement min' => array( '\\110000 ', "\u{fffd}", '' ), + 'out of range replacement max' => array( '\\ffffff ', "\u{fffd}", '' ), + 'leading surrogate min replacement' => array( '\\d800 ', "\u{fffd}", '' ), + 'leading surrogate max replacement' => array( '\\dbff ', "\u{fffd}", '' ), + 'trailing surrogate min replacement' => array( '\\dc00 ', "\u{fffd}", '' ), + 'trailing surrogate max replacement' => array( '\\dfff ', "\u{fffd}", '' ), + 'can start with -ident' => array( '-ident', '-ident', '' ), + 'can start with --anything' => array( '--anything', '--anything', '' ), + 'can start with ---anything' => array( '--_anything', '--_anything', '' ), + 'can start with --1anything' => array( '--1anything', '--1anything', '' ), + 'can start with -\31 23' => array( '-\31 23', '-123', '' ), + 'can start with --\31 23' => array( '--\31 23', '--123', '' ), + 'ident ends before ]' => array( 'ident]', 'ident', ']' ), + + // Invalid + 
'Invalid: (empty string)' => array( '' ), + 'Invalid: bad start >' => array( '>ident' ), + 'Invalid: bad start [' => array( '[ident' ), + 'Invalid: bad start #' => array( '#ident' ), + 'Invalid: bad start " "' => array( ' ident' ), + 'Invalid: bad start 1' => array( '1ident' ), + 'Invalid: bad start -1' => array( '-1ident' ), + 'Invalid: bad start -' => array( '-' ), + ); + } + + /** + * @ticket 62653 + */ + public function test_is_ident_and_is_ident_start() { + $this->assertFalse( $this->test_class::test_is_ident_codepoint( '[', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_codepoint( ']', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( '[', 0 ) ); + $this->assertFalse( $this->test_class::test_is_ident_start_codepoint( ']', 0 ) ); + } + + /** + * @ticket 62653 + * + * @dataProvider data_idents + */ + public function test_parse_ident( string $input, ?string $expected = null, ?string $rest = null ) { + + $offset = 0; + $result = $this->test_class::test_parse_ident( $input, $offset ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $expected, $result, 'Ident did not match.' ); + $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' ); + } + } + + /** + * @ticket 62653 + * + * @dataProvider data_strings + */ + public function test_parse_string( string $input, ?string $expected = null, ?string $rest = null ) { + $offset = 0; + $result = $this->test_class::test_parse_string( $input, $offset ); + if ( null === $expected ) { + $this->assertNull( $result ); + } else { + $this->assertSame( $expected, $result, 'String did not match.' ); + $this->assertSame( $rest, substr( $input, $offset ), 'Offset was not updated correctly.' ); + } + } + + /** + * Data provider. 
+ * + * @return array + */ + public static function data_strings(): array { + return array( + '"foo"' => array( '"foo"', 'foo', '' ), + '"foo"after' => array( '"foo"after', 'foo', 'after' ), + '"foo""two"' => array( '"foo""two"', 'foo', '"two"' ), + '"foo"\'two\'' => array( '"foo"\'two\'', 'foo', "'two'" ), + + "'foo'" => array( "'foo'", 'foo', '' ), + "'foo'after" => array( "'foo'after", 'foo', 'after' ), + "'foo'\"two\"" => array( "'foo'\"two\"", 'foo', '"two"' ), + "'foo''two'" => array( "'foo''two'", 'foo', "'two'" ), + + "'foo\\nbar'" => array( "'foo\\\nbar'", 'foobar', '' ), + "'foo\\31 23'" => array( "'foo\\31 23'", 'foo123', '' ), + "'foo\\31\\n23'" => array( "'foo\\31\n23'", 'foo123', '' ), + "'foo\\31\\t23'" => array( "'foo\\31\t23'", 'foo123', '' ), + "'foo\\00003123'" => array( "'foo\\00003123'", 'foo123', '' ), + + "'foo\\" => array( "'foo\\", 'foo', '' ), + + '"' => array( '"', '', '' ), + '"\\"' => array( '"\\"', '"', '' ), + '"missing close' => array( '"missing close', 'missing close', '' ), + + // Invalid + 'Invalid: (empty string)' => array( '' ), + 'Invalid: .foo' => array( '.foo' ), + 'Invalid: #foo' => array( '#foo' ), + "Invalid: 'newline\\n'" => array( "'newline\n'" ), + 'Invalid: foo' => array( 'foo' ), + ); + } +} diff --git a/tests/phpunit/tests/html-api/wpCssTypeSelector.php b/tests/phpunit/tests/html-api/wpCssTypeSelector.php new file mode 100644 index 0000000000000..fb53c41dd058c --- /dev/null +++ b/tests/phpunit/tests/html-api/wpCssTypeSelector.php @@ -0,0 +1,51 @@ +assertNull( $result ); + } else { + $this->assertSame( $expected, $result->type ); + $this->assertSame( $rest, substr( $input, $offset ) ); + } + } + + /** + * Data provider. 
+ * + * @return array + */ + public static function data_type_selectors(): array { + return array( + 'any *' => array( '* .class', '*', ' .class' ), + 'a' => array( 'a', 'a', '' ), + 'div.class' => array( 'div.class', 'div', '.class' ), + 'custom-type#id' => array( 'custom-type#id', 'custom-type', '#id' ), + + // Invalid + 'Invalid: (empty string)' => array( '' ), + 'Invalid: #id' => array( '#id' ), + 'Invalid: .class' => array( '.class' ), + 'Invalid: [attr]' => array( '[attr]' ), + ); + } +} From f217eb0de026fcee8beb645f941de5221c676795 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 18:45:20 +0100 Subject: [PATCH 118/129] Use whitepsace chars constant --- .../html-api/class-wp-css-attribute-selector.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index 700a8cba9bb0c..ab566f8f1af11 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -231,15 +231,15 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { */ private function whitespace_delimited_list( string $input ): Generator { // Start by skipping whitespace. - $offset = strspn( $input, " \t\r\n\f" ); + $offset = strspn( $input, self::WHITESPACE_CHARACTERS ); while ( $offset < strlen( $input ) ) { // Find the byte length until the next boundary. - $length = strcspn( $input, " \t\r\n\f", $offset ); + $length = strcspn( $input, self::WHITESPACE_CHARACTERS, $offset ); $value = substr( $input, $offset, $length ); // Move past trailing whitespace. 
- $offset += $length + strspn( $input, " \t\r\n\f", $offset + $length ); + $offset += $length + strspn( $input, self::WHITESPACE_CHARACTERS, $offset + $length ); yield $value; } From 6154742ecb42762951eb2267fe12434417f7bf85 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 18:46:14 +0100 Subject: [PATCH 119/129] parse_whitespace should be protected --- .../html-api/class-wp-css-selector-parser-matcher.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php index 8820115f03cfb..744f75496f0f8 100644 --- a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php +++ b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php @@ -34,7 +34,7 @@ abstract public static function parse( string $input, int &$offset ): ?static; /** * @todo document */ - final public static function parse_whitespace( string $input, int &$offset ): bool { + final protected static function parse_whitespace( string $input, int &$offset ): bool { $length = strspn( $input, self::WHITESPACE_CHARACTERS, $offset ); $advanced = $length > 0; $offset += $length; From 577b3a3b7036b9db71d6c0ab3337b96f94960686 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 18:48:01 +0100 Subject: [PATCH 120/129] Update interface to abstract class require --- src/wp-settings.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wp-settings.php b/src/wp-settings.php index b52fe8ab6181c..2e6ed6091a682 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -265,8 +265,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; -require ABSPATH . WPINC . '/html-api/interface-wp-css-html-tag-processor-matcher.php'; -require ABSPATH . WPINC . 
'/html-api/interface-wp-css-html-processor-matcher.php'; +require ABSPATH . WPINC . '/html-api/class-wp-css-selector-parser-matcher.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-attribute-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-class-selector.php'; require ABSPATH . WPINC . '/html-api/class-wp-css-id-selector.php'; From 5ea93abbf5704268239c3129a43e9bd9834e34b8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:04:03 +0100 Subject: [PATCH 121/129] Document base class --- .../class-wp-css-selector-parser-matcher.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php index 744f75496f0f8..60e75820c264a 100644 --- a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php +++ b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php @@ -1,5 +1,19 @@ Date: Wed, 11 Dec 2024 19:06:19 +0100 Subject: [PATCH 122/129] Invert and comment confusing compound selector condition --- .../html-api/class-wp-css-compound-selector.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 68aca4d880e0d..f301f6f9342fd 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -95,12 +95,13 @@ public static function parse( string $input, int &$offset ): ?static { $last_parsed_subclass_selector = self::parse_subclass_selector( $input, $updated_offset ); } - // @todo invert this condition - if ( null !== $type_selector || array() !== $subclass_selectors ) { - $offset = $updated_offset; - return new self( $type_selector, $subclass_selectors ); + // There must be at least one selector. 
+ if ( null === $type_selector && array() === $subclass_selectors ) { + return null; } - return null; + + $offset = $updated_offset; + return new self( $type_selector, $subclass_selectors ); } /** From db469e62def02391ab362d5b7fd01ee4f54606d0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:08:46 +0100 Subject: [PATCH 123/129] Use switch in compound selector parsing --- .../class-wp-css-compound-selector.php | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index f301f6f9342fd..002021472f496 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -116,16 +116,15 @@ private static function parse_subclass_selector( string $input, int &$offset ) { return null; } - $next_char = $input[ $offset ]; - return '.' === $next_char - ? WP_CSS_Class_Selector::parse( $input, $offset ) - : ( - '#' === $next_char - ? WP_CSS_ID_Selector::parse( $input, $offset ) - : ( '[' === $next_char - ? 
WP_CSS_Attribute_Selector::parse( $input, $offset ) - : null - ) - ); + switch ( $input[ $offset ] ) { + case '.': + return WP_CSS_Class_Selector::parse( $input, $offset ); + case '#': + return WP_CSS_ID_Selector::parse( $input, $offset ); + case '[': + return WP_CSS_Attribute_Selector::parse( $input, $offset ); + } + + return null; } } From 400263a007f5e5aa6a0394343bf3dd827e32e029 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:20:45 +0100 Subject: [PATCH 124/129] Fix up some todo-s --- .../class-wp-css-complex-selector-list.php | 8 -------- .../class-wp-css-compound-selector-list.php | 14 ++++++-------- .../class-wp-css-selector-parser-matcher.php | 19 ++++++++++++++----- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 10af613174a35..940bd098c6c19 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -44,14 +44,6 @@ class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List { * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ public static function parse( string $input, int &$offset ): ?static { - $input = self::normalize_selector_input( $input ); - - if ( '' === $input ) { - return null; - } - - $offset = 0; - $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index a6f3b87409ff6..7edafc779ac4c 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -122,6 +122,12 @@ protected function __construct( array $selectors ) { * @return static|null */ public static function from_selectors( string $input ): ?static { + $input = self::normalize_selector_input( $input ); + + if ( '' === $input ) { + return null; + } + $offset = 0; return static::parse( $input, $offset ); } @@ -137,14 +143,6 @@ public static function from_selectors( string $input ): ?static { * @return static|null The selector instance, or null if the parse was unsuccessful. */ public static function parse( string $input, int &$offset ): ?static { - $input = self::normalize_selector_input( $input ); - - if ( '' === $input ) { - return null; - } - - $offset = 0; - $selector = WP_CSS_Compound_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php index 60e75820c264a..6d665c4c26cb0 100644 --- a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php +++ b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php @@ -46,7 +46,12 @@ abstract public static function parse( string $input, int &$offset ): ?static; */ /** - * @todo document + * Consumes whitespace from the input string. + * + * @param string $input The selector string. 
+ * @param int $offset The offset into the string. The offset is passed by reference and will + * be updated to the byte after the whitespace sequence. + * @return bool True if whitespace was consumed. */ final protected static function parse_whitespace( string $input, int &$offset ): bool { $length = strspn( $input, self::WHITESPACE_CHARACTERS, $offset ); @@ -289,7 +294,6 @@ final protected static function parse_ident( string $input, int &$offset ): ?str $ident .= self::consume_escaped_codepoint( $input, $offset ); continue; } elseif ( self::is_ident_codepoint( $input, $offset ) ) { - // @todo this should append and advance the correct number of bytes. $ident .= $input[ $offset ]; ++$offset; continue; @@ -338,7 +342,7 @@ final protected static function next_two_are_valid_escape( string $input, int $o } /** - * Checks if the next code point is an "ident start code point". + * Checks if the next code point is an "ident start code point." * * Caution! This method does not do any bounds checking, it should not be passed * a string with an offset that is out of bounds. @@ -370,7 +374,7 @@ final protected static function is_ident_start_codepoint( string $input, int $of } /** - * Checks if the next code point is an "ident code point". + * Checks if the next code point is an "ident code point." * * Caution! This method does not do any bounds checking, it should not be passed * a string with an offset that is out of bounds. @@ -461,7 +465,12 @@ final protected static function check_if_three_code_points_would_start_an_ident_ } /** - * @todo doc… + * Normalizes selector input for processing. + * + * @see https://www.w3.org/TR/css-syntax-3/#input-preprocessing + * + * @param string $input The selector string. + * @return string The normalized selector string. 
*/ final protected static function normalize_selector_input( string $input ): string { /* From 483a8191401c91f53601033ba3977a068bd03446 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:24:22 +0100 Subject: [PATCH 125/129] Make most selector constructors private --- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-class-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-compound-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-id-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-type-selector.php | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index ab566f8f1af11..d2d4d17792a81 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -140,7 +140,7 @@ final class WP_CSS_Attribute_Selector extends WP_CSS_Selector_Parser_Matcher { * @param string|null $modifier The attribute case modifier. * Must be one of the class MODIFIER_* constants or null. */ - public function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { + private function __construct( string $name, ?string $matcher = null, ?string $value = null, ?string $modifier = null ) { $this->name = $name; $this->matcher = $matcher; $this->value = $value; diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index 9abcb881ace49..ff7a0b0442813 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -29,7 +29,7 @@ final class WP_CSS_Class_Selector extends WP_CSS_Selector_Parser_Matcher { * * @param string $class_name The class name to match. 
*/ - public function __construct( string $class_name ) { + private function __construct( string $class_name ) { $this->class_name = $class_name; } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 002021472f496..077ed5aa4b7f3 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -45,7 +45,7 @@ final class WP_CSS_Compound_Selector extends WP_CSS_Selector_Parser_Matcher { * @param (WP_CSS_ID_Selector|WP_CSS_Class_Selector|WP_CSS_Attribute_Selector)[]|null $subclass_selectors * The array of subclass selectors or null. */ - public function __construct( ?WP_CSS_Type_Selector $type_selector, ?array $subclass_selectors ) { + private function __construct( ?WP_CSS_Type_Selector $type_selector, ?array $subclass_selectors ) { $this->type_selector = $type_selector; $this->subclass_selectors = array() === $subclass_selectors ? null : $subclass_selectors; } diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index de854c37eea9f..2c7cb6feec658 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -29,7 +29,7 @@ final class WP_CSS_ID_Selector extends WP_CSS_Selector_Parser_Matcher { * * @param string $id The ID to match. 
*/ - public function __construct( string $id ) { + private function __construct( string $id ) { $this->id = $id; } diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index 492569ee51d65..ab41a87f1a113 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -29,7 +29,7 @@ final class WP_CSS_Type_Selector extends WP_CSS_Selector_Parser_Matcher { * * @param string $type The element type (tag name) to match or '*' to match any element. */ - public function __construct( string $type ) { + private function __construct( string $type ) { $this->type = $type; } From 1f641685627d8536887c69d25e5f69466cb1f076 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:32:17 +0100 Subject: [PATCH 126/129] Fix test class implementation of abstract class --- .../phpunit/tests/html-api/wpCssSelectorParserMatcher.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php index 4497334791c88..4e0dd23af12f7 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php +++ b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php @@ -16,6 +16,13 @@ class Tests_HtmlApi_WpCssSelectorParserMatcher extends WP_UnitTestCase { public function set_up(): void { parent::set_up(); $this->test_class = new class() extends WP_CSS_Selector_Parser_Matcher { + public function matches( $processor ): bool { + throw new Exeption( 'Matches called on test class.' ); + } + public static function parse( string $input, int &$offset ): ?static { + throw new Exeption( 'Parse called on test class.' 
); + } + /* * Parsing */ From 3bfb8a13acbfa5a5360d4555bdc6102f4236d8e5 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:41:30 +0100 Subject: [PATCH 127/129] Remove php 8+ ?static return types --- src/wp-includes/html-api/class-wp-css-attribute-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-class-selector.php | 2 +- .../html-api/class-wp-css-complex-selector-list.php | 2 +- src/wp-includes/html-api/class-wp-css-complex-selector.php | 2 +- .../html-api/class-wp-css-compound-selector-list.php | 4 ++-- src/wp-includes/html-api/class-wp-css-compound-selector.php | 2 +- src/wp-includes/html-api/class-wp-css-id-selector.php | 2 +- .../html-api/class-wp-css-selector-parser-matcher.php | 2 +- src/wp-includes/html-api/class-wp-css-type-selector.php | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-css-attribute-selector.php b/src/wp-includes/html-api/class-wp-css-attribute-selector.php index d2d4d17792a81..dc3c13a5ea534 100644 --- a/src/wp-includes/html-api/class-wp-css-attribute-selector.php +++ b/src/wp-includes/html-api/class-wp-css-attribute-selector.php @@ -255,7 +255,7 @@ private function whitespace_delimited_list( string $input ): Generator { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { // Need at least 3 bytes [x] if ( $offset + 2 >= strlen( $input ) ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-class-selector.php b/src/wp-includes/html-api/class-wp-css-class-selector.php index ff7a0b0442813..57f7dac50315f 100644 --- a/src/wp-includes/html-api/class-wp-css-class-selector.php +++ b/src/wp-includes/html-api/class-wp-css-class-selector.php @@ -53,7 +53,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. */ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { if ( $offset + 1 >= strlen( $input ) || '.' !== $input[ $offset ] ) { return null; } diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php index 940bd098c6c19..d819cd469086f 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector-list.php @@ -43,7 +43,7 @@ class WP_CSS_Complex_Selector_List extends WP_CSS_Compound_Selector_List { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { $selector = WP_CSS_Complex_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-complex-selector.php b/src/wp-includes/html-api/class-wp-css-complex-selector.php index 7c997c62a80f7..8c7c25ed7b984 100644 --- a/src/wp-includes/html-api/class-wp-css-complex-selector.php +++ b/src/wp-includes/html-api/class-wp-css-complex-selector.php @@ -200,7 +200,7 @@ private function explore_matches( array $selectors, array $breadcrumbs ): bool { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. */ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { if ( $offset >= strlen( $input ) ) { return null; } diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php index 7edafc779ac4c..41cf76e2c90f6 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector-list.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector-list.php @@ -121,7 +121,7 @@ protected function __construct( array $selectors ) { * @param string $input CSS selectors. * @return static|null */ - public static function from_selectors( string $input ): ?static { + public static function from_selectors( string $input ) { $input = self::normalize_selector_input( $input ); if ( '' === $input ) { @@ -142,7 +142,7 @@ public static function from_selectors( string $input ): ?static { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { $selector = WP_CSS_Compound_Selector::parse( $input, $offset ); if ( null === $selector ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-compound-selector.php b/src/wp-includes/html-api/class-wp-css-compound-selector.php index 077ed5aa4b7f3..91e543fdc7e7e 100644 --- a/src/wp-includes/html-api/class-wp-css-compound-selector.php +++ b/src/wp-includes/html-api/class-wp-css-compound-selector.php @@ -80,7 +80,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. */ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { if ( $offset >= strlen( $input ) ) { return null; } diff --git a/src/wp-includes/html-api/class-wp-css-id-selector.php b/src/wp-includes/html-api/class-wp-css-id-selector.php index 2c7cb6feec658..f0c203dc6477e 100644 --- a/src/wp-includes/html-api/class-wp-css-id-selector.php +++ b/src/wp-includes/html-api/class-wp-css-id-selector.php @@ -62,7 +62,7 @@ public function matches( WP_HTML_Tag_Processor $processor ): bool { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { $ident = self::parse_hash_token( $input, $offset ); if ( null === $ident ) { return null; diff --git a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php index 6d665c4c26cb0..e2b56a7b9e55c 100644 --- a/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php +++ b/src/wp-includes/html-api/class-wp-css-selector-parser-matcher.php @@ -34,7 +34,7 @@ abstract public function matches( WP_HTML_Tag_Processor $processor ): bool; * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. */ - abstract public static function parse( string $input, int &$offset ): ?static; + abstract public static function parse( string $input, int &$offset ); /* * ------------------------ diff --git a/src/wp-includes/html-api/class-wp-css-type-selector.php b/src/wp-includes/html-api/class-wp-css-type-selector.php index ab41a87f1a113..c16883fa60679 100644 --- a/src/wp-includes/html-api/class-wp-css-type-selector.php +++ b/src/wp-includes/html-api/class-wp-css-type-selector.php @@ -70,7 +70,7 @@ public function matches_tag( string $tag_name ): bool { * will be updated if the parse is successful. * @return static|null The selector instance, or null if the parse was unsuccessful. 
*/ - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { if ( $offset >= strlen( $input ) ) { return null; } From 8d2aef2f19c99a7e1401fc29d16e56a930bad948 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:47:59 +0100 Subject: [PATCH 128/129] Fix typo in Exception class name --- tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php index 4e0dd23af12f7..bf84f30637510 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php +++ b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php @@ -17,10 +17,10 @@ public function set_up(): void { parent::set_up(); $this->test_class = new class() extends WP_CSS_Selector_Parser_Matcher { public function matches( $processor ): bool { - throw new Exeption( 'Matches called on test class.' ); + throw new Error( 'Matches called on test class.' ); } public static function parse( string $input, int &$offset ): ?static { - throw new Exeption( 'Parse called on test class.' ); + throw new Error( 'Parse called on test class.' 
); } /* From 33b83338c827a67c48328dbf8f4c2dd70893ba9c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 11 Dec 2024 19:48:37 +0100 Subject: [PATCH 129/129] Remove ?static return type from test --- tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php index bf84f30637510..29a76bfd78723 100644 --- a/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php +++ b/tests/phpunit/tests/html-api/wpCssSelectorParserMatcher.php @@ -19,7 +19,7 @@ public function set_up(): void { public function matches( $processor ): bool { throw new Error( 'Matches called on test class.' ); } - public static function parse( string $input, int &$offset ): ?static { + public static function parse( string $input, int &$offset ) { throw new Error( 'Parse called on test class.' ); }