diff --git a/src/wp-includes/html-api/class-wp-html-attribute-token.php b/src/wp-includes/html-api/class-wp-html-attribute-token.php index f938609e41687..74d41320b1c79 100644 --- a/src/wp-includes/html-api/class-wp-html-attribute-token.php +++ b/src/wp-includes/html-api/class-wp-html-attribute-token.php @@ -15,6 +15,7 @@ * * @access private * @since 6.2.0 + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. * * @see WP_HTML_Tag_Processor */ @@ -23,6 +24,7 @@ class WP_HTML_Attribute_Token { * Attribute name. * * @since 6.2.0 + * * @var string */ public $name; @@ -31,6 +33,7 @@ class WP_HTML_Attribute_Token { * Attribute value. * * @since 6.2.0 + * * @var int */ public $value_starts_at; @@ -39,6 +42,7 @@ class WP_HTML_Attribute_Token { * How many bytes the value occupies in the input HTML. * * @since 6.2.0 + * * @var int */ public $value_length; @@ -47,22 +51,43 @@ class WP_HTML_Attribute_Token { * The string offset where the attribute name starts. * * @since 6.2.0 + * * @var int */ public $start; /** - * The string offset after the attribute value or its name. + * Byte length of text spanning the attribute inside a tag. + * + * This span starts at the first character of the attribute name + * and it ends after one of three cases: + * + * - at the end of the attribute name for boolean attributes. + * - at the end of the value for unquoted attributes. + * - at the final single or double quote for quoted attributes. + * + * Example: + * + *
+ * ------------ length is 12, including quotes + * + * + * ------- length is 7 + * + * + * ------------ length is 12 + * + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. * - * @since 6.2.0 * @var int */ - public $end; + public $length; /** * Whether the attribute is a boolean attribute with value `true`. * * @since 6.2.0 + * * @var bool */ public $is_true; @@ -71,20 +96,21 @@ class WP_HTML_Attribute_Token { * Constructor. * * @since 6.2.0 + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. * * @param string $name Attribute name. * @param int $value_start Attribute value. * @param int $value_length Number of bytes attribute value spans. * @param int $start The string offset where the attribute name starts. - * @param int $end The string offset after the attribute value or its name. + * @param int $length Byte length of the entire attribute name or name and value pair expression. * @param bool $is_true Whether the attribute is a boolean attribute with true value. */ - public function __construct( $name, $value_start, $value_length, $start, $end, $is_true ) { + public function __construct( $name, $value_start, $value_length, $start, $length, $is_true ) { $this->name = $name; $this->value_starts_at = $value_start; $this->value_length = $value_length; $this->start = $start; - $this->end = $end; + $this->length = $length; $this->is_true = $is_true; } } diff --git a/src/wp-includes/html-api/class-wp-html-span.php b/src/wp-includes/html-api/class-wp-html-span.php index 46227ebd02997..b1ab865af3bed 100644 --- a/src/wp-includes/html-api/class-wp-html-span.php +++ b/src/wp-includes/html-api/class-wp-html-span.php @@ -18,6 +18,7 @@ * * @access private * @since 6.2.0 + * @since 6.5.0 Replaced `end` with `length` to more closely align with `substr()`. * * @see WP_HTML_Tag_Processor */ @@ -26,28 +27,30 @@ class WP_HTML_Span { * Byte offset into document where span begins. 
* * @since 6.2.0 + * * @var int */ public $start; /** - * Byte offset into document where span ends. + * Byte length of this span. + * + * @since 6.5.0 * - * @since 6.2.0 * @var int */ - public $end; + public $length; /** * Constructor. * * @since 6.2.0 * - * @param int $start Byte offset into document where replacement span begins. - * @param int $end Byte offset into document where replacement span ends. + * @param int $start Byte offset into document where replacement span begins. + * @param int $length Byte length of span. */ - public function __construct( $start, $end ) { - $this->start = $start; - $this->end = $end; + public function __construct( $start, $length ) { + $this->start = $start; + $this->length = $length; } } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ea74410230954..17b3f400fcea6 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -329,47 +329,68 @@ class WP_HTML_Tag_Processor { private $bytes_already_parsed = 0; /** - * Byte offset in input document where current tag name starts. + * Byte offset in input document where current token starts. * * Example: * *
... * 01234 - * - tag name starts at 1 + * - token starts at 0 + * + * @since 6.5.0 * - * @since 6.2.0 * @var int|null */ - private $tag_name_starts_at; + private $token_starts_at; /** - * Byte length of current tag name. + * Byte length of current token. + * + * Example: + * + *
... + * 012345678901234 + * - token length is 14 - 0 = 14 + * + * a is a token. + * 0123456789 123456789 123456789 + * - token length is 17 - 2 = 15 + * + * @since 6.5.0 + * + * @var int|null + */ + private $token_length; + + /** + * Byte offset in input document where current tag name starts. * * Example: * *
... * 01234 - * --- tag name length is 3 + * - tag name starts at 1 * * @since 6.2.0 + * * @var int|null */ - private $tag_name_length; + private $tag_name_starts_at; /** - * Byte offset in input document where current tag token ends. + * Byte length of current tag name. * * Example: * *
... - * 0 1 | - * 01234567890123456 - * --- tag name ends at 14 + * 01234 + * --- tag name length is 3 * * @since 6.2.0 + * * @var int|null */ - private $tag_ends_at; + private $tag_name_length; /** * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. @@ -388,14 +409,14 @@ class WP_HTML_Tag_Processor { * //
* // ^ parsing will continue from this point. * $this->attributes = array( - * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ) + * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ) * ); * * // When picking up parsing again, or when asking to find the * // `class` attribute we will continue and add to this array. * $this->attributes = array( - * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ), - * 'class' => new WP_HTML_Attribute_Match( 'class', 'outline', 18, 32 ) + * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ), + * 'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false ) * ); * * // Note that only the `class` attribute value is stored in the index. @@ -484,9 +505,9 @@ class WP_HTML_Tag_Processor { * * // Replace an attribute stored with a new value, indices * // sourced from the lazily-parsed HTML recognizer. - * $start = $attributes['src']->start; - * $end = $attributes['src']->end; - * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, $new_value ); + * $start = $attributes['src']->start; + * $length = $attributes['src']->length; + * $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value ); * * // Correspondingly, something like this will appear in this array. * $lexical_updates = array( @@ -566,7 +587,7 @@ public function next_tag( $query = null ) { if ( false === $tag_ends_at ) { return false; } - $this->tag_ends_at = $tag_ends_at; + $this->token_length = $tag_ends_at - $this->token_starts_at; $this->bytes_already_parsed = $tag_ends_at; // Finally, check if the parsed tag and its attributes match the search query. @@ -808,10 +829,7 @@ public function set_bookmark( $name ) { return false; } - $this->bookmarks[ $name ] = new WP_HTML_Span( - $this->tag_name_starts_at - ( $this->is_closing_tag ? 
2 : 1 ), - $this->tag_ends_at - ); + $this->bookmarks[ $name ] = new WP_HTML_Span( $this->token_starts_at, $this->token_length ); return true; } @@ -875,7 +893,7 @@ private function skip_rcdata( $tag_name ) { while ( false !== $at && $at < $doc_length ) { $at = strpos( $this->html, '= $doc_length ) { $this->bytes_already_parsed = $doc_length; return false; @@ -1093,6 +1111,8 @@ private function parse_next_tag() { return false; } + $this->token_starts_at = $at; + if ( '/' === $this->html[ $at + 1 ] ) { $this->is_closing_tag = true; ++$at; @@ -1381,7 +1401,7 @@ private function parse_next_attribute() { $value_start, $value_length, $attribute_start, - $attribute_end, + $attribute_end - $attribute_start, ! $has_value ); @@ -1396,7 +1416,7 @@ private function parse_next_attribute() { * an array when encountering duplicates avoids needless allocations in the * normative case of parsing tags with no duplicate attributes. */ - $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end ); + $duplicate_span = new WP_HTML_Span( $attribute_start, $attribute_end - $attribute_start ); if ( null === $this->duplicate_attributes ) { $this->duplicate_attributes = array( $comparable_name => array( $duplicate_span ) ); } elseif ( ! 
array_key_exists( $comparable_name, $this->duplicate_attributes ) ) { @@ -1424,9 +1444,10 @@ private function skip_whitespace() { */ private function after_tag() { $this->get_updated_html(); + $this->token_starts_at = null; + $this->token_length = null; $this->tag_name_starts_at = null; $this->tag_name_length = null; - $this->tag_ends_at = null; $this->is_closing_tag = null; $this->attributes = array(); $this->duplicate_attributes = null; @@ -1606,7 +1627,7 @@ private function apply_attributes_updates( $shift_this_point = 0 ) { $bytes_already_copied = 0; $output_buffer = ''; foreach ( $this->lexical_updates as $diff ) { - $shift = strlen( $diff->text ) - ( $diff->end - $diff->start ); + $shift = strlen( $diff->text ) - $diff->length; // Adjust the cursor position by however much an update affects it. if ( $diff->start <= $this->bytes_already_parsed ) { @@ -1620,7 +1641,7 @@ private function apply_attributes_updates( $shift_this_point = 0 ) { $output_buffer .= substr( $this->html, $bytes_already_copied, $diff->start - $bytes_already_copied ); $output_buffer .= $diff->text; - $bytes_already_copied = $diff->end; + $bytes_already_copied = $diff->start + $diff->length; } $this->html = $output_buffer . substr( $this->html, $bytes_already_copied ); @@ -1630,6 +1651,8 @@ private function apply_attributes_updates( $shift_this_point = 0 ) { * replacements adjust offsets in the input document. */ foreach ( $this->bookmarks as $bookmark_name => $bookmark ) { + $bookmark_end = $bookmark->start + $bookmark->length; + /* * Each lexical update which appears before the bookmark's endpoints * might shift the offsets for those endpoints. 
Loop through each change @@ -1640,28 +1663,30 @@ private function apply_attributes_updates( $shift_this_point = 0 ) { $tail_delta = 0; foreach ( $this->lexical_updates as $diff ) { - if ( $bookmark->start < $diff->start && $bookmark->end < $diff->start ) { + $diff_end = $diff->start + $diff->length; + + if ( $bookmark->start < $diff->start && $bookmark_end < $diff->start ) { break; } - if ( $bookmark->start >= $diff->start && $bookmark->end < $diff->end ) { + if ( $bookmark->start >= $diff->start && $bookmark_end < $diff_end ) { $this->release_bookmark( $bookmark_name ); continue 2; } - $delta = strlen( $diff->text ) - ( $diff->end - $diff->start ); + $delta = strlen( $diff->text ) - $diff->length; if ( $bookmark->start >= $diff->start ) { $head_delta += $delta; } - if ( $bookmark->end >= $diff->end ) { + if ( $bookmark_end >= $diff_end ) { $tail_delta += $delta; } } - $bookmark->start += $head_delta; - $bookmark->end += $tail_delta; + $bookmark->start += $head_delta; + $bookmark->length += $tail_delta - $head_delta; } $this->lexical_updates = array(); @@ -1743,7 +1768,7 @@ private static function sort_start_ascending( $a, $b ) { * This code should be unreachable, because it implies the two replacements * start at the same location and contain the same text. */ - return $a->end - $b->end; + return $a->length - $b->length; } /** @@ -1971,7 +1996,15 @@ public function has_self_closing_flag() { return false; } - return '/' === $this->html[ $this->tag_ends_at - 1 ]; + /* + * The self-closing flag is the solidus at the _end_ of the tag, not the beginning. + * + * Example: + * + *
+ * ^ this appears one character before the end of the closing ">". + */ + return '/' === $this->html[ $this->token_starts_at + $this->token_length - 1 ]; } /** @@ -2101,7 +2134,7 @@ public function set_attribute( $name, $value ) { $existing_attribute = $this->attributes[ $comparable_name ]; $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $existing_attribute->start, - $existing_attribute->end, + $existing_attribute->length, $updated_attribute ); } else { @@ -2119,7 +2152,7 @@ public function set_attribute( $name, $value ) { */ $this->lexical_updates[ $comparable_name ] = new WP_HTML_Text_Replacement( $this->tag_name_starts_at + $this->tag_name_length, - $this->tag_name_starts_at + $this->tag_name_length, + 0, ' ' . $updated_attribute ); } @@ -2194,7 +2227,7 @@ public function remove_attribute( $name ) { */ $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement( $this->attributes[ $name ]->start, - $this->attributes[ $name ]->end, + $this->attributes[ $name ]->length, '' ); @@ -2203,7 +2236,7 @@ public function remove_attribute( $name ) { foreach ( $this->duplicate_attributes[ $name ] as $attribute_token ) { $this->lexical_updates[] = new WP_HTML_Text_Replacement( $attribute_token->start, - $attribute_token->end, + $attribute_token->length, '' ); } @@ -2289,7 +2322,7 @@ public function get_updated_html() { * Keep track of the position right before the current tag. This will * be necessary for reparsing the current tag after updating the HTML. */ - $before_current_tag = $this->tag_name_starts_at - 1; + $before_current_tag = $this->token_starts_at; /* * 1. Apply the enqueued edits and update all the pointers to reflect those changes. 
@@ -2325,7 +2358,7 @@ public function get_updated_html() { } $tag_ends_at = strpos( $this->html, '>', $this->bytes_already_parsed ); - $this->tag_ends_at = $tag_ends_at; + $this->token_length = $tag_ends_at - $this->token_starts_at; $this->bytes_already_parsed = $tag_ends_at; return $this->html; diff --git a/src/wp-includes/html-api/class-wp-html-text-replacement.php b/src/wp-includes/html-api/class-wp-html-text-replacement.php index 26b7bb2d28630..4b8a6a6aa289d 100644 --- a/src/wp-includes/html-api/class-wp-html-text-replacement.php +++ b/src/wp-includes/html-api/class-wp-html-text-replacement.php @@ -15,6 +15,7 @@ * * @access private * @since 6.2.0 + * @since 6.5.0 Replaced `end` with `length` to more closely match `substr()`. * * @see WP_HTML_Tag_Processor */ @@ -23,22 +24,25 @@ class WP_HTML_Text_Replacement { * Byte offset into document where replacement span begins. * * @since 6.2.0 + * * @var int */ public $start; /** - * Byte offset into document where replacement span ends. + * Byte length of span being replaced. + * + * @since 6.5.0 * - * @since 6.2.0 * @var int */ - public $end; + public $length; /** * Span of text to insert in document to replace existing content from start to end. * * @since 6.2.0 + * * @var string */ public $text; @@ -48,13 +52,13 @@ class WP_HTML_Text_Replacement { * * @since 6.2.0 * - * @param int $start Byte offset into document where replacement span begins. - * @param int $end Byte offset into document where replacement span ends. - * @param string $text Span of text to insert in document to replace existing content from start to end. + * @param int $start Byte offset into document where replacement span begins. + * @param int $length Byte length of span in document being replaced. + * @param string $text Span of text to insert in document to replace the existing content spanned by start and length. 
*/ - public function __construct( $start, $end, $text ) { - $this->start = $start; - $this->end = $end; - $this->text = $text; + public function __construct( $start, $length, $text ) { + $this->start = $start; + $this->length = $length; + $this->text = $text; } }