diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 74cb2c7a4ef16..a487063da2e3b 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -672,7 +672,6 @@ public function next_token() { $this->tag_ends_at = $tag_ends_at; $this->bytes_already_parsed = min( strlen( $this->html ) - 1, $tag_ends_at + 1 ); - $this->continuation_state = self::STATE_IN_TAG; $this->last_token_type = self::ELEMENT_NODE; /* @@ -712,7 +711,6 @@ public function next_token() { $tag_name_length = $this->tag_name_length; $tag_ends_at = $this->tag_ends_at; - $this->last_token_type = self::ELEMENT_NODE; $this->token_starts_at = $was_at; $this->text_starts_at = $this->tag_ends_at + 1; @@ -750,6 +748,8 @@ public function next_token() { return false; } + $this->token_starts_at = $was_at; + $this->text_starts_at = $tag_ends_at + 1; $this->text_length = $this->tag_name_starts_at - $this->text_starts_at; $this->token_length = $this->bytes_already_parsed - $was_at; $this->tag_name_starts_at = $tag_name_starts_at; @@ -1221,8 +1221,9 @@ private function skip_script_data() { private function parse_next_tag() { $this->after_tag(); - $html = $this->html; - $at = $this->bytes_already_parsed; + $html = $this->html; + $at = $this->bytes_already_parsed; + $was_at = $at; if ( $at >= strlen( $this->html ) ) { $this->continuation_state = self::STATE_COMPLETE; $this->bytes_already_parsed = strlen( $this->html ); @@ -1231,16 +1232,15 @@ private function parse_next_tag() { $at = strpos( $html, '<', $at ); if ( false === $at ) { - $this->continuation_state = self::STATE_COMPLETE; - $this->last_token_type = self::TEXT_NODE; - $this->bytes_already_parsed = strlen( $this->html ); - return false; + $at = strlen( $this->html ); } if ( $at > $this->bytes_already_parsed ) { $this->last_token_type = self::TEXT_NODE; - $this->tag_name_starts_at = $this->bytes_already_parsed; - $this->tag_name_length = $at - $this->tag_name_starts_at; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at; + $this->text_length = $at - $was_at; + $this->token_length = $at - $was_at; $this->bytes_already_parsed = $at; return true; } @@ -1269,11 +1269,13 @@ private function parse_next_tag() { $tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 ); if ( $tag_name_prefix_length > 0 ) { ++$at; + $this->last_token_type = self::ELEMENT_NODE; + $this->token_starts_at = $was_at; + $this->text_starts_at = null; + $this->text_length = null; $this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length ); $this->tag_name_starts_at = $at; $this->bytes_already_parsed = $at + $this->tag_name_length; - $this->continuation_state = self::STATE_IN_TAG; - $this->last_token_type = self::ELEMENT_NODE; return true; } @@ -1282,8 +1284,8 @@ private function parse_next_tag() { * the document. There is nothing left to parse. */ if ( $at + 1 >= strlen( $html ) ) { - $this->continuation_state = self::STATE_COMPLETE; - $this->last_token_type = self::TEXT_NODE; + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } @@ -1304,8 +1306,8 @@ private function parse_next_tag() { $closer_at = $at + 4; // If it's not possible to close the comment then there is nothing more to scan. if ( strlen( $html ) <= $closer_at ) { - $this->continuation_state = self::STATE_COMPLETE; - $this->bytes_already_parsed = strlen( $this->html ); + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } @@ -1313,9 +1315,10 @@ private function parse_next_tag() { $span_of_dashes = strspn( $html, '-', $closer_at ); if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { $this->last_token_type = self::COMMENT_NODE; - $this->tag_name_starts_at = $at; - $this->tag_name_length = max( 0, $span_of_dashes - 2 ); - $this->tag_ends_at = $closer_at + $span_of_dashes; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at + 3; + $this->text_length = max( 0, $span_of_dashes - 2 ); + $this->token_length = $closer_at + $span_of_dashes + 1 - $was_at; // @todo this seems wrong to add 1 here; why? $this->bytes_already_parsed = $closer_at + $span_of_dashes + 1; return true; @@ -1331,23 +1334,27 @@ private function parse_next_tag() { while ( ++$closer_at < strlen( $html ) ) { $closer_at = strpos( $html, '--', $closer_at ); if ( false === $closer_at ) { + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) { $this->last_token_type = self::COMMENT_NODE; - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at - $at - 4; - $this->tag_ends_at = $closer_at + 3; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at + 4; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 2 - $was_at; $this->bytes_already_parsed = $closer_at + 3; return true; } if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { $this->last_token_type = self::COMMENT_NODE; - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at - $at - 4; - $this->tag_ends_at = $closer_at + 4; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at + 4; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 3 - $was_at; $this->bytes_already_parsed = $closer_at + 4; return true; } @@ -1371,14 +1378,17 @@ private function parse_next_tag() { ) { $closer_at = strpos( $html, ']]>', $at + 9 ); if ( false === $closer_at ) { + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at + 3 - $this->tag_name_starts_at; + $this->last_token_type = self::CDATA_SECTION_NODE; + $this->token_starts_at = $was_at; + $this->text_starts_at = $at + 9; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 3 - $was_at; $this->bytes_already_parsed = $closer_at + 3; - - $this->last_token_type = self::CDATA_SECTION_NODE; return true; } @@ -1399,14 +1409,14 @@ private function parse_next_tag() { ) { $closer_at = strpos( $html, '>', $at + 9 ); if ( false === $closer_at ) { - $this->continuation_state = self::STATE_COMPLETE; - $this->bytes_already_parsed = strlen( $this->html ); + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } $this->last_token_type = self::DOCUMENT_TYPE_NODE; - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at - $at - 9; + $this->token_starts_at = $was_at; + $this->token_length = $closer_at + 1 - $was_at; $this->bytes_already_parsed = $closer_at + 1; return true; } @@ -1417,14 +1427,17 @@ private function parse_next_tag() { */ $closer_at = strpos( $html, '>', $at + 1 ); if ( false === $closer_at ) { - $this->continuation_state = self::STATE_COMPLETE; - $this->bytes_already_parsed = strlen( $this->html ); + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } + $this->last_token_type = self::COMMENT_NODE; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 1 - $was_at; $this->bytes_already_parsed = $closer_at + 1; - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at - $at; return true; } @@ -1435,7 +1448,7 @@ private function parse_next_tag() { */ if ( '>' === $html[ $at + 1 ] ) { $this->last_token_type = self::EMPTY_END_TAG; - $this->bytes_already_parsed = $at + 1; + $this->bytes_already_parsed = $at + 2; return true; } @@ -1446,14 +1459,16 @@ private function parse_next_tag() { if ( '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); if ( false === $closer_at ) { - $this->bytes_already_parsed = strlen( $this->html ); - $this->continuation_state = self::STATE_COMPLETE; + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } $this->last_token_type = self::COMMENT_NODE; - $this->tag_name_starts_at = $at; - $this->tag_name_length = $closer_at - $at; + $this->token_starts_at = $was_at; + $this->text_starts_at = $at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 1 - $was_at; $this->bytes_already_parsed = $closer_at + 1; return true; } @@ -1467,15 +1482,16 @@ private function parse_next_tag() { if ( $this->is_closing_tag ) { $closer_at = strpos( $html, '>', $at + 3 ); if ( false === $closer_at ) { - $this->bytes_already_parsed = strlen( $this->html ); - $this->continuation_state = self::STATE_COMPLETE; + $this->continuation_state = self::STATE_INCOMPLETE; + $this->bytes_already_parsed = $was_at; return false; } $this->last_token_type = self::WP_FUNKY_COMMENT_NODE; - $this->tag_name_starts_at = $at - 1; - $this->tag_name_length = $closer_at - $at - 1; - $this->tag_ends_at = $closer_at; + $this->token_starts_at = $was_at; + $this->text_starts_at = $was_at + 2; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 1 - $was_at; $this->bytes_already_parsed = $closer_at + 1; return true; } @@ -2169,6 +2185,9 @@ public function get_node_name() { // @todo: the browser returns "html" for the DOCTYPE node type. return 'html'; + case self::EMPTY_END_TAG: + return '#empty-end-tag'; + case self::WP_FUNKY_COMMENT_NODE: return '#funky-comment'; } @@ -2177,6 +2196,10 @@ public function get_node_name() { public function get_node_text() { switch ( $this->last_token_type ) { case self::ELEMENT_NODE: + if ( $this->is_closing_tag ) { + return null; + } + switch ( $this->get_tag() ) { case 'IFRAME': case 'NOEMBED': @@ -2184,6 +2207,7 @@ public function get_node_text() { case 'NOSCRIPT': case 'SCRIPT': case 'STYLE': + case 'TEXTAREA': case 'TITLE': return substr( $this->html, @@ -2196,31 +2220,13 @@ public function get_node_text() { } case self::TEXT_NODE: - return substr( - $this->html, - $this->tag_name_starts_at, - $this->tag_name_length - ); - case self::CDATA_SECTION_NODE: - return substr( - $this->html, - $this->tag_name_starts_at + 9, - $this->tag_name_length - 9 - 3 - ); - case self::COMMENT_NODE: - return substr( - $this->html, - $this->tag_name_starts_at + 4, - $this->tag_name_length - ); - case self::WP_FUNKY_COMMENT_NODE: return substr( $this->html, - $this->tag_name_starts_at + 2, - $this->tag_name_length + $this->text_starts_at, + $this->text_length ); } }