diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 74cb2c7a4ef16..a487063da2e3b 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -672,7 +672,6 @@ public function next_token() {
$this->tag_ends_at = $tag_ends_at;
$this->bytes_already_parsed = min( strlen( $this->html ) - 1, $tag_ends_at + 1 );
- $this->continuation_state = self::STATE_IN_TAG;
$this->last_token_type = self::ELEMENT_NODE;
/*
@@ -712,7 +711,6 @@ public function next_token() {
$tag_name_length = $this->tag_name_length;
$tag_ends_at = $this->tag_ends_at;
- $this->last_token_type = self::ELEMENT_NODE;
$this->token_starts_at = $was_at;
$this->text_starts_at = $this->tag_ends_at + 1;
@@ -750,6 +748,8 @@ public function next_token() {
return false;
}
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $tag_ends_at + 1;
$this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
$this->token_length = $this->bytes_already_parsed - $was_at;
$this->tag_name_starts_at = $tag_name_starts_at;
@@ -1221,8 +1221,9 @@ private function skip_script_data() {
private function parse_next_tag() {
$this->after_tag();
- $html = $this->html;
- $at = $this->bytes_already_parsed;
+ $html = $this->html;
+ $at = $this->bytes_already_parsed;
+ $was_at = $at;
if ( $at >= strlen( $this->html ) ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->bytes_already_parsed = strlen( $this->html );
@@ -1231,16 +1232,15 @@ private function parse_next_tag() {
$at = strpos( $html, '<', $at );
if ( false === $at ) {
- $this->continuation_state = self::STATE_COMPLETE;
- $this->last_token_type = self::TEXT_NODE;
- $this->bytes_already_parsed = strlen( $this->html );
- return false;
+ $at = strlen( $this->html );
}
if ( $at > $this->bytes_already_parsed ) {
$this->last_token_type = self::TEXT_NODE;
- $this->tag_name_starts_at = $this->bytes_already_parsed;
- $this->tag_name_length = $at - $this->tag_name_starts_at;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at;
+ $this->text_length = $at - $was_at;
+ $this->token_length = $at - $was_at;
$this->bytes_already_parsed = $at;
return true;
}
@@ -1269,11 +1269,13 @@ private function parse_next_tag() {
$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
if ( $tag_name_prefix_length > 0 ) {
++$at;
+ $this->last_token_type = self::ELEMENT_NODE;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = null;
+ $this->text_length = null;
$this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
$this->tag_name_starts_at = $at;
$this->bytes_already_parsed = $at + $this->tag_name_length;
- $this->continuation_state = self::STATE_IN_TAG;
- $this->last_token_type = self::ELEMENT_NODE;
return true;
}
@@ -1282,8 +1284,8 @@ private function parse_next_tag() {
* the document. There is nothing left to parse.
*/
if ( $at + 1 >= strlen( $html ) ) {
- $this->continuation_state = self::STATE_COMPLETE;
- $this->last_token_type = self::TEXT_NODE;
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
@@ -1304,8 +1306,8 @@ private function parse_next_tag() {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
- $this->continuation_state = self::STATE_COMPLETE;
- $this->bytes_already_parsed = strlen( $this->html );
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
@@ -1313,9 +1315,10 @@ private function parse_next_tag() {
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$this->last_token_type = self::COMMENT_NODE;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = max( 0, $span_of_dashes - 2 );
- $this->tag_ends_at = $closer_at + $span_of_dashes;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at + 3;
+ $this->text_length = max( 0, $span_of_dashes - 2 );
+ $this->token_length = $closer_at + $span_of_dashes + 1 - $was_at;
// @todo this seems wrong to add 1 here; why?
$this->bytes_already_parsed = $closer_at + $span_of_dashes + 1;
return true;
@@ -1331,23 +1334,27 @@ private function parse_next_tag() {
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$this->last_token_type = self::COMMENT_NODE;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at - $at - 4;
- $this->tag_ends_at = $closer_at + 3;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at + 4;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 3 - $was_at; // Spans the full token through the ">" of the closing "-->", matching bytes_already_parsed.
$this->bytes_already_parsed = $closer_at + 3;
return true;
}
if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$this->last_token_type = self::COMMENT_NODE;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at - $at - 4;
- $this->tag_ends_at = $closer_at + 4;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at + 4;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 4 - $was_at; // Spans the full token through the ">" of the closing "--!>", matching bytes_already_parsed.
$this->bytes_already_parsed = $closer_at + 4;
return true;
}
@@ -1371,14 +1378,17 @@ private function parse_next_tag() {
) {
$closer_at = strpos( $html, ']]>', $at + 9 );
if ( false === $closer_at ) {
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at + 3 - $this->tag_name_starts_at;
+ $this->last_token_type = self::CDATA_SECTION_NODE;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $at + 9;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 3 - $was_at;
$this->bytes_already_parsed = $closer_at + 3;
-
- $this->last_token_type = self::CDATA_SECTION_NODE;
return true;
}
@@ -1399,14 +1409,14 @@ private function parse_next_tag() {
) {
$closer_at = strpos( $html, '>', $at + 9 );
if ( false === $closer_at ) {
- $this->continuation_state = self::STATE_COMPLETE;
- $this->bytes_already_parsed = strlen( $this->html );
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
$this->last_token_type = self::DOCUMENT_TYPE_NODE;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at - $at - 9;
+ $this->token_starts_at = $was_at;
+ $this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
@@ -1417,14 +1427,17 @@ private function parse_next_tag() {
*/
$closer_at = strpos( $html, '>', $at + 1 );
if ( false === $closer_at ) {
- $this->continuation_state = self::STATE_COMPLETE;
- $this->bytes_already_parsed = strlen( $this->html );
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
+
$this->last_token_type = self::COMMENT_NODE;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at + 2;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at - $at;
return true;
}
@@ -1435,7 +1448,7 @@ private function parse_next_tag() {
*/
if ( '>' === $html[ $at + 1 ] ) {
$this->last_token_type = self::EMPTY_END_TAG;
- $this->bytes_already_parsed = $at + 1;
+ $this->bytes_already_parsed = $at + 2;
return true;
}
@@ -1446,14 +1459,16 @@ private function parse_next_tag() {
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
if ( false === $closer_at ) {
- $this->bytes_already_parsed = strlen( $this->html );
- $this->continuation_state = self::STATE_COMPLETE;
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
$this->last_token_type = self::COMMENT_NODE;
- $this->tag_name_starts_at = $at;
- $this->tag_name_length = $closer_at - $at;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $at + 2;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
@@ -1467,15 +1482,16 @@ private function parse_next_tag() {
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
- $this->bytes_already_parsed = strlen( $this->html );
- $this->continuation_state = self::STATE_COMPLETE;
+ $this->continuation_state = self::STATE_INCOMPLETE;
+ $this->bytes_already_parsed = $was_at;
return false;
}
$this->last_token_type = self::WP_FUNKY_COMMENT_NODE;
- $this->tag_name_starts_at = $at - 1;
- $this->tag_name_length = $closer_at - $at - 1;
- $this->tag_ends_at = $closer_at;
+ $this->token_starts_at = $was_at;
+ $this->text_starts_at = $was_at + 2;
+ $this->text_length = $closer_at - $this->text_starts_at;
+ $this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
@@ -2169,6 +2185,9 @@ public function get_node_name() {
// @todo: the browser returns "html" for the DOCTYPE node type.
return 'html';
+ case self::EMPTY_END_TAG:
+ return '#empty-end-tag';
+
case self::WP_FUNKY_COMMENT_NODE:
return '#funky-comment';
}
@@ -2177,6 +2196,10 @@ public function get_node_name() {
public function get_node_text() {
switch ( $this->last_token_type ) {
case self::ELEMENT_NODE:
+ if ( $this->is_closing_tag ) {
+ return null;
+ }
+
switch ( $this->get_tag() ) {
case 'IFRAME':
case 'NOEMBED':
@@ -2184,6 +2207,7 @@ public function get_node_text() {
case 'NOSCRIPT':
case 'SCRIPT':
case 'STYLE':
+ case 'TEXTAREA':
case 'TITLE':
return substr(
$this->html,
@@ -2196,31 +2220,13 @@ public function get_node_text() {
}
case self::TEXT_NODE:
- return substr(
- $this->html,
- $this->tag_name_starts_at,
- $this->tag_name_length
- );
-
case self::CDATA_SECTION_NODE:
- return substr(
- $this->html,
- $this->tag_name_starts_at + 9,
- $this->tag_name_length - 9 - 3
- );
-
case self::COMMENT_NODE:
- return substr(
- $this->html,
- $this->tag_name_starts_at + 4,
- $this->tag_name_length
- );
-
case self::WP_FUNKY_COMMENT_NODE:
return substr(
$this->html,
- $this->tag_name_starts_at + 2,
- $this->tag_name_length
+ $this->text_starts_at,
+ $this->text_length
);
}
}