diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ea74410230954..4cb3092f6c0a5 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -378,6 +378,31 @@ class WP_HTML_Tag_Processor { */ private $is_closing_tag; + /** + * Kind of node parsed in the last step while scanning through the document, + * or `null` if the parser hasn't paused on a matched token. + * + * Can be one of: + * - WP_HTML_Tag_Processor::ELEMENT_NODE + * - WP_HTML_Tag_Processor::TEXT_NODE + * - WP_HTML_Tag_Processor::CDATA_SECTION_NODE + * - WP_HTML_Tag_Processor::PROCESSING_INSTRUCTION_NODE + * - WP_HTML_Tag_Processor::COMMENT_NODE + * - WP_HTML_Tag_Processor::DOCUMENT_TYPE_NODE + * - WP_HTML_Tag_Processor::WP_FUNKY_COMMENT_NODE + * + * @var string|null + */ + private $last_token_type = null; + + /** + * Mode in which the parser should resume after pausing, + * or `null` if not paused on a matched token. Presumably a STATE_* constant; TODO confirm. + * + * @var string|null + */ + private $continuation_state = null; + /** * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. * @@ -567,7 +592,7 @@ public function next_tag( $query = null ) { return false; } $this->tag_ends_at = $tag_ends_at; - $this->bytes_already_parsed = $tag_ends_at; + $this->bytes_already_parsed = min( strlen( $this->html ) - 1, $tag_ends_at + 1 ); // Finally, check if the parsed tag and its attributes match the search query. if ( $this->matches() ) { @@ -2447,4 +2472,17 @@ private function matches() { return true; } + + // Node-type and parser-state constants, kept down here because they would be noisy at the top of the file. 
+ + const ELEMENT_NODE = 'NodeType.1.ELEMENT_NODE'; + const TEXT_NODE = 'NodeType.3.TEXT_NODE'; + const CDATA_SECTION_NODE = 'NodeType.4.CDATA_SECTION_NODE'; + const PROCESSING_INSTRUCTION_NODE = 'NodeType.7.PROCESSING_INSTRUCTION_NODE'; + const COMMENT_NODE = 'NodeType.8.COMMENT_NODE'; + const DOCUMENT_TYPE_NODE = 'NodeType.10.DOCUMENT_TYPE_NODE'; + const WP_FUNKY_COMMENT_NODE = 'NodeType.0_1.WP_FUNKY_COMMENT_NODE'; + + const STATE_COMPLETE = 'The parser has finished scanning through the document.'; + const STATE_IN_TAG = 'The parser has found a valid tag name and needs to continue parsing attributes.'; }