From f19a5cb6ceb0325b650f4d0f75fddb5dc11000d9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Sun, 31 Dec 2023 22:25:31 -0500 Subject: [PATCH] Update comments, remove mistaken = sign. --- .../html-api/class-wp-html-tag-processor.php | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index b86dbf38942a2..79d08c43b77d6 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -249,8 +249,6 @@ * * ## Tokens and finer-grained processing. * - * >>> Stub documentation. - * * It's also possible to scan through every lexical token in * the HTML document using the `next_token()` function. This * alternative form takes no argument and provides no built-in @@ -261,12 +259,12 @@ * $title = '(untitled)'; * $text_content = ''; * while ( $processor->next_token() ) { - * switch ( $processor->get_node_name() ) { + * switch ( $processor->get_token_name() ) { * case '#text': * $text .= $processor->get_node_text(); * break; * - * case 'HR': + * case 'BR': * $text .= "\n"; * break; * @@ -305,7 +303,7 @@ * - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any * character references are decoded. E.g. "1 &lt; 2 < 3" becomes "1 < 2 < 3". * - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAME`, `STYLE` whose contents are treated as - * raw plaintext and left as-si. E.g. "1 &lt; 2 < 3" remains "1 &lt; 2 < 3". + * raw plaintext and left as-is. E.g. "1 &lt; 2 < 3" remains "1 &lt; 2 < 3". * * #### Other tokens with modifiable text. * @@ -314,17 +312,17 @@ * - `#text` nodes, whose entire token _is_ the modifiable text. * - Comment nodes and nodes that became comments because of some syntax error. The * text for these nodes is the portion of the comment inside of the syntax. E.g. 
for - * "<!-- comment -->" the text is " comment " (note that the spaces are part of it). + * `<!-- comment -->` the text is `" comment "` (note that the spaces are part of it). * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for - * "<![CDATA[some content]]>" the text is "some content". + * `<![CDATA[some content]]>` the text is `"some content"`. * - "Funky comments," which are a special case of invalid closing tags whose name is * invalid. The text for these nodes is the text that a browser would transform into - * an HTML when parsing. E.g. for "</%post_author>" the text is "%post_author". + * an HTML comment when parsing. E.g. for `</%post_author>` the text is `%post_author`. * * And there are non-elements which are atomic in nature but have no modifiable text. - * - `DOCTYPE` nodes like "<DOCTYPE html>" which have no closing tag. - * - XML Processing instruction nodes like "<". - * - The empty end tag "<" which is ignored in the browser and DOM but exposed + * - `DOCTYPE` nodes like `<!DOCTYPE html>` which have no closing tag. + * - XML Processing instruction nodes like `<?xml version="1.0"?>`. + * - The empty end tag `</>` which is ignored in the browser and DOM but exposed * to the HTML API. * * ## Design and limitations @@ -849,9 +847,10 @@ public function next_token() { } /* - * for legacy reasons the rest of this function handles tags and their - * attributes. if the processor has reached the end of the document - * or if it matched any other token then it should return here. + * For legacy reasons the rest of this function handles tags and their + * attributes. If the processor has reached the end of the document + * or if it matched any other token then it should return here to avoid + * attempting to process tag-specific syntax. */ if ( self::STATE_INCOMPLETE !== $this->parser_state && @@ -922,7 +921,12 @@ public function next_token() { return true; } - // Preserve the opening tag pointers. + /* + * Preserve the opening tag pointers, as these will be overwritten + * when finding the closing tag. 
They will be reset after finding + * the closing tag to point to the opening of the special atomic + * tag sequence. + */ $tag_name_starts_at = $this->tag_name_starts_at; $tag_name_length = $this->tag_name_length; $tag_ends_at = $this->token_starts_at + $this->token_length; @@ -956,7 +960,7 @@ public function next_token() { /* * The values here look like they reference the opening tag but they reference - * the closing that instead. This is why the opening tag values were stored + * the closing tag instead. This is why the opening tag values were stored * above in a variable. It reads confusingly here, but that's because the * functions that skip the contents have moved all the internal cursors past * the inner content of the tag. @@ -1473,7 +1477,7 @@ private function parse_next_tag() { $was_at = $this->bytes_already_parsed; $at = $was_at; - while ( false !== $at && $at <= $doc_length ) { + while ( false !== $at && $at < $doc_length ) { $at = strpos( $html, '<', $at ); if ( $at > $was_at ) { @@ -1568,7 +1572,7 @@ private function parse_next_tag() { // Abruptly-closed empty comments are a sequence of dashes followed by `>`. $span_of_dashes = strspn( $html, '-', $closer_at ); if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { - // @todo This could go wrong if the closer is shorter than `` because there's no inside. + // @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content. $this->parser_state = self::STATE_COMMENT; $this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 4;