Skip to content

Commit

Permalink
Rename token functions and update comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Jan 1, 2024
1 parent b712690 commit cbb2ad4
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
$this->state->stack_of_open_elements->pop();
}

while ( parent::next_token() && '#tag' !== $this->get_node_type() ) {
while ( parent::next_token() && '#tag' !== $this->get_token_type() ) {
continue;
}
}
Expand Down
177 changes: 163 additions & 14 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2501,7 +2501,29 @@ public function is_tag_closer() {
);
}

public function get_node_type() {
/**
* Indicates the kind of matched token, if any.
*
* This differs from `get_token_name()` in that it always
* returns a static string indicating the type, whereas
* `get_token_name()` may return values derived from the
* token itself, such as a tag name or processing
* instruction tag.
*
* Possible values:
* - `#tag` when matched on a tag.
* - `#text` when matched on a text node.
* - `#cdata-section` when matched on a CDATA node.
* - `#processing-instruction` when matched on a processing instruction.
* - `#comment` when matched on a comment.
* - `#presumptuous-tag` when matched on an empty tag closer.
* - `#funky-comment` when matched on a funky comment.
*
* @since 6.5.0
*
* @return string|null What kind of token is matched, or null.
*/
public function get_token_type() {
switch ( $this->parser_state ) {
case self::STATE_MATCHED_TAG:
return '#tag';
Expand All @@ -2513,11 +2535,32 @@ public function get_node_type() {
return '#processing-instruction';

default:
return $this->get_node_name();
return $this->get_token_name();
}
}

public function get_node_name() {
/**
* Returns the node name represented by the token.
*
* This matches the DOM API value `nodeName`. Some values
* are static, such as `#text` for a text node, while others
* are dynamically generated from the token itself.
*
* Dynamic names:
* - Uppercase tag name for tag matches.
* - Tag name for processing instructions.
* - `html` for DOCTYPE declarations.
*
* Note that if the Tag Processor is not matched on a token
* then this function will return `null`, either because it
* hasn't yet found a token or because it reached the end
* of the document without matching a token.
*
* @since 6.5.0
*
* @return string|null Name of the matched token.
*/
public function get_token_name() {
switch ( $this->parser_state ) {
case self::STATE_MATCHED_TAG:
return $this->get_tag();
Expand Down Expand Up @@ -2546,7 +2589,26 @@ public function get_node_name() {
}
}

public function get_node_text() {
/**
* Returns the modifiable text for a matched token, or an empty string.
*
* Modifiable text is text content that may be read and changed without
* changing the HTML structure of the document around it. This includes
* the contents of `#text` nodes in the HTML as well as the inner
* contents of HTML comments, CDATA sections, Processing Instructions,
* and others, even though these nodes aren't part of a parsed DOM tree.
* It also includes the contents of SCRIPT and STYLE tags, of TEXTAREA
* tags, and of any other section in an HTML document which cannot
* contain HTML markup (DATA).
*
* If a token has no modifiable text then an empty string is returned to
* avoid needless crashing or type errors. An empty string is therefore
* ambiguous: it does not indicate whether a token has modifiable text,
* because a token with modifiable text may itself contain an empty
* string (e.g. a comment with no contents).
*
* @return string
*/
public function get_modifiable_text() {
$at = $this->text_starts_at;
$length = $this->text_length;

Expand Down Expand Up @@ -3030,7 +3092,7 @@ private function matches() {
}

/**
* Parser Ready State
* Parser Ready State.
*
* Indicates that the parser is ready to run and waiting for a state transition.
* It may not have started yet, or it may have just finished parsing a token and
Expand All @@ -3043,7 +3105,7 @@ private function matches() {
const STATE_READY = 'STATE_READY';

/**
* Parser Complete State
* Parser Complete State.
*
* Indicates that the parser has reached the end of the document and there is
* nothing left to scan. It finished parsing the last token completely.
Expand All @@ -3055,7 +3117,7 @@ private function matches() {
const STATE_COMPLETE = 'STATE_COMPLETE';

/**
* Parser Incomplete State
* Parser Incomplete State.
*
* Indicates that the parser has reached the end of the document before finishing
* a token. It started parsing a token but there is a possibility that the input
Expand All @@ -3071,7 +3133,7 @@ private function matches() {
const STATE_INCOMPLETE = 'STATE_INCOMPLETE';

/**
* Parser Matched Tag State
* Parser Matched Tag State.
*
* Indicates that the parser has found an HTML tag and it's possible to get
* the tag name and read or modify its attributes (if it's not a closing tag).
Expand All @@ -3082,11 +3144,98 @@ private function matches() {
*/
const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';

const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
const STATE_PI_NODE = 'STATE_PI_NODE';
const STATE_COMMENT = 'STATE_COMMENT';
const STATE_DOCTYPE = 'STATE_DOCTYPE';
/**
* Parser Text Node State.
*
* Indicates that the parser has found a text node and it's possible
* to read and modify that text.
*
* @since 6.5.0
*
* @access private
*/
const STATE_TEXT_NODE = 'STATE_TEXT_NODE';

/**
* Parser CDATA Node State.
*
* Indicates that the parser has found a CDATA node and it's possible
* to read and modify its modifiable text. Note that in HTML there are
* no CDATA nodes outside foreign elements (SVG and MathML). Outside
* of foreign elements, they are treated as HTML comments. Nonetheless,
* the Tag Processor still recognizes them as they appear in the HTML
* stream and exposes them for inspection and modification.
*
* @since 6.5.0
*
* @access private
*/
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';

/**
* Parser Processing Instruction State.
*
* Indicates that the parser has found a Processing Instruction and
* it's possible to read and modify its modifiable text. Note that in
* HTML there are no Processing Instruction nodes and they are treated
* as HTML comments. Nonetheless, the Tag Processor still recognizes
* them as they appear in the HTML stream and exposes them for
* inspection and modification.
*
* @since 6.5.0
*
* @access private
*/
const STATE_PI_NODE = 'STATE_PI_NODE';

/**
* Parser Comment State.
*
* Indicates that the parser has found an HTML comment and it's
* possible to read and modify its modifiable text.
*
* @since 6.5.0
*
* @access private
*/
const STATE_COMMENT = 'STATE_COMMENT';

/**
* Parser Doctype State.
*
* Indicates that the parser has found a DOCTYPE node and it's
* possible to read and modify its modifiable text.
*
* @since 6.5.0
*
* @access private
*/
const STATE_DOCTYPE = 'STATE_DOCTYPE';

/**
* Parser Presumptuous Tag State.
*
* Indicates that the parser has found an empty tag closer.
* Note that in HTML there are no empty tag closers, and they
* are ignored. Nonetheless, the Tag Processor still
* recognizes them as they appear in the HTML stream.
*
* These were historically discussed as a "presumptuous tag
* closer," which would close the nearest open tag, but were
* dismissed in favor of explicitly-closing tags.
*
* @since 6.5.0
*
* @access private
*/
const STATE_PRESUMPTUOUS_TAG = 'STATE_PRESUMPTUOUS_TAG';
const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY';

/**
* Parser Funky Comment State.
*
* Indicates that the parser has found a "funky comment"
* and it's possible to read and modify its modifiable text.
*
* Funky comments are tag closers with invalid tag names. Note
* that in HTML these are treated as HTML comments. Nonetheless,
* the Tag Processor recognizes them in a stream of HTML and
* exposes them for inspection and modification.
*
* @since 6.5.0
*
* @access private
*/
const STATE_FUNKY_COMMENT = 'STATE_WP_FUNKY';
}

0 comments on commit cbb2ad4

Please sign in to comment.