Skip to content

Commit

Permalink
HTML API: Fix void tag nesting with next_token
Browse files Browse the repository at this point in the history
When `next_token()` was introduced, it introduced a regression in the HTML
Processor whereby void tags remain on the stack of open elements when they
shouldn't. This led to invalid values returned from `get_breadcrumbs()`.

The reason was that calling `next_token()` works through a different code path
than the HTML Processor runs everything else. To solve this, its sub-classed
`next_token()` called `step( self::REPROCESS_CURRENT_TOKEN )` so that the proper
HTML accounting takes place.

Unfortunately that same reprocessing code path skipped the step whereby void
and self-closing elements are popped from the stack of open elements.

In this patch, that step is run with a third mode for `step()`, which is the
new `self::PROCESS_CURRENT_TOKEN`. This mode acts as if `self::PROCESS_NEXT_NODE`
were called, except it doesn't advance the parser.

Developed in WordPress#5975
Discussed in https://core.trac.wordpress.org/ticket/60382

Follow-up to [57348]

Props dmsnell, jonsurrell
Fixes #60382



git-svn-id: https://develop.svn.wordpress.org/trunk@57507 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
dmsnell committed Feb 1, 2024
1 parent 4a2aa99 commit cdb218b
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 2 deletions.
15 changes: 13 additions & 2 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ public function next_token() {
$found_a_token = parent::next_token();

if ( '#tag' === $this->get_token_type() ) {
$this->step( self::REPROCESS_CURRENT_NODE );
$this->step( self::PROCESS_CURRENT_NODE );
}

return $found_a_token;
Expand Down Expand Up @@ -513,7 +513,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
return false;
}

if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) {
/*
* Void elements still hop onto the stack of open elements even though
* there's no corresponding closing tag. This is important for managing
Expand All @@ -532,7 +532,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
if ( $top_node && self::is_void( $top_node->node_name ) ) {
$this->state->stack_of_open_elements->pop();
}
}

if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
while ( parent::next_token() && '#tag' !== $this->get_token_type() ) {
continue;
}
Expand Down Expand Up @@ -1781,6 +1783,15 @@ public static function is_void( $tag_name ) {
*/
const REPROCESS_CURRENT_NODE = 'reprocess-current-node';

/**
* Indicates that the current HTML token should be processed without advancing the parser.
*
* @since 6.5.0
*
* @var string
*/
const PROCESS_CURRENT_NODE = 'process-current-node';

/**
* Indicates that the parser encountered unsupported markup and has bailed.
*
Expand Down
53 changes: 53 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,59 @@ public function test_cannot_nest_void_tags( $tag_name ) {
);
}

/**
* Ensure non-nesting tags do not nest when processing tokens.
*
* @ticket 60382
*
* @dataProvider data_void_tags
*
* @param string $tag_name Name of void tag under test.
*/
public function test_cannot_nest_void_tags_next_token( $tag_name ) {
$processor = WP_HTML_Processor::create_fragment( "<{$tag_name}><div>" );

/*
* This HTML represents the same as the following HTML,
* assuming that it were provided `<img>` as the tag:
*
* <html>
* <body>
* <img>
* <div></div>
* </body>
* </html>
*/

$found_tag = $processor->next_token();

if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error() ) {
$this->markTestSkipped( "Tag {$tag_name} is not supported." );
}

$this->assertTrue(
$found_tag,
"Could not find first {$tag_name}."
);

$this->assertSame(
array( 'HTML', 'BODY', $tag_name ),
$processor->get_breadcrumbs(),
'Found incorrect nesting of first element.'
);

$this->assertTrue(
$processor->next_token(),
'Should have found the DIV as the second tag.'
);

$this->assertSame(
array( 'HTML', 'BODY', 'DIV' ),
$processor->get_breadcrumbs(),
"DIV should have been a sibling of the {$tag_name}."
);
}

/**
* Data provider.
*
Expand Down

0 comments on commit cdb218b

Please sign in to comment.