Skip to content

Commit

Permalink
HTML API: Simplify breadcrumb accounting.
Browse files Browse the repository at this point in the history
Since the HTML Processor started visiting all nodes in a document, both
real and virtual, the breadcrumb accounting became a bit complicated
and it's not entirely clear that it is fully reliable.

In this patch the breadcrumbs are rebuilt separately from the stack of
open elements in order to eliminate the problem of the stateful stack
interactions and the post-hoc event queue.

Breadcrumbs are greatly simplified as a result, and more verifiably
correct, in this construction.

Developed in WordPress#6981
Discussed in https://core.trac.wordpress.org/ticket/61576

Follow-up to [58590].

Props bernhard-reiter, dmsnell.
See #61576.


git-svn-id: https://develop.svn.wordpress.org/trunk@58713 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
dmsnell committed Jul 12, 2024
1 parent cd3bf1b commit 4eec5cb
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 76 deletions.
126 changes: 51 additions & 75 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $element_queue = array();

/**
* Stores the current breadcrumbs.
*
* @since 6.7.0
*
* @var string[]
*/
private $breadcrumbs = array();

/**
* Current stack event, if set, representing a matched token.
*
Expand Down Expand Up @@ -310,8 +319,8 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
false
);

$processor->state->stack_of_open_elements->push( $context_node );
$processor->context_node = $context_node;
$processor->breadcrumbs = array( 'HTML', $context_node->node_name );

return $processor;
}
Expand Down Expand Up @@ -523,44 +532,46 @@ public function next_token() {
return false;
}

if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) {
while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) {
continue;
}
$this->has_seen_context_node = 'done';
return $this->next_token();
/*
* Prime the events if there are none.
*
* @todo In some cases, probably related to the adoption agency
* algorithm, this call to step() doesn't create any new
* events. Calling it again creates them. Figure out why
* this is and if it's inherent or if it's a bug. Looping
* until there are events or until there are no more
* tokens works in the meantime and isn't obviously wrong.
*/
while ( empty( $this->element_queue ) && $this->step() ) {
continue;
}

// Process the next event on the queue.
$this->current_element = array_shift( $this->element_queue );
while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) {
if ( isset( $this->current_element ) ) {
if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
$this->has_seen_context_node = true;
return $this->next_token();
}
}
$this->current_element = array_shift( $this->element_queue );
if ( ! isset( $this->current_element ) ) {
return false;
}

if ( ! isset( $this->current_element ) ) {
if ( 'done' === $this->has_seen_context_node ) {
return false;
} else {
return $this->next_token();
}
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;

/*
* The root node only exists in the fragment parser, and closing it
* indicates that the parse is complete. Stop before popping if from
* the breadcrumbs.
*/
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
return ! $is_pop && $this->next_token();
}

if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) {
$this->element_queue = array();
$this->current_element = null;
return false;
// Adjust the breadcrumbs for this event.
if ( $is_pop ) {
array_pop( $this->breadcrumbs );
} else {
$this->breadcrumbs[] = $this->current_element->token->node_name;
}

// Avoid sending close events for elements which don't expect a closing.
if (
WP_HTML_Stack_Event::POP === $this->current_element->operation &&
! static::expects_closer( $this->current_element->token )
) {
if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) {
return $this->next_token();
}

Expand Down Expand Up @@ -643,10 +654,11 @@ public function matches_breadcrumbs( $breadcrumbs ) {
return false;
}

foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) {
$node = $this->breadcrumbs[ $i ];
$crumb = strtoupper( current( $breadcrumbs ) );

if ( '*' !== $crumb && $node->node_name !== $crumb ) {
if ( '*' !== $crumb && $node !== $crumb ) {
return false;
}

Expand Down Expand Up @@ -862,46 +874,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
* @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
*/
public function get_breadcrumbs() {
$breadcrumbs = array();

foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) {
$breadcrumbs[] = $stack_item->node_name;
}

if ( ! $this->is_virtual() ) {
return $breadcrumbs;
}

foreach ( $this->element_queue as $queue_item ) {
if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) {
break;
}

if ( 'context-node' === $queue_item->token->bookmark_name ) {
break;
}

if ( 'real' === $queue_item->provenance ) {
break;
}

if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) {
$breadcrumbs[] = $queue_item->token->node_name;
} else {
array_pop( $breadcrumbs );
}
}

if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) {
array_pop( $breadcrumbs );
}

// Add the virtual node we're at.
if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) {
$breadcrumbs[] = $this->current_element->token->node_name;
}

return $breadcrumbs;
return $this->breadcrumbs;
}

/**
Expand Down Expand Up @@ -930,9 +903,7 @@ public function get_breadcrumbs() {
* @return int Nesting-depth of current location in the document.
*/
public function get_current_depth() {
return $this->is_virtual()
? count( $this->get_breadcrumbs() )
: $this->state->stack_of_open_elements->count();
return count( $this->breadcrumbs );
}

/**
Expand Down Expand Up @@ -2552,7 +2523,6 @@ public function seek( $bookmark_name ) {
? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
: 0;
$bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start;
$bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length;
$direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';

/*
Expand Down Expand Up @@ -2610,6 +2580,12 @@ public function seek( $bookmark_name ) {
$this->state->frameset_ok = true;
$this->element_queue = array();
$this->current_element = null;

if ( isset( $this->context_node ) ) {
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
} else {
$this->breadcrumbs = array();
}
}

// When moving forwards, reparse the document until reaching the same location as the original bookmark.
Expand Down
11 changes: 10 additions & 1 deletion tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,16 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
$this->assertSame( 'CODE', $processor->get_tag(), "Expected to start test on CODE element but found {$processor->get_tag()} instead." );
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );

$this->assertTrue( $processor->next_token(), 'Failed to advance past CODE tag to expected SPAN closer.' );
$this->assertTrue(
$processor->next_tag(
array(
'tag_name' => 'SPAN',
'tag_closers' => 'visit',
)
),
'Failed to advance past CODE tag to expected SPAN closer.'
);
$this->assertSame( 'SPAN', $processor->get_tag() );
$this->assertTrue( $processor->is_tag_closer(), 'Expected to find closing SPAN, but found opener instead.' );
$this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to advance past CODE tag to expected DIV opener.' );

Expand Down

0 comments on commit 4eec5cb

Please sign in to comment.