Skip to content

Commit

Permalink
Fix indices; 6 failing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Nov 29, 2023
1 parent bfd2d3a commit 1a045da
Showing 1 changed file with 73 additions and 67 deletions.
140 changes: 73 additions & 67 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,6 @@ public function next_token() {

$this->tag_ends_at = $tag_ends_at;
$this->bytes_already_parsed = min( strlen( $this->html ) - 1, $tag_ends_at + 1 );
$this->continuation_state = self::STATE_IN_TAG;
$this->last_token_type = self::ELEMENT_NODE;

/*
Expand Down Expand Up @@ -712,7 +711,6 @@ public function next_token() {
$tag_name_length = $this->tag_name_length;
$tag_ends_at = $this->tag_ends_at;

$this->last_token_type = self::ELEMENT_NODE;
$this->token_starts_at = $was_at;
$this->text_starts_at = $this->tag_ends_at + 1;

Expand Down Expand Up @@ -750,6 +748,8 @@ public function next_token() {
return false;
}

$this->token_starts_at = $was_at;
$this->text_starts_at = $tag_ends_at + 1;
$this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
$this->token_length = $this->bytes_already_parsed - $was_at;
$this->tag_name_starts_at = $tag_name_starts_at;
Expand Down Expand Up @@ -1221,8 +1221,9 @@ private function skip_script_data() {
private function parse_next_tag() {
$this->after_tag();

$html = $this->html;
$at = $this->bytes_already_parsed;
$html = $this->html;
$at = $this->bytes_already_parsed;
$was_at = $at;
if ( $at >= strlen( $this->html ) ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->bytes_already_parsed = strlen( $this->html );
Expand All @@ -1231,16 +1232,15 @@ private function parse_next_tag() {

$at = strpos( $html, '<', $at );
if ( false === $at ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->last_token_type = self::TEXT_NODE;
$this->bytes_already_parsed = strlen( $this->html );
return false;
$at = strlen( $this->html );
}

if ( $at > $this->bytes_already_parsed ) {
$this->last_token_type = self::TEXT_NODE;
$this->tag_name_starts_at = $this->bytes_already_parsed;
$this->tag_name_length = $at - $this->tag_name_starts_at;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at;
$this->text_length = $at - $was_at;
$this->token_length = $at - $was_at;
$this->bytes_already_parsed = $at;
return true;
}
Expand Down Expand Up @@ -1269,11 +1269,13 @@ private function parse_next_tag() {
$tag_name_prefix_length = strspn( $html, 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1 );
if ( $tag_name_prefix_length > 0 ) {
++$at;
$this->last_token_type = self::ELEMENT_NODE;
$this->token_starts_at = $was_at;
$this->text_starts_at = null;
$this->text_length = null;
$this->tag_name_length = $tag_name_prefix_length + strcspn( $html, " \t\f\r\n/>", $at + $tag_name_prefix_length );
$this->tag_name_starts_at = $at;
$this->bytes_already_parsed = $at + $this->tag_name_length;
$this->continuation_state = self::STATE_IN_TAG;
$this->last_token_type = self::ELEMENT_NODE;
return true;
}

Expand All @@ -1282,8 +1284,8 @@ private function parse_next_tag() {
* the document. There is nothing left to parse.
*/
if ( $at + 1 >= strlen( $html ) ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->last_token_type = self::TEXT_NODE;
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

Expand All @@ -1304,18 +1306,19 @@ private function parse_next_tag() {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->bytes_already_parsed = strlen( $this->html );
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$this->last_token_type = self::COMMENT_NODE;
$this->tag_name_starts_at = $at;
$this->tag_name_length = max( 0, $span_of_dashes - 2 );
$this->tag_ends_at = $closer_at + $span_of_dashes;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at + 3;
$this->text_length = max( 0, $span_of_dashes - 2 );
$this->token_length = $closer_at + $span_of_dashes + 1 - $was_at;
// @todo this seems wrong to add 1 here; why?
$this->bytes_already_parsed = $closer_at + $span_of_dashes + 1;
return true;
Expand All @@ -1331,23 +1334,27 @@ private function parse_next_tag() {
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$this->last_token_type = self::COMMENT_NODE;
$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at - $at - 4;
$this->tag_ends_at = $closer_at + 3;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at + 4;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 2 - $was_at;
$this->bytes_already_parsed = $closer_at + 3;
return true;
}

if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$this->last_token_type = self::COMMENT_NODE;
$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at - $at - 4;
$this->tag_ends_at = $closer_at + 4;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at + 4;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 3 - $was_at;
$this->bytes_already_parsed = $closer_at + 4;
return true;
}
Expand All @@ -1371,14 +1378,17 @@ private function parse_next_tag() {
) {
$closer_at = strpos( $html, ']]>', $at + 9 );
if ( false === $closer_at ) {
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at + 3 - $this->tag_name_starts_at;
$this->last_token_type = self::CDATA_SECTION_NODE;
$this->token_starts_at = $was_at;
$this->text_starts_at = $at + 9;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 3 - $was_at;
$this->bytes_already_parsed = $closer_at + 3;

$this->last_token_type = self::CDATA_SECTION_NODE;
return true;
}

Expand All @@ -1399,14 +1409,14 @@ private function parse_next_tag() {
) {
$closer_at = strpos( $html, '>', $at + 9 );
if ( false === $closer_at ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->bytes_already_parsed = strlen( $this->html );
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

$this->last_token_type = self::DOCUMENT_TYPE_NODE;
$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at - $at - 9;
$this->token_starts_at = $was_at;
$this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
Expand All @@ -1417,14 +1427,17 @@ private function parse_next_tag() {
*/
$closer_at = strpos( $html, '>', $at + 1 );
if ( false === $closer_at ) {
$this->continuation_state = self::STATE_COMPLETE;
$this->bytes_already_parsed = strlen( $this->html );
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

$this->last_token_type = self::COMMENT_NODE;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at + 2;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at - $at;
return true;
}

Expand All @@ -1435,7 +1448,7 @@ private function parse_next_tag() {
*/
if ( '>' === $html[ $at + 1 ] ) {
$this->last_token_type = self::EMPTY_END_TAG;
$this->bytes_already_parsed = $at + 1;
$this->bytes_already_parsed = $at + 2;
return true;
}

Expand All @@ -1446,14 +1459,16 @@ private function parse_next_tag() {
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
if ( false === $closer_at ) {
$this->bytes_already_parsed = strlen( $this->html );
$this->continuation_state = self::STATE_COMPLETE;
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

$this->last_token_type = self::COMMENT_NODE;
$this->tag_name_starts_at = $at;
$this->tag_name_length = $closer_at - $at;
$this->token_starts_at = $was_at;
$this->text_starts_at = $at + 2;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
Expand All @@ -1467,15 +1482,16 @@ private function parse_next_tag() {
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
$this->bytes_already_parsed = strlen( $this->html );
$this->continuation_state = self::STATE_COMPLETE;
$this->continuation_state = self::STATE_INCOMPLETE;
$this->bytes_already_parsed = $was_at;
return false;
}

$this->last_token_type = self::WP_FUNKY_COMMENT_NODE;
$this->tag_name_starts_at = $at - 1;
$this->tag_name_length = $closer_at - $at - 1;
$this->tag_ends_at = $closer_at;
$this->token_starts_at = $was_at;
$this->text_starts_at = $was_at + 2;
$this->text_length = $closer_at - $this->text_starts_at;
$this->token_length = $closer_at + 1 - $was_at;
$this->bytes_already_parsed = $closer_at + 1;
return true;
}
Expand Down Expand Up @@ -2169,6 +2185,9 @@ public function get_node_name() {
// @todo: the browser returns "html" for the DOCTYPE node type.
return 'html';

case self::EMPTY_END_TAG:
return '#empty-end-tag';

case self::WP_FUNKY_COMMENT_NODE:
return '#funky-comment';
}
Expand All @@ -2177,13 +2196,18 @@ public function get_node_name() {
public function get_node_text() {
switch ( $this->last_token_type ) {
case self::ELEMENT_NODE:
if ( $this->is_closing_tag ) {
return null;
}

switch ( $this->get_tag() ) {
case 'IFRAME':
case 'NOEMBED':
case 'NOFRAMES':
case 'NOSCRIPT':
case 'SCRIPT':
case 'STYLE':
case 'TEXTAREA':
case 'TITLE':
return substr(
$this->html,
Expand All @@ -2196,31 +2220,13 @@ public function get_node_text() {
}

case self::TEXT_NODE:
return substr(
$this->html,
$this->tag_name_starts_at,
$this->tag_name_length
);

case self::CDATA_SECTION_NODE:
return substr(
$this->html,
$this->tag_name_starts_at + 9,
$this->tag_name_length - 9 - 3
);

case self::COMMENT_NODE:
return substr(
$this->html,
$this->tag_name_starts_at + 4,
$this->tag_name_length
);

case self::WP_FUNKY_COMMENT_NODE:
return substr(
$this->html,
$this->tag_name_starts_at + 2,
$this->tag_name_length
$this->text_starts_at,
$this->text_length
);
}
}
Expand Down

0 comments on commit 1a045da

Please sign in to comment.