diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 233d47eb8da95..2b115dd156014 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3385,6 +3385,58 @@ public function get_comment_type(): ?string {
return $this->comment_type;
}
+	/**
+	 * Returns the text of a matched comment or null if not on a comment type node.
+	 *
+	 * This method returns the entire text content of a comment node as it
+	 * would appear in the browser.
+	 *
+	 * This differs from {@see ::get_modifiable_text()} in that certain comment
+	 * types in the HTML API cannot allow their entire comment text content to
+	 * be modified. Namely, "bogus comments" of the form `<?not allowed in html>`
+	 * will create a comment whose text content starts with `?`. Note that if
+	 * that character were modified, it would be possible to change the node
+	 * type.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @return string|null The comment text as it would appear in the browser or null
+	 *                     if not on a comment type node.
+	 */
+	public function get_full_comment_text(): ?string {
+		if ( self::STATE_FUNKY_COMMENT === $this->parser_state ) {
+			return $this->get_modifiable_text();
+		}
+
+		if ( self::STATE_COMMENT !== $this->parser_state ) {
+			return null;
+		}
+
+		switch ( $this->get_comment_type() ) {
+			case self::COMMENT_AS_HTML_COMMENT:
+			case self::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
+				return $this->get_modifiable_text();
+
+			case self::COMMENT_AS_CDATA_LOOKALIKE:
+				return "[CDATA[{$this->get_modifiable_text()}]]";
+
+			case self::COMMENT_AS_PI_NODE_LOOKALIKE:
+				return "?{$this->get_tag()}{$this->get_modifiable_text()}?";
+
+			/*
+			 * This represents "bogus comments state" from HTML tokenization.
+			 * This can be entered by `<?` or `<!`, where `?` is included in
+			 * the comment text.
+			 */
+			case self::COMMENT_AS_INVALID_HTML:
+				$preceding_character = $this->html[ $this->text_starts_at - 1 ];
+				$comment_start       = '?' === $preceding_character ? '?' : '';
+				return "{$comment_start}{$this->get_modifiable_text()}";
+		}
+
+		return null;
+	}
+
/**
* Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as
* distinct nodes prefixes.
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index 4862ba981e6f0..808fa39d17f26 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -27,20 +27,17 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
* Skip specific tests that may not be supported or have known issues.
*/
const SKIP_TESTS = array(
- 'comments01/line0155' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
- 'comments01/line0169' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
- 'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
- 'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
- 'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
);
/**
@@ -315,26 +312,8 @@ static function ( $a, $b ) {
break;
case '#comment':
- switch ( $processor->get_comment_type() ) {
- case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
- case WP_HTML_Processor::COMMENT_AS_HTML_COMMENT:
- case WP_HTML_Processor::COMMENT_AS_INVALID_HTML:
- $comment_text_content = $processor->get_modifiable_text();
- break;
-
- case WP_HTML_Processor::COMMENT_AS_CDATA_LOOKALIKE:
- $comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
- break;
-
- case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
- $comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
- break;
-
- default:
- throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
- }
// Comments must be "<" then "!-- " then the data then " -->".
- $output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$comment_text_content} -->\n";
+ $output .= str_repeat( self::TREE_INDENT, $indent_level ) . "<!-- {$processor->get_full_comment_text()} -->\n";
break;
default: