Skip to content

Commit

Permalink
Expand support but exclude legacy CDATA behavioral changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Jun 16, 2024
1 parent b529867 commit 1168050
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 34 deletions.
92 changes: 59 additions & 33 deletions src/wp-includes/kses.php
Original file line number Diff line number Diff line change
Expand Up @@ -984,13 +984,17 @@ function wp_kses_split( $content, $allowed_html, $allowed_protocols ) {

$token_pattern = <<<REGEX
~
( # Detect comments of various flavors before attempting to find tags.
(<!--.*?(-->|$)) # - Normative HTML comments.
|
</[^a-zA-Z][^>]*> # - Closing tags with invalid tag names.
)
# Detect comments of various flavors before attempting to find tags.
<!--.*?(--!?>|$) # - Normative HTML comments.
# These must come before other bogus comments to avoid mismatching `<!--`.
|
</[^a-zA-Z][^>]*> # - Closing tags with invalid tag names.
|
<[?!][^>]*> # - Invalid markup declaration or question mark nodes.
|
<[^>]*(>|$) # Tag-like spans of text.
|
(<[^>]*(>|$)|>) # Tag-like spans of text.
> # Non-tag closer.
~x
REGEX;
return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content );
Expand Down Expand Up @@ -1113,46 +1117,68 @@ function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) {
return '&gt;';
}

/*
* When a closing tag appears with a name that isn't a valid tag name,
* it must be interpreted as an HTML comment. It extends until the
* first `>` character after the initial opening `</`.
*
* Preserve these comments and do not treat them like tags.
*/
if ( 1 === preg_match( '~^</[^a-zA-Z][^>]*>$~', $content ) ) {
$content = substr( $content, 2, -1 );
$transformed = null;

while ( $transformed !== $content ) {
$transformed = wp_kses( $content, $allowed_html, $allowed_protocols );
$content = $transformed;
}

return "</{$transformed}>";
}

/*
* Normative HTML comments should be handled separately as their
* parsing rules differ from those for tags and text nodes.
*/
if ( str_starts_with( $content, '<!--' ) ) {
$content = str_replace( array( '<!--', '-->' ), '', $content );
$transformed = str_replace( array( '<!--', '-->', '--!>' ), '', $content );

while ( ( $newstring = wp_kses( $content, $allowed_html, $allowed_protocols ) ) !== $content ) {
$content = $newstring;
}
do {
$prev = $transformed;
$transformed = wp_kses( $transformed, $allowed_html, $allowed_protocols );
} while ( $prev !== $transformed );

if ( '' === $content ) {
if ( '' === $transformed ) {
return '';
}

// Prevent multiple dashes in comments.
$content = preg_replace( '/--+/', '-', $content );
$transformed = preg_replace( '/--+/', '-', $transformed );
// Prevent three dashes closing a comment.
$content = preg_replace( '/-$/', '', $content );
$transformed = preg_replace( '/-$/', '', $transformed );

return "<!--{$transformed}-->";
}

/*
* When a closing tag appears with a name that isn't a valid tag name,
* or when a span opens with `<!` or `<?` (an invalid markup declaration
* or question mark node), it must be interpreted as an HTML comment.
* It extends until the first `>` character after the initial opening
* `</`, `<!`, or `<?`.
*
* Preserve these comments and do not treat them like tags.
*/
$bogus_comment_pattern = <<<'REGEX'
~
^<
(?:
(?P<invalid_tag_closer>/[^a-zA-Z])
|
(?P<invalid_markup_declaration>[?!])
)
[^>]*
>$
~x
REGEX;

/*
* Since there are variant legacy behaviors regarding invalid CDATA sections, omit
* them from this processing until all the downstream tests and code are updated.
*/
$is_invalid_cdata = str_starts_with( $content, '<![[CDATA' );

if (
! $is_invalid_cdata &&
1 === preg_match( $bogus_comment_pattern, $content, $bogus_comment_match )
) {
$transformed = substr( $content, 2, -1 );

do {
$prev = $transformed;
$transformed = wp_kses( $transformed, $allowed_html, $allowed_protocols );
} while ( $prev !== $transformed );

return "<!--{$content}-->";
return "<{$content[1]}{$transformed}>";
}

// It's seriously malformed.
Expand Down
5 changes: 4 additions & 1 deletion tests/phpunit/tests/kses.php
Original file line number Diff line number Diff line change
Expand Up @@ -1936,11 +1936,13 @@ public function filter_wp_kses_object_added_in_html_filter( $tags, $context ) {
*
* @ticket 61009
*
* @dataProvider data_html_containing_various_kinds_of_html_comments
*
* @param string $html_comment HTML containing a comment; must not be a valid comment
* but must be syntax which a browser interprets as a comment.
* @param string $expected_output How `wp_kses()` ought to transform the comment.
*/
public function wp_kses_preserves_html_comments( $html_comment, $expected_output ) {
public function test_wp_kses_preserves_html_comments( $html_comment, $expected_output ) {
$this->assertSame(
$expected_output,
wp_kses( $html_comment, array() ),
Expand All @@ -1956,6 +1958,7 @@ public function wp_kses_preserves_html_comments( $html_comment, $expected_output
public static function data_html_containing_various_kinds_of_html_comments() {
return array(
'Normative HTML comment' => array( 'before<!-- this is a comment -->after', 'before<!-- this is a comment -->after' ),
'Normative HTML comment with invalid closer' => array( 'before<!-- this is a comment --!>after', 'before<!-- this is a comment -->after' ),
'Closing tag with invalid tag name' => array( 'before<//not a tag>after', 'before<//not a tag>after' ),
'Incorrectly opened comment (Markup declaration)' => array( 'before<!also not a tag>after', 'before<!also not a tag>after' ),
'Incorrectly opened comment (Question mark)' => array( 'before<?also not a tag>after', 'before<?also not a tag>after' ),
Expand Down

0 comments on commit 1168050

Please sign in to comment.