From 11680509cef5625c7d6c251e9deb6bdadae6ea8b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Sun, 16 Jun 2024 22:40:43 +0300 Subject: [PATCH] Expand support but exclude legacy CDATA behavioral changes. --- src/wp-includes/kses.php | 92 +++++++++++++++++++++++------------- tests/phpunit/tests/kses.php | 5 +- 2 files changed, 63 insertions(+), 34 deletions(-) diff --git a/src/wp-includes/kses.php b/src/wp-includes/kses.php index a9e8bbdd3ec5f..3846c532861a6 100644 --- a/src/wp-includes/kses.php +++ b/src/wp-includes/kses.php @@ -984,13 +984,17 @@ function wp_kses_split( $content, $allowed_html, $allowed_protocols ) { $token_pattern = <<|$)) # - Normative HTML comments. - | - ]*> # - Closing tags with invalid tag names. - ) + # Detect comments of various flavors before attempting to find tags. + ' ), '', $content ); + $transformed = str_replace( array( '', '--!>' ), '', $content ); - while ( ( $newstring = wp_kses( $content, $allowed_html, $allowed_protocols ) ) !== $content ) { - $content = $newstring; - } + do { + $prev = $transformed; + $transformed = wp_kses( $transformed, $allowed_html, $allowed_protocols ); + } while ( $prev !== $transformed ); - if ( '' === $content ) { + if ( '' === $transformed ) { return ''; } // Prevent multiple dashes in comments. - $content = preg_replace( '/--+/', '-', $content ); + $transformed = preg_replace( '/--+/', '-', $transformed ); // Prevent three dashes closing a comment. - $content = preg_replace( '/-$/', '', $content ); + $transformed = preg_replace( '/-$/', '', $transformed ); + + return ""; + } + + /* + * When a closing tag appears with a name that isn't a valid tag name, + * it must be interpreted as an HTML comment. It extends until the + * first `>` character after the initial opening `/[^a-zA-Z]) + | + (?P[?!]) + ) + [^>]* + >$ +~x +REGEX; + + /* + * Since there are variant legacy behaviors regarding invalid CDATA sections, omit + * them from this processing until all the downstream tests and code is updated. + */ + $is_invalid_cdata = str_starts_with( $content, '"; + return "<{$content[1]}{$transformed}>"; } // It's seriously malformed. diff --git a/tests/phpunit/tests/kses.php b/tests/phpunit/tests/kses.php index ee09677c1b924..481e0587525e4 100644 --- a/tests/phpunit/tests/kses.php +++ b/tests/phpunit/tests/kses.php @@ -1936,11 +1936,13 @@ public function filter_wp_kses_object_added_in_html_filter( $tags, $context ) { * * @ticket 61009 * + * @dataProvider data_html_containing_various_kinds_of_html_comments + * * @param string $html_comment HTML containing a comment; must not be a valid comment * but must be syntax which a browser interprets as a comment. * @param string $expected_output How `wp_kses()` ought to transform the comment. */ - public function wp_kses_preserves_html_comments( $html_comment, $expected_output ) { + public function test_wp_kses_preserves_html_comments( $html_comment, $expected_output ) { $this->assertSame( $expected_output, wp_kses( $html_comment, array() ), @@ -1956,6 +1958,7 @@ public function wp_kses_preserves_html_comments( $html_comment, $expected_output public static function data_html_containing_various_kinds_of_html_comments() { return array( 'Normative HTML comment' => array( 'beforeafter', 'beforeafter' ), + 'Normative HTML comment with invalid closer' => array( 'beforeafter', 'beforeafter' ), 'Closing tag with invalid tag name' => array( 'beforeafter', 'beforeafter' ), 'Incorrectly opened comment (Markup declaration)' => array( 'beforeafter', 'beforeafter' ), 'Incorrectly opened comment (Question mark)' => array( 'beforeafter', 'beforeafter' ),