diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 0d1c0756e843c..13fa6100e1d19 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -908,7 +908,9 @@ public function get_attribute( $name ) { return true; } - return substr( $this->html, $attribute->value_starts_at, $attribute->value_length ); + $raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length ); + + return html_entity_decode( $raw_value ); } /** @@ -945,6 +947,8 @@ public function get_tag() { * - When `true` is passed as the value, then only the attribute name is added to the tag. * - When `false` is passed, the attribute gets removed if it existed before. * + * For string attributes, the value is escaped using the `esc_attr` function. + * * @since 6.2.0 * * @param string $name The attribute name to target. @@ -968,8 +972,7 @@ public function set_attribute( $name, $value ) { if ( true === $value ) { $updated_attribute = $name; } else { - // @TODO: What escaping and sanitization do we need here? - $escaped_new_value = str_replace( '"', '"', $value ); + $escaped_new_value = esc_attr( $value ); $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-standalone-test.php similarity index 92% rename from phpunit/html/wp-html-tag-processor-test.php rename to phpunit/html/wp-html-tag-processor-standalone-test.php index 41bf04a138abc..a09e982f7cb44 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/wp-html-tag-processor-standalone-test.php @@ -1,14 +1,23 @@ Text'; const HTML_WITH_CLASSES = '
Text
'; const HTML_MALFORMED = '
Back to notifications
'; @@ -134,6 +143,17 @@ public function test_get_attribute_returns_string_for_truthy_attributes() { $this->assertSame( 'true', $p->get_attribute( 'hidden' ), 'Accessing a hidden="true" attribute value did not return "true"' ); } + /** + * @ticket 56299 + * + * @covers WP_HTML_Tag_Processor::get_attribute + */ + public function test_get_attribute_decodes_html_character_references() { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + $this->assertSame( 'the "grande" is < 32oz†', $p->get_attribute( 'id' ), 'HTML Attribute value was returned without decoding character references' ); + } + /** * @ticket 56299 * @@ -235,6 +255,68 @@ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_mar ); } + /** + * Passing a double quote inside of an attribute values could lead to an XSS attack as follows: + * + * + * $p = new WP_HTML_Tag_Processor( '
' ); + * $p->next_tag(); + * $p->set_attribute('class', '" onclick="alert'); + * echo $p; + * //
+ *
+ * + * To prevent it, `set_attribute` calls `esc_attr()` on its given values. + * + * + *
+ *
+ * + * @ticket 56299 + * + * @dataProvider data_set_attribute_escapable_values + * @covers set_attribute + */ + public function test_set_attribute_prevents_xss( $attribute_value ) { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + $p->set_attribute( 'test', $attribute_value ); + + /* + * Testing the escaping is hard using tools that properly parse + * HTML because they might interpret the escaped values. It's hard + * with tools that don't understand HTML because they might get + * confused by improperly-escaped values. + * + * For this test, since we control the input HTML we're going to + * do what looks like the opposite of what we want to be doing with + * this library but are only doing so because we have full control + * over the content and because we want to look at the raw values. + */ + $match = null; + preg_match( '~^
$~', (string) $p, $match ); + list( , $actual_value ) = $match; + + $this->assertEquals( $actual_value, '"' . esc_attr( $attribute_value ) . '"' ); + } + + /** + * Data provider with HTML attribute values that might need escaping. + */ + public function data_set_attribute_escapable_values() { + return array( + array( '"' ), + array( '"' ), + array( '&' ), + array( '&' ), + array( '€' ), + array( "'" ), + array( '<>' ), + array( '"";' ), + array( '" onclick="alert(\'1\');">' ), + ); + } + /** * @ticket 56299 * @@ -999,12 +1081,12 @@ public function data_malformed_tag() { $examples['HTML tag opening inside attribute value'] = array( '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', ); $examples['HTML tag brackets in attribute values and data markup'] = array( '
This <is> a <strong is="true">thing.
test', - '
This <is> a <strong is="true">thing.
test', + '
This <is> a <strong is="true">thing.
test', ); $examples['Single and double quotes in attribute value'] = array( diff --git a/phpunit/html/wp-html-tag-processor-wp-test.php b/phpunit/html/wp-html-tag-processor-wp-test.php new file mode 100644 index 0000000000000..6c3a30340b267 --- /dev/null +++ b/phpunit/html/wp-html-tag-processor-wp-test.php @@ -0,0 +1,91 @@ + + * $p = new WP_HTML_Tag_Processor( '
' ); + * $p->next_tag(); + * $p->set_attribute('class', '" onclick="alert'); + * echo $p; + * //
+ * + * + * To prevent it, `set_attribute` calls `esc_attr()` on its given values. + * + * + *
+ *
+ * + * @ticket 56299 + * + * @dataProvider data_set_attribute_escapable_values + * @covers set_attribute + */ + public function test_set_attribute_prevents_xss( $value_to_set, $expected_result ) { + $p = new WP_HTML_Tag_Processor( '
' ); + $p->next_tag(); + $p->set_attribute( 'test', $value_to_set ); + + /* + * Testing the escaping is hard using tools that properly parse + * HTML because they might interpret the escaped values. It's hard + * with tools that don't understand HTML because they might get + * confused by improperly-escaped values. + * + * For this test, since we control the input HTML we're going to + * do what looks like the opposite of what we want to be doing with + * this library but are only doing so because we have full control + * over the content and because we want to look at the raw values. + */ + $match = null; + preg_match( '~^
$~', (string) $p, $match ); + list( , $actual_value ) = $match; + + $this->assertEquals( $actual_value, '"' . $expected_result . '"' ); + } + + /** + * Data provider with HTML attribute values that might need escaping. + */ + public function data_set_attribute_escapable_values() { + return array( + array( '"', '"' ), + array( '"', '"' ), + array( '&', '&' ), + array( '&', '&' ), + array( '€', '€' ), + array( "'", ''' ), + array( '<>', '<>' ), + array( '"";', '&quot";' ), + array( + '" onclick="alert(\'1\');">', + '" onclick="alert('1');"><span onclick=""></span><script>alert("1")</script>', + ), + ); + } + +}