diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php index 0d1c0756e843c..13fa6100e1d19 100644 --- a/lib/experimental/html/class-wp-html-tag-processor.php +++ b/lib/experimental/html/class-wp-html-tag-processor.php @@ -908,7 +908,9 @@ public function get_attribute( $name ) { return true; } - return substr( $this->html, $attribute->value_starts_at, $attribute->value_length ); + $raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length ); + + return html_entity_decode( $raw_value ); } /** @@ -945,6 +947,8 @@ public function get_tag() { * - When `true` is passed as the value, then only the attribute name is added to the tag. * - When `false` is passed, the attribute gets removed if it existed before. * + * For string attributes, the value is escaped using the `esc_attr` function. + * * @since 6.2.0 * * @param string $name The attribute name to target. @@ -968,8 +972,7 @@ public function set_attribute( $name, $value ) { if ( true === $value ) { $updated_attribute = $name; } else { - // @TODO: What escaping and sanitization do we need here? - $escaped_new_value = str_replace( '"', '"', $value ); + $escaped_new_value = esc_attr( $value ); $updated_attribute = "{$name}=\"{$escaped_new_value}\""; } diff --git a/phpunit/html/wp-html-tag-processor-test.php b/phpunit/html/wp-html-tag-processor-standalone-test.php similarity index 92% rename from phpunit/html/wp-html-tag-processor-test.php rename to phpunit/html/wp-html-tag-processor-standalone-test.php index 41bf04a138abc..a09e982f7cb44 100644 --- a/phpunit/html/wp-html-tag-processor-test.php +++ b/phpunit/html/wp-html-tag-processor-standalone-test.php @@ -1,14 +1,23 @@ Text'; const HTML_WITH_CLASSES = '
+ * $p = new WP_HTML_Tag_Processor( '' );
+ * $p->next_tag();
+ * $p->set_attribute('class', '" onclick="alert');
+ * echo $p;
+ * //
+ *
+ *
+ * To prevent it, `set_attribute` calls `esc_attr()` on its given values.
+ *
+ *
+ *
+ *
+ *
+ * @ticket 56299
+ *
+ * @dataProvider data_set_attribute_escapable_values
+ * @covers set_attribute
+ */
+ public function test_set_attribute_prevents_xss( $attribute_value ) {
+ $p = new WP_HTML_Tag_Processor( '' );
+ $p->next_tag();
+ $p->set_attribute( 'test', $attribute_value );
+
+ /*
+ * Testing the escaping is hard using tools that properly parse
+ * HTML because they might interpret the escaped values. It's hard
+ * with tools that don't understand HTML because they might get
+ * confused by improperly-escaped values.
+ *
+ * For this test, since we control the input HTML we're going to
+ * do what looks like the opposite of what we want to be doing with
+ * this library but are only doing so because we have full control
+ * over the content and because we want to look at the raw values.
+ */
+ $match = null;
+ preg_match( '~^$~', (string) $p, $match );
+ list( , $actual_value ) = $match;
+
+ $this->assertEquals( $actual_value, '"' . esc_attr( $attribute_value ) . '"' );
+ }
+
+ /**
+ * Data provider with HTML attribute values that might need escaping.
+ */
+ public function data_set_attribute_escapable_values() {
+ return array(
+ array( '"' ),
+ array( '"' ),
+ array( '&' ),
+ array( '&' ),
+ array( '€' ),
+ array( "'" ),
+ array( '<>' ),
+ array( '"";' ),
+ array( '" onclick="alert(\'1\');">' ),
+ );
+ }
+
/**
* @ticket 56299
*
@@ -999,12 +1081,12 @@ public function data_malformed_tag() {
$examples['HTML tag opening inside attribute value'] = array(
'This <is> a <strong is="true">thing.
test',
- 'This <is> a <strong is="true">thing.
test',
+ 'This <is> a <strong is="true">thing.
test',
);
$examples['HTML tag brackets in attribute values and data markup'] = array(
'This <is> a <strong is="true">thing.
test',
- 'This <is> a <strong is="true">thing.
test',
+ 'This <is> a <strong is="true">thing.
test',
);
$examples['Single and double quotes in attribute value'] = array(
diff --git a/phpunit/html/wp-html-tag-processor-wp-test.php b/phpunit/html/wp-html-tag-processor-wp-test.php
new file mode 100644
index 0000000000000..6c3a30340b267
--- /dev/null
+++ b/phpunit/html/wp-html-tag-processor-wp-test.php
@@ -0,0 +1,91 @@
+
+ * $p = new WP_HTML_Tag_Processor( '' );
+ * $p->next_tag();
+ * $p->set_attribute('class', '" onclick="alert');
+ * echo $p;
+ * //
+ *
+ *
+ * To prevent it, `set_attribute` calls `esc_attr()` on its given values.
+ *
+ *
+ *
+ *
+ *
+ * @ticket 56299
+ *
+ * @dataProvider data_set_attribute_escapable_values
+ * @covers set_attribute
+ */
+ public function test_set_attribute_prevents_xss( $value_to_set, $expected_result ) {
+ $p = new WP_HTML_Tag_Processor( '' );
+ $p->next_tag();
+ $p->set_attribute( 'test', $value_to_set );
+
+ /*
+ * Testing the escaping is hard using tools that properly parse
+ * HTML because they might interpret the escaped values. It's hard
+ * with tools that don't understand HTML because they might get
+ * confused by improperly-escaped values.
+ *
+ * For this test, since we control the input HTML we're going to
+ * do what looks like the opposite of what we want to be doing with
+ * this library but are only doing so because we have full control
+ * over the content and because we want to look at the raw values.
+ */
+ $match = null;
+ preg_match( '~^$~', (string) $p, $match );
+ list( , $actual_value ) = $match;
+
+ $this->assertEquals( $actual_value, '"' . $expected_result . '"' );
+ }
+
+ /**
+ * Data provider with HTML attribute values that might need escaping.
+ */
+ public function data_set_attribute_escapable_values() {
+ return array(
+ array( '"', '"' ),
+ array( '"', '"' ),
+ array( '&', '&' ),
+ array( '&', '&' ),
+ array( '€', '€' ),
+ array( "'", ''' ),
+ array( '<>', '<>' ),
+ array( '"";', '"";' ),
+ array(
+ '" onclick="alert(\'1\');">',
+ '" onclick="alert('1');"><span onclick=""></span><script>alert("1")</script>',
+ ),
+ );
+ }
+
+}