From 2ca1069a83158cb6fcaf4e273fb7c6b66f6c661b Mon Sep 17 00:00:00 2001 From: Jonathan Desrosiers Date: Wed, 7 Aug 2024 15:50:33 +0000 Subject: [PATCH 01/10] External Libraries: Update the `whatwg-fetch` polyfill library. This updates the `whatwg-fetch` library from version `3.6.17` to `3.6.20`, the latest current version. This library is included and registered within WordPress as the `wp-polyfill-fetch` script but is no longer used by WordPress itself. Updates are provided as a courtesy, and all projects using this polyfill should reevaluate usage. Props manooweb. Fixes #60514. git-svn-id: https://develop.svn.wordpress.org/trunk@58860 602fd350-edb4-49c9-b593-d223f7449a82 --- package-lock.json | 14 +++++++------- package.json | 2 +- src/wp-includes/script-loader.php | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index 4906648085f9d..c5c985d2137f4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -98,7 +98,7 @@ "react-is": "18.3.1", "regenerator-runtime": "0.14.1", "underscore": "1.13.6", - "whatwg-fetch": "3.6.17", + "whatwg-fetch": "3.6.20", "wicg-inert": "3.1.2" }, "devDependencies": { @@ -33887,9 +33887,9 @@ } }, "node_modules/whatwg-fetch": { - "version": "3.6.17", - "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.17.tgz", - "integrity": "sha512-c4ghIvG6th0eudYwKZY5keb81wtFz9/WeAHAoy8+r18kcWlitUIrmGFQ2rWEl4UCKUilD3zCLHOIPheHx5ypRQ==" + "version": "3.6.20", + "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", + "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==" }, "node_modules/whatwg-mimetype": { "version": "3.0.0", @@ -59511,9 +59511,9 @@ } }, "whatwg-fetch": { - "version": "3.6.17", - "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.17.tgz", - "integrity": "sha512-c4ghIvG6th0eudYwKZY5keb81wtFz9/WeAHAoy8+r18kcWlitUIrmGFQ2rWEl4UCKUilD3zCLHOIPheHx5ypRQ==" + 
"version": "3.6.20", + "resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.6.20.tgz", + "integrity": "sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==" }, "whatwg-mimetype": { "version": "3.0.0", diff --git a/package.json b/package.json index 06fcc6829a8eb..7c55cb8bc1278 100644 --- a/package.json +++ b/package.json @@ -167,7 +167,7 @@ "react-is": "18.3.1", "regenerator-runtime": "0.14.1", "underscore": "1.13.6", - "whatwg-fetch": "3.6.17", + "whatwg-fetch": "3.6.20", "wicg-inert": "3.1.2" }, "scripts": { diff --git a/src/wp-includes/script-loader.php b/src/wp-includes/script-loader.php index 121adc646e0ad..24a35a291911e 100644 --- a/src/wp-includes/script-loader.php +++ b/src/wp-includes/script-loader.php @@ -114,7 +114,7 @@ function wp_default_packages_vendor( $scripts ) { 'regenerator-runtime' => '0.14.1', 'moment' => '2.29.4', 'lodash' => '4.17.21', - 'wp-polyfill-fetch' => '3.6.17', + 'wp-polyfill-fetch' => '3.6.20', 'wp-polyfill-formdata' => '4.0.10', 'wp-polyfill-node-contains' => '4.8.0', 'wp-polyfill-url' => '3.6.4', From 20cb3098c825cef99371f72447aa4986426c331f Mon Sep 17 00:00:00 2001 From: Tonya Mork Date: Wed, 7 Aug 2024 19:56:43 +0000 Subject: [PATCH 02/10] Docs: Remove bugfix since annotations from WP_Theme_JSON::get_block_nodes(). Removes the `@since` bugfix annotations from `WP_Theme_JSON::get_block_nodes()` docblock. Bugfixes are not annotated in docblocks. Follow-up to [58856]. See #61704. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58864 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/class-wp-theme-json.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/wp-includes/class-wp-theme-json.php b/src/wp-includes/class-wp-theme-json.php index 0cb7405ab548d..894ce25a1cf3a 100644 --- a/src/wp-includes/class-wp-theme-json.php +++ b/src/wp-includes/class-wp-theme-json.php @@ -2738,8 +2738,6 @@ private static function get_block_nodes( $theme_json, $selectors = array(), $opt * @since 6.6.0 Setting a min-height of HTML when root styles have a background gradient or image. * Updated general global styles specificity to 0-1-0. * Fixed custom CSS output in block style variations. - * @since 6.6.1 Avoid applying `:root :where()` wrapper to top-level element-only selectors. - * @since 6.6.2 Avoid applying `:root :where()` wrapper to root selectors. * * @param array $block_metadata Metadata about the block to get styles for. * From f9af91cfa550fe94b60160e6ed44f7d8cad2b183 Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Thu, 8 Aug 2024 02:25:32 +0000 Subject: [PATCH 03/10] Docs: Correct alignment for `rest_insert_attachment` action DocBlock. Follow-up to [39348]. Props krupalpanchal. See #61608. git-svn-id: https://develop.svn.wordpress.org/trunk@58865 602fd350-edb4-49c9-b593-d223f7449a82 --- .../endpoints/class-wp-rest-attachments-controller.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wp-includes/rest-api/endpoints/class-wp-rest-attachments-controller.php b/src/wp-includes/rest-api/endpoints/class-wp-rest-attachments-controller.php index aa73286102276..0c98a729e43f9 100644 --- a/src/wp-includes/rest-api/endpoints/class-wp-rest-attachments-controller.php +++ b/src/wp-includes/rest-api/endpoints/class-wp-rest-attachments-controller.php @@ -339,8 +339,7 @@ protected function insert_attachment( $request ) { * * @since 4.7.0 * - * @param WP_Post $attachment Inserted or updated attachment - * object. 
+ * @param WP_Post $attachment Inserted or updated attachment object. * @param WP_REST_Request $request The request sent to the API. * @param bool $creating True when creating an attachment, false when updating. */ From 4026237dadd2f4c1f11ba4a89605e8f394c11f38 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 8 Aug 2024 04:24:03 +0000 Subject: [PATCH 04/10] HTML API: Ensure that `get_modifiable_text()` reads enqueued updates. When `set_modifiable_text()` was added to the Tag Processor, it was considered that the same information could be queried after setting its value and before proceeding to the next token, but unfortunately overlooked that if the starting modifiable text length was zero, then the read in `get_modifiable_text()` would ignore enqueued updates. In this patch, `get_modifiable_text()` will read any enqueued values before reading from the input HTML document to ensure consistency. Follow-up to [58829]. Props dmsnell, jonsurrell, ramonopoly. Fixes #61617. git-svn-id: https://develop.svn.wordpress.org/trunk@58866 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-tag-processor.php | 8 +- .../wpHtmlTagProcessorModifiableText.php | 77 +++++++++++++++++++ 2 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index fcf418e856d5e..0ff2cdc4dd10d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -614,7 +614,7 @@ class WP_HTML_Tag_Processor { * * @since 6.5.0 * - * @var string + * @var int */ private $text_length; @@ -2894,11 +2894,13 @@ public function get_comment_type(): ?string { * @return string */ public function get_modifiable_text(): string { - if ( null === $this->text_starts_at || 0 === $this->text_length ) { + $has_enqueued_update = isset( $this->lexical_updates['modifiable text'] ); + + if ( ! 
$has_enqueued_update && ( null === $this->text_starts_at || 0 === $this->text_length ) ) { return ''; } - $text = isset( $this->lexical_updates['modifiable text'] ) + $text = $has_enqueued_update ? $this->lexical_updates['modifiable text']->text : substr( $this->html, $this->text_starts_at, $this->text_length ); diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php index 717d061016a2d..03c65321e34bd 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessorModifiableText.php @@ -39,6 +39,83 @@ public function test_get_modifiable_text_is_idempotent() { } } + /** + * Ensures that `get_modifiable_text()` reads enqueued updates when read + * from after writing; guarantees consistency through writes. + * + * @ticket 61617 + */ + public function test_get_modifiable_text_is_consistent_after_writes() { + $before = 'just some text'; + $after = 'different text'; + $processor = new WP_HTML_Tag_Processor( $before ); + $processor->next_token(); + + $this->assertSame( + '#text', + $processor->get_token_name(), + "Should have found text node but found '{$processor->get_token_name()}' instead: check test setup." + ); + + $this->assertSame( + $before, + $processor->get_modifiable_text(), + 'Should have found initial test text: check test setup.' + ); + + $processor->set_modifiable_text( $after ); + $this->assertSame( + $after, + $processor->get_modifiable_text(), + 'Should have found enqueued updated text.' + ); + + $processor->get_updated_html(); + $this->assertSame( + $after, + $processor->get_modifiable_text(), + 'Should have found updated text.' + ); + } + + /** + * Ensures that `get_modifiable_text()` reads enqueued updates when read from after + * writing when starting from an empty text; guarantees consistency through writes. 
+ * + * @ticket 61617 + */ + public function test_get_modifiable_text_is_consistent_after_writes_to_empty_text() { + $after = 'different text'; + $processor = new WP_HTML_Tag_Processor( '<script></script>' ); + $processor->next_token(); + + $this->assertSame( + 'SCRIPT', + $processor->get_token_name(), + "Should have found text node but found '{$processor->get_token_name()}' instead: check test setup." + ); + + $this->assertSame( + '', + $processor->get_modifiable_text(), + 'Should have found initial test text: check test setup.' + ); + + $processor->set_modifiable_text( $after ); + $this->assertSame( + $after, + $processor->get_modifiable_text(), + 'Should have found enqueued updated text.' + ); + + $processor->get_updated_html(); + $this->assertSame( + $after, + $processor->get_modifiable_text(), + 'Should have found updated text.' + ); + } + /** * Ensures that updates to modifiable text that are shorter than the * original text do not cause the parser to lose its orientation. From de084d7d0e302027fb8f0a99bbeedf88e079efee Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 8 Aug 2024 07:23:53 +0000 Subject: [PATCH 05/10] HTML API: Add support for SVG and MathML (Foreign content) As part of work to add more spec support to the HTML API, this patch adds support for SVG and MathML elements, or more generally, "foreign content." The rules in foreign content are a mix of XML and HTML parsing rules and introduce additional complexity into the processor, but is important in order to avoid getting lost when inside these elements. Developed in https://github.com/wordpress/wordpress-develop/pull/6006 Discussed in https://core.trac.wordpress.org/ticket/61576 Props: dmsnell, jonsurrell, westonruter. See #61576. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58867 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-open-elements.php | 113 ++-- .../class-wp-html-processor-state.php | 12 - .../html-api/class-wp-html-processor.php | 598 ++++++++++++++++-- .../html-api/class-wp-html-tag-processor.php | 464 +++++++++++++- .../html-api/class-wp-html-token.php | 19 + .../tests/html-api/wpHtmlProcessor.php | 31 - .../html-api/wpHtmlProcessorBreadcrumbs.php | 43 -- .../html-api/wpHtmlProcessorHtml5lib.php | 70 +- .../wpHtmlSupportRequiredOpenElements.php | 219 ------- .../wpHtmlTagProcessor-token-scanning.php | 61 ++ 10 files changed, 1199 insertions(+), 431 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index c760009ce0c28..5ce1f8feb552c 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -113,13 +113,13 @@ public function set_push_handler( Closure $handler ): void { * * @param int $nth Retrieve the nth item on the stack, with 1 being * the top element, 2 being the second, etc... - * @return string|null Name of the node on the stack at the given location, - * or `null` if the location isn't on the stack. + * @return WP_HTML_Token|null Name of the node on the stack at the given location, + * or `null` if the location isn't on the stack. */ - public function at( int $nth ): ?string { + public function at( int $nth ): ?WP_HTML_Token { foreach ( $this->walk_down() as $item ) { if ( 0 === --$nth ) { - return $item->node_name; + return $item; } } @@ -242,18 +242,22 @@ public function current_node_is( string $identity ): bool { */ public function has_element_in_specific_scope( string $tag_name, $termination_list ): bool { foreach ( $this->walk_up() as $node ) { - if ( $node->node_name === $tag_name ) { + $namespaced_name = 'html' === $node->namespace + ? 
$node->node_name + : "{$node->namespace} {$node->node_name}"; + + if ( $namespaced_name === $tag_name ) { return true; } if ( '(internal: H1 through H6 - do not use)' === $tag_name && - in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + in_array( $namespaced_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) ) { return true; } - if ( in_array( $node->node_name, $termination_list, true ) ) { + if ( in_array( $namespaced_name, $termination_list, true ) ) { return false; } } @@ -288,7 +292,7 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li * > - SVG title * * @since 6.4.0 - * @since 6.7.0 Supports all required HTML elements. + * @since 6.7.0 Full support. * * @see https://html.spec.whatwg.org/#has-an-element-in-scope * @@ -309,19 +313,16 @@ public function has_element_in_scope( string $tag_name ): bool { 'OBJECT', 'TEMPLATE', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. - * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -363,19 +364,16 @@ public function has_element_in_list_item_scope( string $tag_name ): bool { 'TEMPLATE', 'UL', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. 
- * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -413,19 +411,16 @@ public function has_element_in_button_scope( string $tag_name ): bool { 'OBJECT', 'TEMPLATE', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. - * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -692,11 +687,15 @@ public function walk_up( ?WP_HTML_Token $above_this_node = null ) { * @param WP_HTML_Token $item Element that was added to the stack of open elements. */ public function after_element_push( WP_HTML_Token $item ): void { + $namespaced_name = 'html' === $item->namespace + ? $item->node_name + : "{$item->namespace} {$item->node_name}"; + /* * When adding support for new elements, expand this switch to trap * cases where the precalculated value needs to change. 
*/ - switch ( $item->node_name ) { + switch ( $namespaced_name ) { case 'APPLET': case 'BUTTON': case 'CAPTION': @@ -707,6 +706,15 @@ public function after_element_push( WP_HTML_Token $item ): void { case 'MARQUEE': case 'OBJECT': case 'TEMPLATE': + case 'math MI': + case 'math MO': + case 'math MN': + case 'math MS': + case 'math MTEXT': + case 'math ANNOTATION-XML': + case 'svg FOREIGNOBJECT': + case 'svg DESC': + case 'svg TITLE': $this->has_p_in_button_scope = false; break; @@ -750,6 +758,15 @@ public function after_element_pop( WP_HTML_Token $item ): void { case 'MARQUEE': case 'OBJECT': case 'TEMPLATE': + case 'math MI': + case 'math MO': + case 'math MN': + case 'math MS': + case 'math MTEXT': + case 'math ANNOTATION-XML': + case 'svg FOREIGNOBJECT': + case 'svg DESC': + case 'svg TITLE': $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); break; } diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 97f6da95a0012..16875c4ac1b2b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -299,18 +299,6 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset'; - /** - * In foreign content insertion mode for full HTML parser. - * - * @since 6.7.0 - * - * @see https://html.spec.whatwg.org/#parsing-main-inforeign - * @see WP_HTML_Processor_State::$insertion_mode - * - * @var string - */ - const INSERTION_MODE_IN_FOREIGN_CONTENT = 'insertion-mode-in-foreign-content'; - /** * No-quirks mode document compatability mode. 
* diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 39ba43e467d5c..3820fe027723d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -307,14 +307,14 @@ public static function create_fragment( $html, $context = '', $encoding = $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); - $processor->state->stack_of_open_elements->push( - new WP_HTML_Token( - 'root-node', - 'HTML', - false - ) + $root_node = new WP_HTML_Token( + 'root-node', + 'HTML', + false ); + $processor->state->stack_of_open_elements->push( $root_node ); + $context_node = new WP_HTML_Token( 'context-node', $processor->state->context_node[0], @@ -392,6 +392,8 @@ function ( WP_HTML_Token $token ): void { $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); + + $this->change_parsing_namespace( $token->namespace ); } ); @@ -401,6 +403,12 @@ function ( WP_HTML_Token $token ): void { $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); + $adjusted_current_node = $this->get_adjusted_current_node(); + $this->change_parsing_namespace( + $adjusted_current_node + ? $adjusted_current_node->namespace + : 'html' + ); } ); @@ -767,19 +775,20 @@ public function matches_breadcrumbs( $breadcrumbs ): bool { * foreign content will also act just like a void tag, immediately * closing as soon as the processor advances to the next token. 
* - * @since 6.6.0 + * @todo Review the self-closing logic when no node is present, ensure it + * matches the expectations in `step()`. * - * @todo When adding support for foreign content, ensure that - * this returns false for self-closing elements in the - * SVG and MathML namespace. + * @since 6.6.0 * * @param WP_HTML_Token|null $node Optional. Node to examine, if provided. * Default is to examine current node. * @return bool|null Whether to expect a closer for the currently-matched node, * or `null` if not matched on any token. */ - public function expects_closer( $node = null ): ?bool { - $token_name = $node->node_name ?? $this->get_token_name(); + public function expects_closer( WP_HTML_Token $node = null ): ?bool { + $token_name = $node->node_name ?? $this->get_token_name(); + $token_namespace = $node->namespace ?? $this->get_namespace(); + if ( ! isset( $token_name ) ) { return null; } @@ -792,7 +801,9 @@ public function expects_closer( $node = null ): ?bool { // Void elements. self::is_void( $token_name ) || // Special atomic elements. - in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) || + // Self-closing elements in foreign content. + ( isset( $node ) && 'html' !== $node->namespace && $node->has_self_closing_flag ) ); } @@ -824,14 +835,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { * * When moving on to the next node, therefore, if the bottom-most element * on the stack is a void element, it must be closed. - * - * @todo Once self-closing foreign elements and BGSOUND are supported, - * they must also be implicitly closed here too. BGSOUND is - * special since it's only self-closing if the self-closing flag - * is provided in the opening tag, otherwise it expects a tag closer. 
*/ $top_node = $this->state->stack_of_open_elements->current_node(); - if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) { + if ( isset( $top_node ) && ! $this->expects_closer( $top_node ) ) { $this->state->stack_of_open_elements->pop(); } } @@ -848,14 +854,46 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { return false; } - $this->state->current_token = new WP_HTML_Token( - $this->bookmark_token(), - $this->get_token_name(), - $this->has_self_closing_flag(), - $this->release_internal_bookmark_on_destruct + $adjusted_current_node = $this->get_adjusted_current_node(); + $is_closer = $this->is_tag_closer(); + $is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer; + $token_name = $this->get_token_name(); + + if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { + $this->state->current_token = new WP_HTML_Token( + $this->bookmark_token(), + $token_name, + $this->has_self_closing_flag(), + $this->release_internal_bookmark_on_destruct + ); + } + + $parse_in_current_insertion_mode = ( + 0 === $this->state->stack_of_open_elements->count() || + 'html' === $adjusted_current_node->namespace || + ( + 'math' === $adjusted_current_node->integration_node_type && + ( + ( $is_start_tag && ! in_array( $token_name, array( 'MGLYPH', 'MALIGNMARK' ), true ) ) || + '#text' === $token_name + ) + ) || + ( + 'math' === $adjusted_current_node->namespace && + 'ANNOTATION-XML' === $adjusted_current_node->node_name && + $is_start_tag && 'SVG' === $token_name + ) || + ( + 'html' === $adjusted_current_node->integration_node_type && + ( $is_start_tag || '#text' === $token_name ) + ) ); try { + if ( ! 
$parse_in_current_insertion_mode ) { + return $this->step_in_foreign_content(); + } + switch ( $this->state->insertion_mode ) { case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: return $this->step_initial(); @@ -923,9 +961,6 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: return $this->step_after_after_frameset(); - case WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT: - return $this->step_in_foreign_content(); - // This should be unreachable but PHP doesn't have total type checking on switch. default: $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); @@ -1853,7 +1888,7 @@ private function step_in_body(): bool { case '+BODY': if ( 1 === $this->state->stack_of_open_elements->count() || - 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { // Ignore the token. @@ -1879,7 +1914,7 @@ private function step_in_body(): bool { case '+FRAMESET': if ( 1 === $this->state->stack_of_open_elements->count() || - 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || false === $this->state->frameset_ok ) { // Ignore the token. 
@@ -2075,7 +2110,7 @@ private function step_in_body(): bool { 'ADDRESS' !== $node->node_name && 'DIV' !== $node->node_name && 'P' !== $node->node_name && - $this->is_special( $node->node_name ) + self::is_special( $node ) ) { /* * > If node is in the special category, but is not an address, div, @@ -2136,11 +2171,6 @@ private function step_in_body(): bool { * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" - * - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as `
`, and a foreign element of - * the same given name. */ case '-ADDRESS': case '-ARTICLE': @@ -2411,11 +2441,6 @@ private function step_in_body(): bool { /* * > A end tag token whose tag name is one of: "applet", "marquee", "object" - * - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as ``, and a foreign element of - * the same given name. */ case '-APPLET': case '-MARQUEE': @@ -2679,9 +2704,12 @@ private function step_in_body(): bool { * * These ought to be handled in the attribute methods. */ - - $this->bail( 'Cannot process MATH element, opening foreign content.' ); - break; + $this->state->current_token->namespace = 'math'; + $this->insert_html_element( $this->state->current_token ); + if ( $this->state->current_token->has_self_closing_flag ) { + $this->state->stack_of_open_elements->pop(); + } + return true; /* * > A start tag whose tag name is "svg" @@ -2695,9 +2723,12 @@ private function step_in_body(): bool { * * These ought to be handled in the attribute methods. */ - - $this->bail( 'Cannot process SVG element, opening foreign content.' ); - break; + $this->state->current_token->namespace = 'svg'; + $this->insert_html_element( $this->state->current_token ); + if ( $this->state->current_token->has_self_closing_flag ) { + $this->state->stack_of_open_elements->pop(); + } + return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", @@ -2737,17 +2768,11 @@ private function step_in_body(): bool { * close anything beyond its containing `P` or `DIV` element. 
*/ foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { - /* - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as ``, and a foreign element of - * the same given name. - */ - if ( $token_name === $node->node_name ) { + if ( 'html' === $node->namespace && $token_name === $node->node_name ) { break; } - if ( self::is_special( $node->node_name ) ) { + if ( self::is_special( $node ) ) { // This is a parse error, ignore the token. return $this->step(); } @@ -4069,7 +4094,284 @@ private function step_after_after_frameset(): bool { * @return bool Whether an element was found. */ private function step_in_foreign_content(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + /* + * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" + * + * This section drawn out above the switch to more easily incorporate + * the additional rules based on the presence of the attributes. + */ + if ( + '+FONT' === $op && + ( + null !== $this->get_attribute( 'color' ) || + null !== $this->get_attribute( 'face' ) || + null !== $this->get_attribute( 'size' ) + ) + ) { + $op = '+FONT with attributes'; + } + + switch ( $op ) { + case '#text': + /* + * > A character token that is U+0000 NULL + * + * This is handled by `get_modifiable_text()`. + */ + + /* + * Whitespace-only text does not affect the frameset-ok flag. + * It is probably inter-element whitespace, but it may also + * contain character references which decode only to whitespace. 
+ */ + $text = $this->get_modifiable_text(); + if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) { + $this->state->frameset_ok = false; + } + + $this->insert_foreign_element( $this->state->current_token, false ); + return true; + + /* + * > A comment token + */ + case '#cdata-section': + case '#comment': + case '#funky_comment': + $this->insert_foreign_element( $this->state->current_token, false ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center", + * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", + * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", + * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", + * > "table", "tt", "u", "ul", "var" + * + * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" + * + * > An end tag whose tag name is "br", "p" + * + * Closing BR tags are always reported by the Tag Processor as opening tags. + */ + case '+B': + case '+BIG': + case '+BLOCKQUOTE': + case '+BODY': + case '+BR': + case '+CENTER': + case '+CODE': + case '+DD': + case '+DIV': + case '+DL': + case '+DT': + case '+EM': + case '+EMBED': + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + case '+HEAD': + case '+HR': + case '+I': + case '+IMG': + case '+LI': + case '+LISTING': + case '+MENU': + case '+META': + case '+NOBR': + case '+OL': + case '+P': + case '+PRE': + case '+RUBY': + case '+S': + case '+SMALL': + case '+SPAN': + case '+STRONG': + case '+STRIKE': + case '+SUB': + case '+SUP': + case '+TABLE': + case '+TT': + case '+U': + case '+UL': + case '+VAR': + case '+FONT with attributes': + case '-BR': + case '-P': + // @todo Indicate a parse error once it's possible. 
+ foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) { + if ( + 'math' === $current_node->integration_node_type || + 'html' === $current_node->integration_node_type || + 'html' === $current_node->namespace + ) { + break; + } + + $this->state->stack_of_open_elements->pop(); + } + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + + /* + * > Any other start tag + */ + if ( ! $this->is_tag_closer() ) { + $this->insert_foreign_element( $this->state->current_token, false ); + + /* + * > If the token has its self-closing flag set, then run + * > the appropriate steps from the following list: + */ + if ( $this->state->current_token->has_self_closing_flag ) { + if ( 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { + /* + * > Acknowledge the token's self-closing flag, and then act as + * > described in the steps for a "script" end tag below. + * + * @todo Verify that this shouldn't be handled by the rule for + * "An end tag whose name is 'script', if the current node + * is an SVG script element." + */ + goto in_foreign_content_any_other_end_tag; + } else { + $this->state->stack_of_open_elements->pop(); + } + } + return true; + } + + /* + * > An end tag whose name is "script", if the current node is an SVG script element. + */ + if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { + $this->state->stack_of_open_elements->pop(); + } + + /* + * > Any other end tag + */ + if ( $this->is_tag_closer() ) { + in_foreign_content_any_other_end_tag: + $node = $this->state->stack_of_open_elements->current_node(); + if ( $tag_name !== $node->node_name ) { + // @todo Indicate a parse error once it's possible. 
+ } + in_foreign_content_end_tag_loop: + if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) { + return true; + } + + /* + * > If node's tag name, converted to ASCII lowercase, is the same as the tag name + * > of the token, pop elements from the stack of open elements until node has + * > been popped from the stack, and then return. + */ + if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) { + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return true; + } + } + } + + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { + $node = $item; + break; + } + + if ( 'html' !== $node->namespace ) { + goto in_foreign_content_end_tag_loop; + } + + switch ( $this->state->insertion_mode ) { + case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: + return $this->step_initial(); + + case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML: + return $this->step_before_html(); + + case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: + return $this->step_before_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: + return $this->step_in_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: + return $this->step_in_head_noscript(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: + return $this->step_after_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: + return $this->step_in_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: + return $this->step_in_table(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT: + return $this->step_in_table_text(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: + return $this->step_in_caption(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP: + return $this->step_in_column_group(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: + return $this->step_in_table_body(); + + 
case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: + return $this->step_in_row(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: + return $this->step_in_cell(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: + return $this->step_in_select(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE: + return $this->step_in_select_in_table(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: + return $this->step_in_template(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY: + return $this->step_after_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET: + return $this->step_in_frameset(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET: + return $this->step_after_frameset(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY: + return $this->step_after_after_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: + return $this->step_after_after_frameset(); + + // This should be unreachable but PHP doesn't have total type checking on switch. + default: + $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); + } + } } /* @@ -4099,6 +4401,19 @@ private function bookmark_token() { * HTML semantic overrides for Tag Processor */ + /** + * Indicates the namespace of the current token, or "html" if there is none. + * + * @return string One of "html", "math", or "svg". + */ + public function get_namespace(): string { + if ( ! isset( $this->current_element ) ) { + return 'html'; + } + + return $this->current_element->token->namespace; + } + /** * Returns the uppercase name of the matched tag. * @@ -4734,6 +5049,28 @@ private function generate_implied_end_tags_thoroughly(): void { } } + /** + * Returns the adjusted current node. 
+ * + * > The adjusted current node is the context element if the parser was created as + * > part of the HTML fragment parsing algorithm and the stack of open elements + * > has only one element in it (fragment case); otherwise, the adjusted current + * > node is the current node. + * + * @see https://html.spec.whatwg.org/#adjusted-current-node + * + * @since 6.7.0 + * + * @return WP_HTML_Token|null The adjusted current node. + */ + private function get_adjusted_current_node(): ?WP_HTML_Token { + if ( isset( $this->context_node ) && 1 === $this->state->stack_of_open_elements->count() ) { + return $this->context_node; + } + + return $this->state->stack_of_open_elements->current_node(); + } + /** * Reconstructs the active formatting elements. * @@ -5043,7 +5380,7 @@ private function run_adoption_agency_algorithm(): void { continue; } - if ( self::is_special( $item->node_name ) ) { + if ( self::is_special( $item ) ) { $furthest_block = $item; break; } @@ -5111,6 +5448,45 @@ private function insert_html_element( WP_HTML_Token $token ): void { $this->state->stack_of_open_elements->push( $token ); } + /** + * Inserts a foreign element on to the stack of open elements. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#insert-a-foreign-element + * + * @param WP_HTML_Token $token Insert this token. The token's namespace and + * insertion point will be updated correctly. + * @param bool $only_add_to_element_stack Whether to skip the "insert an element at the adjusted + * insertion location" algorithm when adding this element. + */ + private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void { + $adjusted_current_node = $this->get_adjusted_current_node(); + + $token->namespace = $adjusted_current_node ? 
$adjusted_current_node->namespace : 'html'; + + if ( $this->is_mathml_integration_point() ) { + $token->integration_node_type = 'math'; + } elseif ( $this->is_html_integration_point() ) { + $token->integration_node_type = 'html'; + } + + if ( false === $only_add_to_element_stack ) { + /* + * @todo Implement the "appropriate place for inserting a node" and the + * "insert an element at the adjusted insertion location" algorithms. + * + * These algorithms mostly impacts DOM tree construction and not the HTML API. + * Here, there's no DOM node onto which the element will be appended, so the + * parser will skip this step. + * + * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location + */ + } + + $this->insert_html_element( $token ); + } + /** * Inserts a virtual element on the stack of open elements. * @@ -5136,6 +5512,88 @@ private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_H * HTML Specification Helpers */ + /** + * Indicates if the current token is a MathML integration point. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#mathml-text-integration-point + * + * @return bool Whether the current token is a MathML integration point. + */ + private function is_mathml_integration_point(): bool { + $current_token = $this->state->current_token; + if ( ! isset( $current_token ) ) { + return false; + } + + if ( 'math' !== $current_token->namespace || 'M' !== $current_token->node_name[0] ) { + return false; + } + + $tag_name = $current_token->node_name; + + return ( + 'MI' === $tag_name || + 'MO' === $tag_name || + 'MN' === $tag_name || + 'MS' === $tag_name || + 'MTEXT' === $tag_name + ); + } + + /** + * Indicates if the current token is an HTML integration point. + * + * Note that this method must be an instance method with access + * to the current token, since it needs to examine the attributes + * of the currently-matched tag, if it's in the MathML namespace. 
+	 * Otherwise it would be required to scan the HTML and ensure that
+	 * no other accounting is overlooked.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @see https://html.spec.whatwg.org/#html-integration-point
+	 *
+	 * @return bool Whether the current token is an HTML integration point.
+	 */
+	private function is_html_integration_point(): bool {
+		$current_token = $this->state->current_token;
+		if ( ! isset( $current_token ) ) {
+			return false;
+		}
+
+		if ( 'html' === $current_token->namespace ) {
+			return false;
+		}
+
+		$tag_name = $current_token->node_name;
+
+		if ( 'svg' === $current_token->namespace ) {
+			return (
+				'DESC' === $tag_name ||
+				'FOREIGNOBJECT' === $tag_name ||
+				'TITLE' === $tag_name
+			);
+		}
+
+		/*
+		 * Only MathML could remain: its sole HTML integration point is ANNOTATION-XML
+		 * with a qualifying `encoding`. Returning `false` for everything else ensures
+		 * every path honors the declared `bool` return type.
+		 */
+		if ( 'math' !== $current_token->namespace || 'ANNOTATION-XML' !== $tag_name ) {
+			return false;
+		}
+
+		$encoding = $this->get_attribute( 'encoding' );
+
+		return is_string( $encoding ) && (
+			0 === strcasecmp( $encoding, 'application/xhtml+xml' ) ||
+			0 === strcasecmp( $encoding, 'text/html' )
+		);
+	}
+
 	/**
 	 * Returns whether an element of a given name is in the HTML special category.
 	 *
@@ -5143,11 +5601,17 @@ private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_H
 	 *
 	 * @see https://html.spec.whatwg.org/#special
 	 *
-	 * @param string $tag_name Name of element to check.
+	 * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace.
 	 * @return bool Whether the element of the given name is in the special category.
 	 */
 	public static function is_special( $tag_name ): bool {
-		$tag_name = strtoupper( $tag_name );
+		if ( is_string( $tag_name ) ) {
+			$tag_name = strtoupper( $tag_name );
+		} else {
+			$tag_name = 'html' === $tag_name->namespace
+				? strtoupper( $tag_name->node_name )
+				: "{$tag_name->namespace} {$tag_name->node_name}";
+		}
 
 		return (
 			'ADDRESS' === $tag_name ||
@@ -5235,17 +5699,17 @@ public static function is_special( $tag_name ): bool {
 			'XMP' === $tag_name ||
 
 			// MathML.
- 'MI' === $tag_name || - 'MO' === $tag_name || - 'MN' === $tag_name || - 'MS' === $tag_name || - 'MTEXT' === $tag_name || - 'ANNOTATION-XML' === $tag_name || + 'math MI' === $tag_name || + 'math MO' === $tag_name || + 'math MN' === $tag_name || + 'math MS' === $tag_name || + 'math MTEXT' === $tag_name || + 'math ANNOTATION-XML' === $tag_name || // SVG. - 'FOREIGNOBJECT' === $tag_name || - 'DESC' === $tag_name || - 'TITLE' === $tag_name + 'svg DESC' === $tag_name || + 'svg FOREIGNOBJECT' === $tag_name || + 'svg TITLE' === $tag_name ); } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 0ff2cdc4dd10d..fb21c15d1d96e 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -511,6 +511,23 @@ class WP_HTML_Tag_Processor { */ protected $parser_state = self::STATE_READY; + /** + * Indicates whether the parser is inside foreign content, + * e.g. inside an SVG or MathML element. + * + * One of 'html', 'svg', or 'math'. + * + * Several parsing rules change based on whether the parser + * is inside foreign content, including whether CDATA sections + * are allowed and whether a self-closing flag indicates that + * an element has no content. + * + * @since 6.7.0 + * + * @var string + */ + private $parsing_namespace = 'html'; + /** * What kind of syntax token became an HTML comment. * @@ -780,6 +797,25 @@ public function __construct( $html ) { $this->html = $html; } + /** + * Switches parsing mode into a new namespace, such as when + * encountering an SVG tag and entering foreign content. + * + * @since 6.7.0 + * + * @param string $new_namespace One of 'html', 'svg', or 'math' indicating into what + * namespace the next tokens will be processed. + * @return bool Whether the namespace was valid and changed. + */ + public function change_parsing_namespace( string $new_namespace ): bool { + if ( ! 
in_array( $new_namespace, array( 'html', 'math', 'svg' ), true ) ) { + return false; + } + + $this->parsing_namespace = $new_namespace; + return true; + } + /** * Finds the next tag matching the $query. * @@ -843,6 +879,7 @@ public function next_tag( $query = null ): bool { * The Tag Processor currently only supports the tag token. * * @since 6.5.0 + * @since 6.7.0 Recognizes CDATA sections within foreign content. * * @return bool Whether a token was parsed. */ @@ -956,6 +993,7 @@ private function base_class_next_token(): bool { */ if ( $this->is_closing_tag || + 'html' !== $this->parsing_namespace || 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) ) { return true; @@ -996,7 +1034,6 @@ private function base_class_next_token(): bool { $duplicate_attributes = $this->duplicate_attributes; // Find the closing tag if necessary. - $found_closer = false; switch ( $tag_name ) { case 'SCRIPT': $found_closer = $this->skip_script_data(); @@ -1759,6 +1796,32 @@ private function parse_next_tag(): bool { return true; } + if ( + 'html' !== $this->parsing_namespace && + strlen( $html ) > $at + 8 && + '[' === $html[ $at + 2 ] && + 'C' === $html[ $at + 3 ] && + 'D' === $html[ $at + 4 ] && + 'A' === $html[ $at + 5 ] && + 'T' === $html[ $at + 6 ] && + 'A' === $html[ $at + 7 ] && + '[' === $html[ $at + 8 ] + ) { + $closer_at = strpos( $html, ']]>', $at + 9 ); + if ( false === $closer_at ) { + $this->parser_state = self::STATE_INCOMPLETE_INPUT; + + return false; + } + + $this->parser_state = self::STATE_CDATA_NODE; + $this->text_starts_at = $at + 9; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 3 - $this->token_starts_at; + $this->bytes_already_parsed = $closer_at + 3; + return true; + } + /* * Anything else here is an incorrectly-opened comment and transitions * to the bogus comment state - skip to the nearest >. 
If no closer is @@ -2653,6 +2716,17 @@ public function get_attribute_names_with_prefix( $prefix ): ?array { return $matches; } + /** + * Returns the namespace of the matched token. + * + * @since 6.7.0 + * + * @return string One of 'html', 'math', or 'svg'. + */ + public function get_namespace(): string { + return $this->parsing_namespace; + } + /** * Returns the uppercase name of the matched tag. * @@ -2690,6 +2764,388 @@ public function get_tag(): ?string { return null; } + /** + * Returns the adjusted tag name for a given token, taking into + * account the current parsing context, whether HTML, SVG, or MathML. + * + * @since 6.7.0 + * + * @return string|null Name of current tag name. + */ + public function get_qualified_tag_name(): ?string { + $tag_name = $this->get_tag(); + if ( null === $tag_name ) { + return null; + } + + if ( 'html' === $this->get_namespace() ) { + return $tag_name; + } + + $lower_tag_name = strtolower( $tag_name ); + if ( 'math' === $this->get_namespace() ) { + return $lower_tag_name; + } + + if ( 'svg' === $this->get_namespace() ) { + switch ( $lower_tag_name ) { + case 'altglyph': + return 'altGlyph'; + + case 'altglyphdef': + return 'altGlyphDef'; + + case 'altglyphitem': + return 'altGlyphItem'; + + case 'animatecolor': + return 'animateColor'; + + case 'animatemotion': + return 'animateMotion'; + + case 'animatetransform': + return 'animateTransform'; + + case 'clippath': + return 'clipPath'; + + case 'feblend': + return 'feBlend'; + + case 'fecolormatrix': + return 'feColorMatrix'; + + case 'fecomponenttransfer': + return 'feComponentTransfer'; + + case 'fecomposite': + return 'feComposite'; + + case 'feconvolvematrix': + return 'feConvolveMatrix'; + + case 'fediffuselighting': + return 'feDiffuseLighting'; + + case 'fedisplacementmap': + return 'feDisplacementMap'; + + case 'fedistantlight': + return 'feDistantLight'; + + case 'fedropshadow': + return 'feDropShadow'; + + case 'feflood': + return 'feFlood'; + + case 'fefunca': + 
return 'feFuncA'; + + case 'fefuncb': + return 'feFuncB'; + + case 'fefuncg': + return 'feFuncG'; + + case 'fefuncr': + return 'feFuncR'; + + case 'fegaussianblur': + return 'feGaussianBlur'; + + case 'feimage': + return 'feImage'; + + case 'femerge': + return 'feMerge'; + + case 'femergenode': + return 'feMergeNode'; + + case 'femorphology': + return 'feMorphology'; + + case 'feoffset': + return 'feOffset'; + + case 'fepointlight': + return 'fePointLight'; + + case 'fespecularlighting': + return 'feSpecularLighting'; + + case 'fespotlight': + return 'feSpotLight'; + + case 'fetile': + return 'feTile'; + + case 'feturbulence': + return 'feTurbulence'; + + case 'foreignobject': + return 'foreignObject'; + + case 'glyphref': + return 'glyphRef'; + + case 'lineargradient': + return 'linearGradient'; + + case 'radialgradient': + return 'radialGradient'; + + case 'textpath': + return 'textPath'; + + default: + return $lower_tag_name; + } + } + } + + /** + * Returns the adjusted attribute name for a given attribute, taking into + * account the current parsing context, whether HTML, SVG, or MathML. + * + * @since 6.7.0 + * + * @param string $attribute_name Which attribute to adjust. 
+ * + * @return string|null + */ + public function get_qualified_attribute_name( $attribute_name ): ?string { + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + return null; + } + + $namespace = $this->get_namespace(); + $lower_name = strtolower( $attribute_name ); + + if ( 'math' === $namespace && 'definitionurl' === $lower_name ) { + return 'definitionURL'; + } + + if ( 'svg' === $this->get_namespace() ) { + switch ( $lower_name ) { + case 'attributename': + return 'attributeName'; + + case 'attributetype': + return 'attributeType'; + + case 'basefrequency': + return 'baseFrequency'; + + case 'baseprofile': + return 'baseProfile'; + + case 'calcmode': + return 'calcMode'; + + case 'clippathunits': + return 'clipPathUnits'; + + case 'diffuseconstant': + return 'diffuseConstant'; + + case 'edgemode': + return 'edgeMode'; + + case 'filterunits': + return 'filterUnits'; + + case 'glyphref': + return 'glyphRef'; + + case 'gradienttransform': + return 'gradientTransform'; + + case 'gradientunits': + return 'gradientUnits'; + + case 'kernelmatrix': + return 'kernelMatrix'; + + case 'kernelunitlength': + return 'kernelUnitLength'; + + case 'keypoints': + return 'keyPoints'; + + case 'keysplines': + return 'keySplines'; + + case 'keytimes': + return 'keyTimes'; + + case 'lengthadjust': + return 'lengthAdjust'; + + case 'limitingconeangle': + return 'limitingConeAngle'; + + case 'markerheight': + return 'markerHeight'; + + case 'markerunits': + return 'markerUnits'; + + case 'markerwidth': + return 'markerWidth'; + + case 'maskcontentunits': + return 'maskContentUnits'; + + case 'maskunits': + return 'maskUnits'; + + case 'numoctaves': + return 'numOctaves'; + + case 'pathlength': + return 'pathLength'; + + case 'patterncontentunits': + return 'patternContentUnits'; + + case 'patterntransform': + return 'patternTransform'; + + case 'patternunits': + return 'patternUnits'; + + case 'pointsatx': + return 'pointsAtX'; + + case 'pointsaty': + return 'pointsAtY'; + + 
case 'pointsatz': + return 'pointsAtZ'; + + case 'preservealpha': + return 'preserveAlpha'; + + case 'preserveaspectratio': + return 'preserveAspectRatio'; + + case 'primitiveunits': + return 'primitiveUnits'; + + case 'refx': + return 'refX'; + + case 'refy': + return 'refY'; + + case 'repeatcount': + return 'repeatCount'; + + case 'repeatdur': + return 'repeatDur'; + + case 'requiredextensions': + return 'requiredExtensions'; + + case 'requiredfeatures': + return 'requiredFeatures'; + + case 'specularconstant': + return 'specularConstant'; + + case 'specularexponent': + return 'specularExponent'; + + case 'spreadmethod': + return 'spreadMethod'; + + case 'startoffset': + return 'startOffset'; + + case 'stddeviation': + return 'stdDeviation'; + + case 'stitchtiles': + return 'stitchTiles'; + + case 'surfacescale': + return 'surfaceScale'; + + case 'systemlanguage': + return 'systemLanguage'; + + case 'tablevalues': + return 'tableValues'; + + case 'targetx': + return 'targetX'; + + case 'targety': + return 'targetY'; + + case 'textlength': + return 'textLength'; + + case 'viewbox': + return 'viewBox'; + + case 'viewtarget': + return 'viewTarget'; + + case 'xchannelselector': + return 'xChannelSelector'; + + case 'ychannelselector': + return 'yChannelSelector'; + + case 'zoomandpan': + return 'zoomAndPan'; + } + } + + if ( 'html' !== $namespace ) { + switch ( $lower_name ) { + case 'xlink:actuate': + return 'xlink actuate'; + + case 'xlink:arcrole': + return 'xlink arcrole'; + + case 'xlink:href': + return 'xlink href'; + + case 'xlink:role': + return 'xlink role'; + + case 'xlink:show': + return 'xlink show'; + + case 'xlink:title': + return 'xlink title'; + + case 'xlink:type': + return 'xlink type'; + + case 'xml:lang': + return 'xml lang'; + + case 'xml:space': + return 'xml space'; + + case 'xmlns': + return 'xmlns'; + + case 'xmlns:xlink': + return 'xmlns xlink'; + } + } + + return $attribute_name; + } + /** * Indicates if the currently matched tag contains 
the self-closing flag. * @@ -2963,8 +3419,12 @@ public function get_modifiable_text(): string { * In all other contexts it's replaced by the replacement character (U+FFFD) * for security reasons (to avoid joining together strings that were safe * when separated, but not when joined). + * + * @todo Inside HTML integration points and MathML integration points, the + * text is processed according to the insertion mode, not according + * to the foreign content rules. This should strip the NULL bytes. */ - return '#text' === $tag_name + return ( '#text' === $tag_name && 'html' === $this->get_namespace() ) ? str_replace( "\x00", '', $decoded ) : str_replace( "\x00", "\u{FFFD}", $decoded ); } diff --git a/src/wp-includes/html-api/class-wp-html-token.php b/src/wp-includes/html-api/class-wp-html-token.php index 948fe343dfbaa..d5e51ac29007f 100644 --- a/src/wp-includes/html-api/class-wp-html-token.php +++ b/src/wp-includes/html-api/class-wp-html-token.php @@ -60,6 +60,24 @@ class WP_HTML_Token { */ public $has_self_closing_flag = false; + /** + * Indicates if the element is an HTML element or if it's inside foreign content. + * + * @since 6.7.0 + * + * @var string 'html', 'svg', or 'math'. + */ + public $namespace = 'html'; + + /** + * Indicates which kind of integration point the element is, if any. + * + * @since 6.7.0 + * + * @var string|null 'math', 'html', or null if not an integration point. + */ + public $integration_node_type = null; + /** * Called when token is garbage-collected or otherwise destroyed. 
* @@ -80,6 +98,7 @@ class WP_HTML_Token { */ public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { $this->bookmark_name = $bookmark_name; + $this->namespace = 'html'; $this->node_name = $node_name; $this->has_self_closing_flag = $has_self_closing_flag; $this->on_destroy = $on_destroy; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 01e0f4f02c0b5..68c60a1ff85cc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -358,37 +358,6 @@ public static function data_void_tags_not_ignored_in_body() { return $all_void_tags; } - /** - * Ensures that special handling of unsupported tags is cleaned up - * as handling is implemented. Otherwise there's risk of leaving special - * handling (that is never reached) when tag handling is implemented. - * - * @ticket 60092 - * - * @dataProvider data_unsupported_special_in_body_tags - * - * @covers WP_HTML_Processor::step_in_body - * - * @param string $tag_name Name of the tag to test. - */ - public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) { - $fragment = WP_HTML_Processor::create_fragment( '<' . $tag_name . '>' ); - $this->assertFalse( $fragment->next_tag(), 'Should fail to find tag: ' . $tag_name . '.' ); - $this->assertEquals( $fragment->get_last_error(), WP_HTML_Processor::ERROR_UNSUPPORTED, 'Should have unsupported last error.' ); - } - - /** - * Data provider. - * - * @return array[] - */ - public static function data_unsupported_special_in_body_tags() { - return array( - 'MATH' => array( 'MATH' ), - 'SVG' => array( 'SVG' ), - ); - } - /** * Ensures that the HTML Processor properly reports the depth of a given element. 
* diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 1486769533e96..911fa8b910b37 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -164,49 +164,6 @@ public static function data_single_tag_of_supported_elements() { return $data; } - /** - * Ensures that no new HTML elements are accidentally partially-supported. - * - * When introducing support for new HTML elements, there are multiple places - * in the HTML Processor that need to be updated, until the time that the class - * has full HTML5 support. Because of this, these tests lock down the interface - * to ensure that support isn't accidentally updated in one place for a new - * element while overlooked in another. - * - * @ticket 58517 - * - * @covers WP_HTML_Processor::step - * - * @dataProvider data_unsupported_elements - * - * @param string $html HTML string containing unsupported elements. - */ - public function test_fails_when_encountering_unsupported_tag( $html ) { - $processor = WP_HTML_Processor::create_fragment( $html ); - - $this->assertFalse( $processor->step(), "Should not have stepped into unsupported {$processor->get_tag()} element." ); - } - - /** - * Data provider. - * - * @return array[] - */ - public static function data_unsupported_elements() { - $unsupported_elements = array( - 'MATH', - 'PLAINTEXT', // Neutralized. 
- 'SVG', - ); - - $data = array(); - foreach ( $unsupported_elements as $tag_name ) { - $data[ $tag_name ] = array( "<{$tag_name}>" ); - } - - return $data; - } - /** * @ticket 58517 * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 22eef774d4e90..b6213aac8d0e9 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -181,19 +181,24 @@ private static function build_tree_representation( ?string $fragment_context, st $is_closer = $processor->is_tag_closer(); if ( $was_text && '#text' !== $token_name ) { - $output .= "{$text_node}\"\n"; + if ( '' !== $text_node ) { + $output .= "{$text_node}\"\n"; + } $was_text = false; $text_node = ''; } switch ( $token_type ) { case '#tag': - $tag_name = strtolower( $token_name ); + $namespace = $processor->get_namespace(); + $tag_name = 'html' === $namespace + ? strtolower( $processor->get_tag() ) + : "{$namespace} {$processor->get_qualified_tag_name()}"; if ( $is_closer ) { --$indent_level; - if ( 'TEMPLATE' === $token_name ) { + if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { --$indent_level; } @@ -202,7 +207,11 @@ private static function build_tree_representation( ?string $fragment_context, st $tag_indent = $indent_level; - if ( ! WP_HTML_Processor::is_void( $tag_name ) ) { + if ( 'html' !== $namespace ) { + if ( ! $processor->has_self_closing_flag() ) { + ++$indent_level; + } + } elseif ( ! 
WP_HTML_Processor::is_void( $tag_name ) ) { ++$indent_level; } @@ -210,9 +219,47 @@ private static function build_tree_representation( ?string $fragment_context, st $attribute_names = $processor->get_attribute_names_with_prefix( '' ); if ( $attribute_names ) { - sort( $attribute_names, SORT_STRING ); - + $sorted_attributes = array(); foreach ( $attribute_names as $attribute_name ) { + $sorted_attributes[ $attribute_name ] = $processor->get_qualified_attribute_name( $attribute_name ); + } + + /* + * Sorts attributes to match html5lib sort order. + * + * - First comes normal HTML attributes. + * - Then come adjusted foreign attributes; these have spaces in their names. + * - Finally come non-adjusted foreign attributes; these have a colon in their names. + * + * Example: + * + * From: + * Sorted: 'definitionURL', 'xlink show', 'xlink title', 'xlink:author' + */ + uasort( + $sorted_attributes, + static function ( $a, $b ) { + $a_has_ns = str_contains( $a, ':' ); + $b_has_ns = str_contains( $b, ':' ); + + // Attributes with `:` should follow all other attributes. + if ( $a_has_ns !== $b_has_ns ) { + return $a_has_ns ? 1 : -1; + } + + $a_has_sp = str_contains( $a, ' ' ); + $b_has_sp = str_contains( $b, ' ' ); + + // Attributes with a namespace ' ' should come after those without. + if ( $a_has_sp !== $b_has_sp ) { + return $a_has_sp ? 1 : -1; + } + + return $a <=> $b; + } + ); + + foreach ( $sorted_attributes as $attribute_name => $display_name ) { $val = $processor->get_attribute( $attribute_name ); /* * Attributes with no value are `true` with the HTML API, @@ -221,7 +268,7 @@ private static function build_tree_representation( ?string $fragment_context, st if ( true === $val ) { $val = ''; } - $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$attribute_name}=\"{$val}\"\n"; + $output .= str_repeat( $indent, $tag_indent + 1 ) . 
"{$display_name}=\"{$val}\"\n"; } } @@ -231,7 +278,7 @@ private static function build_tree_representation( ?string $fragment_context, st $output .= str_repeat( $indent, $indent_level ) . "\"{$modifiable_text}\"\n"; } - if ( 'TEMPLATE' === $token_name ) { + if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { $output .= str_repeat( $indent, $indent_level ) . "content\n"; ++$indent_level; } @@ -242,12 +289,17 @@ private static function build_tree_representation( ?string $fragment_context, st break; + case '#cdata-section': case '#text': + $text_content = $processor->get_modifiable_text(); + if ( '' === $text_content ) { + break; + } $was_text = true; if ( '' === $text_node ) { $text_node .= str_repeat( $indent, $indent_level ) . '"'; } - $text_node .= $processor->get_modifiable_text(); + $text_node .= $text_content; break; case '#funky-comment': diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php index d2b24cd8bbcbc..e69de29bb2d1d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php @@ -1,219 +0,0 @@ -" ); - - $this->assertFalse( $processor->step(), "Must support terminating elements in specific scope check before adding support for the {$tag_name} element." ); - } - - /** - * The check for whether an element is in a scope depends on - * looking for a number of terminating elements in the stack of open - * elements. Until the listed elements are supported in the HTML - * processor, there are no terminating elements and there's no - * point in taking the time to look for them. - * - * @since 6.4.0 - * - * @ticket 58517 - */ - public function test_has_element_in_scope_needs_support() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 
- $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The check for whether an element is in list item scope depends on - * the elements for any scope, plus UL and OL. - * - * The method for asserting list item scope doesn't currently exist - * because the LI element isn't yet supported and the LI element is - * the only element that needs to know about list item scope. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::has_element_in_list_item_scope - */ - public function test_has_element_in_list_item_scope_needs_support() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. - $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The check for whether an element is in BUTTON scope depends on - * the elements for any scope, plus BUTTON. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::has_element_in_button_scope - */ - public function test_has_element_in_button_scope_needs_support() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. - $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. 
- */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The optimization maintaining a flag for "P is in BUTTON scope" requires - * updating that flag every time an element is popped from the stack of - * open elements. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::after_element_pop - */ - public function test_after_element_pop_must_maintain_p_in_button_scope_flag() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. - $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The optimization maintaining a flag for "P is in BUTTON scope" requires - * updating that flag every time an element is pushed onto the stack of - * open elements. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::after_element_push - */ - public function test_after_element_push_must_maintain_p_in_button_scope_flag() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. - $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The check for whether an element is in TABLE scope depends on - * the HTML, TABLE, and TEMPLATE elements. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::has_element_in_table_scope - */ - public function test_has_element_in_table_scope_needs_support() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. 
- $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } - - /** - * The check for whether an element is in SELECT scope depends on - * the OPTGROUP and OPTION elements. - * - * @since 6.4.0 - * - * @ticket 58517 - * - * @covers WP_HTML_Open_Elements::has_element_in_select_scope - */ - public function test_has_element_in_select_scope_needs_support() { - // MathML Elements: MI, MO, MN, MS, MTEXT, ANNOTATION-XML. - $this->ensure_support_is_added_everywhere( 'MATH' ); - - /* - * SVG elements: note that TITLE is both an HTML element and an SVG element - * so care must be taken when adding support for either one. - * - * FOREIGNOBJECT, DESC, TITLE. - */ - $this->ensure_support_is_added_everywhere( 'SVG' ); - } -} diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php index fbb2521233679..e8195dcfa28c6 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php @@ -512,6 +512,67 @@ public function test_basic_assertion_abruptly_closed_cdata_section() { ); } + /** + * Ensures that basic CDATA sections inside foreign content are detected. + * + * @ticket 61576 + */ + public function test_basic_cdata_in_foreign_content() { + $processor = new WP_HTML_Tag_Processor( 'this is >&gt; real CDATA' ); + $processor->next_token(); + + // Artificially change namespace; this should be done in the HTML Processor. + $processor->change_parsing_namespace( 'svg' ); + $processor->next_token(); + + $this->assertSame( + '#cdata-section', + $processor->get_token_name(), + "Should have found a CDATA section but found {$processor->get_token_name()} instead." 
+ ); + + $this->assertNull( + $processor->get_tag(), + 'Should not have been able to query tag name on non-element token.' + ); + + $this->assertNull( + $processor->get_attribute( 'type' ), + 'Should not have been able to query attributes on non-element token.' + ); + + $this->assertSame( + 'this is >> real CDATA', + $processor->get_modifiable_text(), + 'Found incorrect modifiable text.' + ); + } + + /** + * Ensures that empty CDATA sections inside foreign content are detected. + * + * @ticket 61576 + */ + public function test_empty_cdata_in_foreign_content() { + $processor = new WP_HTML_Tag_Processor( '' ); + $processor->next_token(); + + // Artificially change namespace; this should be done in the HTML Processor. + $processor->change_parsing_namespace( 'svg' ); + $processor->next_token(); + + $this->assertSame( + '#cdata-section', + $processor->get_token_name(), + "Should have found a CDATA section but found {$processor->get_token_name()} instead." + ); + + $this->assertEmpty( + $processor->get_modifiable_text(), + 'Found non-empty modifiable text.' + ); + } + /** * Ensures that normative Processing Instruction nodes are properly parsed. * From fe9aa7c48d4c72bda6c55273015bd01cd24aa603 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 8 Aug 2024 07:31:15 +0000 Subject: [PATCH 06/10] HTML API: Add support for SVG and MathML (Foreign content) (remove file) As part of work to add more spec support to the HTML API, this patch adds support for SVG and MathML elements, or more generally, "foreign content." The rules in foreign content are a mix of XML and HTML parsing rules and introduce additional complexity into the processor, but supporting them is important in order to avoid getting lost when inside these elements. This patch follows the first by deleting the empty files, which were mistakenly left in during the initial merge. Developed in https://github.com/wordpress/wordpress-develop/pull/6006 Discussed in https://core.trac.wordpress.org/ticket/61576 Follow-up to [58867]. 
Props: dmsnell, jonsurrell, westonruter. See #61576. git-svn-id: https://develop.svn.wordpress.org/trunk@58868 602fd350-edb4-49c9-b593-d223f7449a82 --- .../phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredOpenElements.php deleted file mode 100644 index e69de29bb2d1d..0000000000000 From b9014d69e3b34db9e4ec792fd48651241ae4a4f9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 8 Aug 2024 16:13:25 +0000 Subject: [PATCH 07/10] HTML API: `expects_closer()` should report false for self-closing foreign elements. Previously, `WP_HTML_Processor::expects_closer()` would report `true` for self-closing foreign elements when called without supplying a node in question, but it should have been reporting `false` just as it does for HTML elements. This patch adds a test case demonstrating the issue and a bugfix. The `html5lib` test runner was relying on the incorrect behavior, accidentally working. This is also corrected and the `html5lib` test now relies on the correct behavior of `expects_closer()`. Developed in https://github.com/wordpress/wordpress-develop/pull/7162 Discussed in https://core.trac.wordpress.org/ticket/61576 Follow-up to [58868]. Props: dmsnell. See #61576. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58870 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 8 +++++--- .../html-api/class-wp-html-tag-processor.php | 2 +- .../phpunit/tests/html-api/wpHtmlProcessor.php | 18 ++++++++++++++++++ .../tests/html-api/wpHtmlProcessorHtml5lib.php | 12 ++---------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3820fe027723d..415ff23eea95f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -786,13 +786,15 @@ public function matches_breadcrumbs( $breadcrumbs ): bool { * or `null` if not matched on any token. */ public function expects_closer( WP_HTML_Token $node = null ): ?bool { - $token_name = $node->node_name ?? $this->get_token_name(); - $token_namespace = $node->namespace ?? $this->get_namespace(); + $token_name = $node->node_name ?? $this->get_token_name(); if ( ! isset( $token_name ) ) { return null; } + $token_namespace = $node->namespace ?? $this->get_namespace(); + $token_has_self_closing = $node->has_self_closing_flag ?? $this->has_self_closing_flag(); + return ! ( // Comments, text nodes, and other atomic tokens. '#' === $token_name[0] || @@ -803,7 +805,7 @@ public function expects_closer( WP_HTML_Token $node = null ): ?bool { // Special atomic elements. ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) || // Self-closing elements in foreign content. 
- ( isset( $node ) && 'html' !== $node->namespace && $node->has_self_closing_flag ) + ( 'html' !== $token_namespace && $token_has_self_closing ) ); } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index fb21c15d1d96e..95216b08a1988 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2921,7 +2921,7 @@ public function get_qualified_attribute_name( $attribute_name ): ?string { return null; } - $namespace = $this->get_namespace(); + $namespace = $this->get_namespace(); $lower_name = strtolower( $attribute_name ); if ( 'math' === $namespace && 'definitionurl' === $lower_name ) { diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 68c60a1ff85cc..2b56cefedcd9a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -503,4 +503,22 @@ public function __construct( $html ) { $subclass_processor = call_user_func( array( get_class( $subclass_instance ), 'create_fragment' ), '' ); $this->assertInstanceOf( get_class( $subclass_instance ), $subclass_processor, '::create_fragment did not return subclass instance.' ); } + + /** + * Ensures that self-closing elements in foreign content properly report + * that they expect no closer. 
+ * + * @ticket 61576 + */ + public function test_expects_closer_foreign_content_self_closing() { + $processor = WP_HTML_Processor::create_fragment( '' ); + + $this->assertTrue( $processor->next_tag() ); + $this->assertSame( 'SVG', $processor->get_tag() ); + $this->assertFalse( $processor->expects_closer() ); + + $this->assertTrue( $processor->next_tag() ); + $this->assertSame( 'MATH', $processor->get_tag() ); + $this->assertTrue( $processor->expects_closer() ); + } } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index b6213aac8d0e9..4de4ebd1cd5c4 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -207,11 +207,7 @@ private static function build_tree_representation( ?string $fragment_context, st $tag_indent = $indent_level; - if ( 'html' !== $namespace ) { - if ( ! $processor->has_self_closing_flag() ) { - ++$indent_level; - } - } elseif ( ! WP_HTML_Processor::is_void( $tag_name ) ) { + if ( $processor->expects_closer() ) { ++$indent_level; } @@ -275,7 +271,7 @@ static function ( $a, $b ) { // Self-contained tags contain their inner contents as modifiable text. $modifiable_text = $processor->get_modifiable_text(); if ( '' !== $modifiable_text ) { - $output .= str_repeat( $indent, $indent_level ) . "\"{$modifiable_text}\"\n"; + $output .= str_repeat( $indent, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n"; } if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { @@ -283,10 +279,6 @@ static function ( $a, $b ) { ++$indent_level; } - if ( ! $processor->is_void( $tag_name ) && ! $processor->expects_closer() ) { - --$indent_level; - } - break; case '#cdata-section': From 7ceb8394c75d52c9c263b0cf201527fda8935bf8 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 8 Aug 2024 17:02:46 +0000 Subject: [PATCH 08/10] HTML API: Test and fix SVG script handling. 
When support was added for foreign content, an ambiguity in the HTML specification led to code that followed the wrong path when encountering a self-closing SCRIPT element in the SVG namespace. Further, a fallthrough was discovered during manual testing. This patch adds a new test to assert the proper behaviors and fixes these issues. In the case of the SCRIPT element, the outcome was the same with the wrong code path, making the defect benign. In the case of the fallthrough, the wrong behavior would occur. The updates in this patch also resolve a todo relating to the spec ambiguity. Developed in https://github.com/wordpress/wordpress-develop/pull/7164 Discussed in https://core.trac.wordpress.org/ticket/61576 Follow-up to [58868]. Props: dmsnell, jonsurrell. See #61576. git-svn-id: https://develop.svn.wordpress.org/trunk@58871 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 29 ++++++++++--------- .../tests/html-api/wpHtmlProcessor.php | 10 +++++++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 415ff23eea95f..2bb6302c99781 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4239,21 +4239,22 @@ private function step_in_foreign_content(): bool { /* * > If the token has its self-closing flag set, then run * > the appropriate steps from the following list: + * > + * > ↪ the token's tag name is "script", and the new current node is in the SVG namespace + * > Acknowledge the token's self-closing flag, and then act as + * > described in the steps for a "script" end tag below. + * > + * > ↪ Otherwise + * > Pop the current node off the stack of open elements and + * > acknowledge the token's self-closing flag. 
+ * + * Since the rules for SCRIPT below indicate to pop the element off of the stack of + * open elements, which is the same for the Otherwise condition, there's no need to + * separate these checks. The difference comes when a parser operates with the scripting + * flag enabled, and executes the script, which this parser does not support. */ if ( $this->state->current_token->has_self_closing_flag ) { - if ( 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { - /* - * > Acknowledge the token's self-closing flag, and then act as - * > described in the steps for a "script" end tag below. - * - * @todo Verify that this shouldn't be handled by the rule for - * "An end tag whose name is 'script', if the current node - * is an SVG script element." - */ - goto in_foreign_content_any_other_end_tag; - } else { - $this->state->stack_of_open_elements->pop(); - } + $this->state->stack_of_open_elements->pop(); } return true; } @@ -4263,13 +4264,13 @@ private function step_in_foreign_content(): bool { */ if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { $this->state->stack_of_open_elements->pop(); + return true; } /* * > Any other end tag */ if ( $this->is_tag_closer() ) { - in_foreign_content_any_other_end_tag: $node = $this->state->stack_of_open_elements->current_node(); if ( $tag_name !== $node->node_name ) { // @todo Indicate a parse error once it's possible. 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 2b56cefedcd9a..0b7d72bdbee9a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -521,4 +521,14 @@ public function test_expects_closer_foreign_content_self_closing() { $this->assertSame( 'MATH', $processor->get_tag() ); $this->assertTrue( $processor->expects_closer() ); } + + /** + * Ensures that self-closing foreign SCRIPT elements are properly found. + * + * @ticket 61576 + */ + public function test_foreign_content_script_self_closing() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $this->assertTrue( $processor->next_tag( 'script' ) ); + } } From ec58d38eef3f4387177e4bfca90df5b3a34f96be Mon Sep 17 00:00:00 2001 From: Jonathan Desrosiers Date: Thu, 8 Aug 2024 19:35:24 +0000 Subject: [PATCH 09/10] External Libraries: Update the Backbone.js library to version `1.6.0`. This updates the `backbone` library from version `1.5.0` to `1.6.0`. This is a minor bug fix release. The full list of changes can be found in the Backbone.js change log: https://backbonejs.org/#changelog. Props manooweb mardroid. Fixes #60512. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58872 602fd350-edb4-49c9-b593-d223f7449a82 --- package-lock.json | 14 +++++++------- package.json | 2 +- src/wp-includes/script-loader.php | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index c5c985d2137f4..af97de8dc6244 100644 --- a/package-lock.json +++ b/package-lock.json @@ -75,7 +75,7 @@ "@wordpress/warning": "3.0.1", "@wordpress/widgets": "4.0.6", "@wordpress/wordcount": "4.0.1", - "backbone": "1.5.0", + "backbone": "1.6.0", "clipboard": "2.0.11", "core-js-url-browser": "3.6.4", "element-closest": "^3.0.2", @@ -10067,9 +10067,9 @@ "dev": true }, "node_modules/backbone": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/backbone/-/backbone-1.5.0.tgz", - "integrity": "sha512-RPKlstw5NW+rD2X4PnEnvgLhslRnXOugXw2iBloHkPMgOxvakP1/A+tZIGM3qCm8uvZeEf8zMm0uvcK1JwL+IA==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/backbone/-/backbone-1.6.0.tgz", + "integrity": "sha512-13PUjmsgw/49EowNcQvfG4gmczz1ximTMhUktj0Jfrjth0MVaTxehpU+qYYX4MxnuIuhmvBLC6/ayxuAGnOhbA==", "dependencies": { "underscore": ">=1.8.3" } @@ -41569,9 +41569,9 @@ } }, "backbone": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/backbone/-/backbone-1.5.0.tgz", - "integrity": "sha512-RPKlstw5NW+rD2X4PnEnvgLhslRnXOugXw2iBloHkPMgOxvakP1/A+tZIGM3qCm8uvZeEf8zMm0uvcK1JwL+IA==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/backbone/-/backbone-1.6.0.tgz", + "integrity": "sha512-13PUjmsgw/49EowNcQvfG4gmczz1ximTMhUktj0Jfrjth0MVaTxehpU+qYYX4MxnuIuhmvBLC6/ayxuAGnOhbA==", "requires": { "underscore": ">=1.8.3" } diff --git a/package.json b/package.json index 7c55cb8bc1278..eedd555935c36 100644 --- a/package.json +++ b/package.json @@ -144,7 +144,7 @@ "@wordpress/warning": "3.0.1", "@wordpress/widgets": "4.0.6", "@wordpress/wordcount": "4.0.1", - "backbone": "1.5.0", + "backbone": "1.6.0", "clipboard": "2.0.11", "core-js-url-browser": 
"3.6.4", "element-closest": "^3.0.2", diff --git a/src/wp-includes/script-loader.php b/src/wp-includes/script-loader.php index 24a35a291911e..2852dc2431760 100644 --- a/src/wp-includes/script-loader.php +++ b/src/wp-includes/script-loader.php @@ -1040,7 +1040,7 @@ function wp_default_scripts( $scripts ) { did_action( 'init' ) && $scripts->add_data( 'json2', 'conditional', 'lt IE 8' ); $scripts->add( 'underscore', "/wp-includes/js/underscore$dev_suffix.js", array(), '1.13.4', 1 ); - $scripts->add( 'backbone', "/wp-includes/js/backbone$dev_suffix.js", array( 'underscore', 'jquery' ), '1.5.0', 1 ); + $scripts->add( 'backbone', "/wp-includes/js/backbone$dev_suffix.js", array( 'underscore', 'jquery' ), '1.6.0', 1 ); $scripts->add( 'wp-util', "/wp-includes/js/wp-util$suffix.js", array( 'underscore', 'jquery' ), false, 1 ); did_action( 'init' ) && $scripts->localize( From 9f09c574c0ac64041c81760f10646994d21ce49d Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Fri, 9 Aug 2024 00:16:33 +0000 Subject: [PATCH 10/10] Docs: Remove unsupported values in `plugins_api()` DocBlocks. The `group` field and the `hot_categories` action were never actually implemented on the WordPress.org side. Follow-up to [34596], [meta3227]. Props lopo, milana_cap. See #55645. git-svn-id: https://develop.svn.wordpress.org/trunk@58873 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-admin/includes/plugin-install.php | 38 +++++++++++------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/wp-admin/includes/plugin-install.php b/src/wp-admin/includes/plugin-install.php index 38c4b50e7d049..04287736bc661 100644 --- a/src/wp-admin/includes/plugin-install.php +++ b/src/wp-admin/includes/plugin-install.php @@ -20,34 +20,33 @@ * * The second filter, {@see 'plugins_api'}, allows a plugin to override the WordPress.org * Plugin Installation API entirely. If `$action` is 'query_plugins' or 'plugin_information', - * an object MUST be passed. 
If `$action` is 'hot_tags' or 'hot_categories', an array MUST - * be passed. + * an object MUST be passed. If `$action` is 'hot_tags', an array MUST be passed. * * Finally, the third filter, {@see 'plugins_api_result'}, makes it possible to filter the * response object or array, depending on the `$action` type. * * Supported arguments per action: * - * | Argument Name | query_plugins | plugin_information | hot_tags | hot_categories | - * | -------------------- | :-----------: | :----------------: | :------: | :------------: | - * | `$slug` | No | Yes | No | No | - * | `$per_page` | Yes | No | No | No | - * | `$page` | Yes | No | No | No | - * | `$number` | No | No | Yes | Yes | - * | `$search` | Yes | No | No | No | - * | `$tag` | Yes | No | No | No | - * | `$author` | Yes | No | No | No | - * | `$user` | Yes | No | No | No | - * | `$browse` | Yes | No | No | No | - * | `$locale` | Yes | Yes | No | No | - * | `$installed_plugins` | Yes | No | No | No | - * | `$is_ssl` | Yes | Yes | No | No | - * | `$fields` | Yes | Yes | No | No | + * | Argument Name | query_plugins | plugin_information | hot_tags | + * | -------------------- | :-----------: | :----------------: | :------: | + * | `$slug` | No | Yes | No | + * | `$per_page` | Yes | No | No | + * | `$page` | Yes | No | No | + * | `$number` | No | No | Yes | + * | `$search` | Yes | No | No | + * | `$tag` | Yes | No | No | + * | `$author` | Yes | No | No | + * | `$user` | Yes | No | No | + * | `$browse` | Yes | No | No | + * | `$locale` | Yes | Yes | No | + * | `$installed_plugins` | Yes | No | No | + * | `$is_ssl` | Yes | Yes | No | + * | `$fields` | Yes | Yes | No | * * @since 2.7.0 * * @param string $action API action to perform: 'query_plugins', 'plugin_information', - * 'hot_tags' or 'hot_categories'. + * or 'hot_tags'. * @param array|object $args { * Optional. Array or object of arguments to serialize for the Plugin Info API. * @@ -91,7 +90,6 @@ * @type bool $banners Whether to return the banner images links. 
Default false. * @type bool $icons Whether to return the icon links. Default false. * @type bool $active_installs Whether to return the number of active installations. Default false. - * @type bool $group Whether to return the assigned group. Default false. * @type bool $contributors Whether to return the list of contributors. Default false. * } * } @@ -136,7 +134,7 @@ function plugins_api( $action, $args = array() ) { * Returning a non-false value will effectively short-circuit the WordPress.org API request. * * If `$action` is 'query_plugins' or 'plugin_information', an object MUST be passed. - * If `$action` is 'hot_tags' or 'hot_categories', an array should be passed. + * If `$action` is 'hot_tags', an array should be passed. * * @since 2.7.0 *