Skip to content

Commit

Permalink
Move theme support and dom utils to new dom document
Browse files Browse the repository at this point in the history
  • Loading branch information
schlessera committed Nov 27, 2019
1 parent 0c07c51 commit 7513e41
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 78 deletions.
83 changes: 35 additions & 48 deletions includes/class-amp-theme-support.php
Original file line number Diff line number Diff line change
Expand Up @@ -1536,10 +1536,10 @@ public static function filter_admin_bar_script_loader_tag( $tag, $handle ) {
* @todo All of this might be better placed inside of a sanitizer.
* @todo Consider removing any scripts that are not among the $script_handles.
*
* @param DOMDocument $dom Document.
* @param string[] $script_handles AMP script handles for components identified during output buffering.
* @param AMP_DOM_Document $dom Document.
* @param string[] $script_handles AMP script handles for components identified during output buffering.
*/
public static function ensure_required_markup( DOMDocument $dom, $script_handles = [] ) {
public static function ensure_required_markup( AMP_DOM_Document $dom, $script_handles = [] ) {
/**
* Elements.
*
Expand All @@ -1550,18 +1550,14 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
* @var DOMElement $noscript
*/

$xpath = new DOMXPath( $dom );

$head = $dom->getElementsByTagName( 'head' )->item( 0 );

// Ensure there is a schema.org script in the document.
// @todo Consider applying the amp_schemaorg_metadata filter on the contents when a script is already present.
$schema_org_meta_script = $xpath->query( '//script[ @type = "application/ld+json" ][ contains( ./text(), "schema.org" ) ]' )->item( 0 );
$schema_org_meta_script = $dom->xpath->query( '//script[ @type = "application/ld+json" ][ contains( ./text(), "schema.org" ) ]' )->item( 0 );
if ( ! $schema_org_meta_script ) {
$script = $dom->createElement( 'script' );
$script->setAttribute( 'type', 'application/ld+json' );
$script->appendChild( $dom->createTextNode( wp_json_encode( amp_get_schemaorg_metadata(), JSON_UNESCAPED_UNICODE ) ) );
$head->appendChild( $script );
$dom->head->appendChild( $script );
}

// Gather all links.
Expand All @@ -1578,7 +1574,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
),
],
];
$link_elements = $head->getElementsByTagName( 'link' );
$link_elements = $dom->head->getElementsByTagName( 'link' );
foreach ( $link_elements as $link ) {
if ( $link->hasAttribute( 'rel' ) ) {
$links[ $link->getAttribute( 'rel' ) ][] = $link;
Expand All @@ -1596,18 +1592,18 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
'href' => self::get_current_canonical_url(),
]
);
$head->appendChild( $rel_canonical );
$dom->head->appendChild( $rel_canonical );
}

// Store the last meta tag as the previous node to append to.
$meta_tags = $head->getElementsByTagName( 'meta' );
$previous_node = $meta_tags->length > 0 ? $meta_tags->item( $meta_tags->length - 1 ) : $head->firstChild;
$meta_tags = $dom->head->getElementsByTagName( 'meta' );
$previous_node = $meta_tags->length > 0 ? $meta_tags->item( $meta_tags->length - 1 ) : $dom->head->firstChild;

// Handle the title.
$title = $head->getElementsByTagName( 'title' )->item( 0 );
$title = $dom->head->getElementsByTagName( 'title' )->item( 0 );
if ( $title ) {
$title->parentNode->removeChild( $title ); // So we can move it.
$head->insertBefore( $title, $previous_node->nextSibling );
$dom->head->insertBefore( $title, $previous_node->nextSibling );
$previous_node = $title;
}

Expand All @@ -1623,7 +1619,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
$ordered_scripts = [];
$head_scripts = [];
$runtime_src = wp_scripts()->registered['amp-runtime']->src;
foreach ( $head->getElementsByTagName( 'script' ) as $script ) { // Note that prepare_response() already moved body scripts to head.
foreach ( $dom->head->getElementsByTagName( 'script' ) as $script ) { // Note that prepare_response() already moved body scripts to head.
$head_scripts[] = $script;
}
foreach ( $head_scripts as $script ) {
Expand Down Expand Up @@ -1719,7 +1715,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
if ( $link->parentNode ) {
$link->parentNode->removeChild( $link ); // So we can move it.
}
$head->insertBefore( $link, $previous_node->nextSibling );
$dom->head->insertBefore( $link, $previous_node->nextSibling );
$previous_node = $link;
}
}
Expand Down Expand Up @@ -1753,25 +1749,25 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
*/
$ordered_scripts = array_merge( $ordered_scripts, $amp_scripts );
foreach ( $ordered_scripts as $ordered_script ) {
$head->insertBefore( $ordered_script, $previous_node->nextSibling );
$dom->insertBefore( $ordered_script, $previous_node->nextSibling );
$previous_node = $ordered_script;
}

/*
* "8. Specify any custom styles by using the <style amp-custom> tag."
*/
$style = $xpath->query( './style[ @amp-custom ]', $head )->item( 0 );
$style = $dom->xpath->query( './style[ @amp-custom ]', $dom->head )->item( 0 );
if ( $style ) {
// Ensure the CSS manifest comment remains before style[amp-custom].
if ( $style->previousSibling instanceof DOMComment ) {
$comment = $style->previousSibling;
$comment->parentNode->removeChild( $comment );
$head->insertBefore( $comment, $previous_node->nextSibling );
$dom->insertBefore( $comment, $previous_node->nextSibling );
$previous_node = $comment;
}

$style->parentNode->removeChild( $style );
$head->insertBefore( $style, $previous_node->nextSibling );
$dom->insertBefore( $style, $previous_node->nextSibling );
$previous_node = $style;
}

Expand All @@ -1784,17 +1780,17 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
* "10. Finally, specify the AMP boilerplate code. By putting the boilerplate code last, it prevents custom styles
* from accidentally overriding the boilerplate css rules."
*/
$style = $xpath->query( './style[ @amp-boilerplate ]', $head )->item( 0 );
$style = $dom->xpath->query( './style[ @amp-boilerplate ]', $dom->head )->item( 0 );
if ( ! $style ) {
$style = $dom->createElement( 'style' );
$style->setAttribute( 'amp-boilerplate', '' );
$style->appendChild( $dom->createTextNode( amp_get_boilerplate_stylesheets()[0] ) );
} else {
$style->parentNode->removeChild( $style ); // So we can move it.
}
$head->appendChild( $style );
$dom->head->appendChild( $style );

$noscript = $xpath->query( './noscript[ style[ @amp-boilerplate ] ]', $head )->item( 0 );
$noscript = $dom->xpath->query( './noscript[ style[ @amp-boilerplate ] ]', $dom->head )->item( 0 );
if ( ! $noscript ) {
$noscript = $dom->createElement( 'noscript' );
$style = $dom->createElement( 'style' );
Expand All @@ -1804,7 +1800,7 @@ public static function ensure_required_markup( DOMDocument $dom, $script_handles
} else {
$noscript->parentNode->removeChild( $noscript ); // So we can move it.
}
$head->appendChild( $noscript );
$dom->head->appendChild( $noscript );

unset( $previous_node );
}
Expand Down Expand Up @@ -2154,33 +2150,26 @@ public static function prepare_response( $response, $args = [] ) {
);
}

$dom = AMP_DOM_Utils::get_dom( $response );
$xpath = new DOMXPath( $dom );
$head = $dom->getElementsByTagName( 'head' )->item( 0 );
$dom = AMP_DOM_Utils::get_dom( $response );

// Move anything after </html>, such as Query Monitor output added at shutdown, to just before </body>.
$body = $dom->getElementsByTagName( 'body' )->item( 0 );
if ( $body ) {
while ( $dom->documentElement->nextSibling ) {
// Trailing elements after </html> will get wrapped in additional <html> elements.
if ( 'html' === $dom->documentElement->nextSibling->nodeName ) {
while ( $dom->documentElement->nextSibling->firstChild ) {
$body->appendChild( $dom->documentElement->nextSibling->firstChild );
}
$dom->removeChild( $dom->documentElement->nextSibling );
} else {
$body->appendChild( $dom->documentElement->nextSibling );
while ( $dom->documentElement->nextSibling ) {
// Trailing elements after </html> will get wrapped in additional <html> elements.
if ( 'html' === $dom->documentElement->nextSibling->nodeName ) {
while ( $dom->documentElement->nextSibling->firstChild ) {
$dom->body->appendChild( $dom->documentElement->nextSibling->firstChild );
}
$dom->removeChild( $dom->documentElement->nextSibling );
} else {
$dom->body->appendChild( $dom->documentElement->nextSibling );
}
}

AMP_HTTP::send_server_timing( 'amp_dom_parse', -$dom_parse_start, 'AMP DOM Parse' );

// Make sure scripts from the body get moved to the head.
if ( isset( $head ) ) {
foreach ( $xpath->query( '//body//script[ @custom-element or @custom-template or @src = "https://cdn.ampproject.org/v0.js" ]' ) as $script ) {
$head->appendChild( $script->parentNode->removeChild( $script ) );
}
foreach ( $dom->xpath->query( '//body//script[ @custom-element or @custom-template or @src = "https://cdn.ampproject.org/v0.js" ]' ) as $script ) {
$dom->head->appendChild( $script->parentNode->removeChild( $script ) );
}

// Ensure the mandatory amp attribute is present on the html element.
Expand Down Expand Up @@ -2237,11 +2226,9 @@ public static function prepare_response( $response, $args = [] ) {
* Make sure that document.write() is disabled to prevent dynamically-added content (such as added
* via amp-live-list) from wiping out the page by introducing any scripts that call this function.
*/
if ( $head ) {
$script = $dom->createElement( 'script' );
$script->appendChild( $dom->createTextNode( 'document.addEventListener( "DOMContentLoaded", function() { document.write = function( text ) { throw new Error( "[AMP-WP] Prevented document.write() call with: " + text ); }; } );' ) );
$head->appendChild( $script );
}
$script = $dom->createElement( 'script' );
$script->appendChild( $dom->createTextNode( 'document.addEventListener( "DOMContentLoaded", function() { document.write = function( text ) { throw new Error( "[AMP-WP] Prevented document.write() call with: " + text ); }; } );' ) );
$dom->head->appendChild( $script );
} elseif ( ! self::is_customize_preview_iframe() ) {
$response = esc_html__( 'Redirecting to non-AMP version.', 'amp' );

Expand Down
46 changes: 17 additions & 29 deletions includes/utils/class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class AMP_DOM_Utils {
*
* @param string $document Valid HTML document to be represented by a DOMDocument.
* @param string $encoding Optional. Encoding to use for the content. Defaults to `get_bloginfo( 'charset' )`.
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
* @return AMP_DOM_Document|false Returns a DOM document, or false if conversion failed.
*/
public static function get_dom( $document, $encoding = null ) {
$libxml_previous_state = libxml_use_internal_errors( true );
Expand Down Expand Up @@ -170,18 +170,15 @@ static function( $noscript_matches ) {
* Apparently PHP's DOM parser is lenient and allows nodes in the HEAD which do not belong there. A proper
* HTML5 parser would instead close the HEAD early as soon as it encounters an illegal element.
*
* @param DOMDocument $dom DOM Document to manipulate.
* @param AMP_DOM_Document $dom DOM Document to manipulate.
*/
private static function move_invalid_head_nodes_to_body( DOMDocument $dom ) {
$head = $dom->getElementsByTagName( 'head' )->item( 0 );
$body = $dom->getElementsByTagName( 'body' )->item( 0 );

private static function move_invalid_head_nodes_to_body( AMP_DOM_Document $dom ) {
// Walking backwards makes it easier to move elements in the expected order.
$node = $head->lastChild;
$node = $dom->head->lastChild;
while ( $node ) {
$next_sibling = $node->previousSibling;
if ( ! self::is_valid_head_node( $node ) ) {
$body->insertBefore( $head->removeChild( $node ), $body->firstChild );
$dom->body->insertBefore( $dom->head->removeChild( $node ), $dom->body->firstChild );
}
$node = $next_sibling;
}
Expand Down Expand Up @@ -377,7 +374,7 @@ public static function restore_amp_bind_attributes( $html ) {
* @param string $content Valid HTML content to be represented by a DOMDocument.
* @param string $encoding Optional. Encoding to use for the content. Defaults to `get_bloginfo( 'charset' )`.
*
* @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
* @return AMP_DOM_Document|false Returns a DOM document, or false if conversion failed.
*/
public static function get_dom_from_content( $content, $encoding = null ) {
// Detect encoding from the current WordPress installation.
Expand All @@ -401,21 +398,14 @@ public static function get_dom_from_content( $content, $encoding = null ) {
* @since 0.2
* @see AMP_DOM_Utils::get_content_from_dom_node() Reciprocal function.
*
* @param DOMDocument $dom Represents an HTML document from which to extract HTML content.
* @param AMP_DOM_Document $dom Represents an HTML document from which to extract HTML content.
* @return string Returns the HTML content of the body element represented in the DOMDocument.
*/
public static function get_content_from_dom( $dom ) {
$body = $dom->getElementsByTagName( 'body' )->item( 0 );

// The DOMDocument may contain no body. In which case return nothing.
if ( null === $body ) {
return '';
}

public static function get_content_from_dom( AMP_DOM_Document $dom ) {
return preg_replace(
'#^.*?<body.*?>(.*)</body>.*?$#si',
'$1',
self::get_content_from_dom_node( $dom, $body )
self::get_content_from_dom_node( $dom, $dom->body )
);
}

Expand All @@ -426,13 +416,12 @@ public static function get_content_from_dom( $dom ) {
* @since 0.6
* @see AMP_DOM_Utils::get_dom() Where the operations in this method are mirrored.
* @see AMP_DOM_Utils::get_content_from_dom() Reciprocal function.
* @todo In the future consider an AMP_DOMDocument subclass that does this automatically at saveHTML(). See <https://github.com/ampproject/amp-wp/pull/895/files#r163825513>.
*
* @param DOMDocument $dom Represents an HTML document.
* @param DOMElement $node Represents an HTML element of the $dom from which to extract HTML content.
* @param AMP_DOM_Document $dom Represents an HTML document.
* @param DOMElement $node Represents an HTML element of the $dom from which to extract HTML content.
* @return string Returns the HTML content represented by the given node.
*/
public static function get_content_from_dom_node( $dom, $node ) {
public static function get_content_from_dom_node( AMP_DOM_Document $dom, $node ) {
/**
* Self closing tags regex.
*
Expand Down Expand Up @@ -460,12 +449,11 @@ public static function get_content_from_dom_node( $dom, $node ) {
*/
$mustache_tag_placeholders = self::get_mustache_tag_placeholders();
$mustache_tags_replaced = false;
$xpath = new DOMXPath( $dom );
$templates = $dom->getElementsByTagName( 'template' );
foreach ( $templates as $template ) {

// These attributes are the only ones that saveHTML() will URL-encode.
foreach ( $xpath->query( './/*/@src|.//*/@href|.//*/@action', $template ) as $attribute ) {
foreach ( $dom->xpath->query( './/*/@src|.//*/@href|.//*/@action', $template ) as $attribute ) {
$attribute->nodeValue = str_replace(
array_keys( $mustache_tag_placeholders ),
array_values( $mustache_tag_placeholders ),
Expand Down Expand Up @@ -624,15 +612,15 @@ public static function is_node_empty( $node ) {
* @since 0.2
* @deprecated
*
* @param DOMDocument $dom Represents HTML document on which to force closing tags.
* @param DOMElement $node Represents HTML element to start closing tags on.
* If not passed, defaults to first child of body.
* @param AMP_DOM_Document $dom Represents HTML document on which to force closing tags.
* @param DOMElement $node Represents HTML element to start closing tags on.
* If not passed, defaults to first child of body.
*/
public static function recursive_force_closing_tags( $dom, $node = null ) {
_deprecated_function( __METHOD__, '0.7' );

if ( null === $node ) {
$node = $dom->getElementsByTagName( 'body' )->item( 0 );
$node = $dom->body;
}

if ( XML_ELEMENT_NODE !== $node->nodeType ) {
Expand Down
2 changes: 1 addition & 1 deletion tests/php/test-class-amp-theme-support.php
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ public function get_schema_script_data() {
*/
public function test_ensure_required_markup_schemaorg( $script, $expected ) {
$page = '<html><head><script type="application/ld+json">%s</script></head><body>Test</body></html>';
$dom = new DOMDocument();
$dom = new AMP_DOM_Document();
$dom->loadHTML( sprintf( $page, $script ) );
AMP_Theme_Support::ensure_required_markup( $dom );
$this->assertEquals( $expected, substr_count( $dom->saveHTML(), 'schema.org' ) );
Expand Down

0 comments on commit 7513e41

Please sign in to comment.