Skip to content

Commit

Permalink
Merge pull request #895 from Automattic/fix/amp-bind
Browse files Browse the repository at this point in the history
Add support for amp-bind
  • Loading branch information
westonruter authored Jan 25, 2018
2 parents 216ec37 + 40d48e3 commit 4744f2d
Show file tree
Hide file tree
Showing 5 changed files with 334 additions and 45 deletions.
137 changes: 95 additions & 42 deletions includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ class AMP_Tag_And_Attribute_Sanitizer extends AMP_Base_Sanitizer {
*/
protected $layout_allowed_attributes;

/**
* Mapping of alternative names back to their primary names.
*
* @since 0.7
* @var array
*/
protected $rev_alternate_attr_name_lookup = array();

/**
* Stack.
*
Expand Down Expand Up @@ -91,16 +99,26 @@ public function __construct( $dom, $args = array() ) {
'amp_allowed_tags' => AMP_Allowed_Tags_Generated::get_allowed_tags(),
'amp_globally_allowed_attributes' => AMP_Allowed_Tags_Generated::get_allowed_attributes(),
'amp_layout_allowed_attributes' => AMP_Allowed_Tags_Generated::get_layout_attributes(),
'amp_bind_placeholder_prefix' => AMP_DOM_Utils::get_amp_bind_placeholder_prefix(),
);

parent::__construct( $dom, $args );

/**
* Prepare whitelists
*/
$this->allowed_tags = $this->args['amp_allowed_tags'];
$this->globally_allowed_attributes = $this->args['amp_globally_allowed_attributes'];
$this->layout_allowed_attributes = $this->args['amp_layout_allowed_attributes'];
// Prepare whitelists.
$this->allowed_tags = $this->args['amp_allowed_tags'];
foreach ( AMP_Rule_Spec::$additional_allowed_tags as $tag_name => $tag_rule_spec ) {
$this->allowed_tags[ $tag_name ][] = $tag_rule_spec;
}

foreach ( $this->allowed_tags as &$tag_specs ) {
foreach ( $tag_specs as &$tag_spec ) {
if ( isset( $tag_spec[ AMP_Rule_Spec::ATTR_SPEC_LIST ] ) ) {
$tag_spec[ AMP_Rule_Spec::ATTR_SPEC_LIST ] = $this->process_alternate_names( $tag_spec[ AMP_Rule_Spec::ATTR_SPEC_LIST ] );
}
}
}
$this->globally_allowed_attributes = $this->process_alternate_names( $this->args['amp_globally_allowed_attributes'] );
$this->layout_allowed_attributes = $this->process_alternate_names( $this->args['amp_layout_allowed_attributes'] );
}

/**
Expand All @@ -127,17 +145,41 @@ public function get_scripts() {
return $scripts;
}

/**
* Process alternative names in attribute spec list.
*
* @since 0.7
*
* @param array $attr_spec_list Attribute spec list.
* @return array Modified attribute spec list.
*/
private function process_alternate_names( $attr_spec_list ) {
foreach ( $attr_spec_list as $attr_name => &$attr_spec ) {
if ( '[' === $attr_name[0] ) {
$placeholder_attr_name = $this->args['amp_bind_placeholder_prefix'] . trim( $attr_name, '[]' );
if ( ! isset( $attr_spec[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) {
$attr_spec[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] = array();
}
$attr_spec[ AMP_Rule_Spec::ALTERNATIVE_NAMES ][] = $placeholder_attr_name;
}

// Save all alternative names in lookup to improve performance.
if ( isset( $attr_spec[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) {
foreach ( $attr_spec[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) {
$this->rev_alternate_attr_name_lookup[ $alternative_name ] = $attr_name;
}
}
}
return $attr_spec_list;
}

/**
* Sanitize the <video> elements from the HTML contained in this instance's DOMDocument.
*
* @since 0.5
*/
public function sanitize() {

foreach ( AMP_Rule_Spec::$additional_allowed_tags as $tag_name => $tag_rule_spec ) {
$this->allowed_tags[ $tag_name ][] = $tag_rule_spec;
}

/**
* Add root of content to the stack.
*
Expand Down Expand Up @@ -279,19 +321,41 @@ private function process_node( $node ) {
return;
}

// Obtain list of component scripts required.
if ( ! empty( $tag_spec['also_requires_tag_warning'] ) ) {
$this->script_components[] = strtok( $tag_spec['also_requires_tag_warning'][0], ' ' );
}
if ( ! empty( $tag_spec['requires_extension'] ) ) {
$this->script_components = array_merge( $this->script_components, $tag_spec['requires_extension'] );
}
$merged_attr_spec_list = array_merge(
$this->globally_allowed_attributes,
$attr_spec_list
);

// Remove any remaining disallowed attributes.
$this->sanitize_disallowed_attributes_in_node( $node, $attr_spec_list );
$this->sanitize_disallowed_attributes_in_node( $node, $merged_attr_spec_list );

// Remove values that don't conform to the attr_spec.
$this->sanitize_disallowed_attribute_values_in_node( $node, $attr_spec_list );
$this->sanitize_disallowed_attribute_values_in_node( $node, $merged_attr_spec_list );

// Add required AMP component scripts if the element is still in the document.
if ( $node->parentNode ) {
if ( ! empty( $tag_spec['also_requires_tag_warning'] ) ) {
$this->script_components[] = strtok( $tag_spec['also_requires_tag_warning'][0], ' ' );
}
if ( ! empty( $tag_spec['requires_extension'] ) ) {
$this->script_components = array_merge( $this->script_components, $tag_spec['requires_extension'] );
}

// Check if element needs amp-bind component.
if ( $node instanceof DOMElement && ! in_array( 'amp-bind', $this->script_components, true ) ) {
foreach ( $node->attributes as $name => $value ) {
$is_bind_attribute = (
'[' === $name[0]
||
( isset( $this->rev_alternate_attr_name_lookup[ $name ] ) && '[' === $this->rev_alternate_attr_name_lookup[ $name ][0] )
);
if ( $is_bind_attribute ) {
$this->script_components[] = 'amp-bind';
break;
}
}
}
}
}

/**
Expand Down Expand Up @@ -525,33 +589,12 @@ private function sanitize_disallowed_attributes_in_node( $node, $attr_spec_list
$attrs_to_remove = array();
foreach ( $node->attributes as $attr_name => $attr_node ) {
if ( ! $this->is_amp_allowed_attribute( $attr_name, $attr_spec_list ) ) {
/**
* This attribute is not allowed for this node; plan to remove it.
*/
$attrs_to_remove[] = $attr_name;
}
}

if ( ! empty( $attrs_to_remove ) ) {
/*
* Ensure we are not removing attributes listed as an alternate
* or allowed attributes, e.g. 'srcset' is an alternate for 'src'.
*/
foreach ( $attr_spec_list as $attr_name => $attr_spec_rule_value ) {
if ( isset( $attr_spec_rule_value[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] ) ) {
foreach ( $attr_spec_rule_value[ AMP_Rule_Spec::ALTERNATIVE_NAMES ] as $alternative_name ) {
$alt_name_keys = array_keys( $attrs_to_remove, $alternative_name, true );
if ( ! empty( $alt_name_keys ) ) {
unset( $attrs_to_remove[ $alt_name_keys[0] ] );
}
}
}
}

// Remove the disallowed attributes.
foreach ( $attrs_to_remove as $attr_name ) {
$node->removeAttribute( $attr_name );
}
foreach ( $attrs_to_remove as $attr_name ) {
$node->removeAttribute( $attr_name );
}
}

Expand Down Expand Up @@ -1065,6 +1108,16 @@ private function is_amp_allowed_attribute( $attr_name, $attr_spec_list ) {
}
}
}

$is_allowed_alt_name_attr = (
isset( $this->rev_alternate_attr_name_lookup[ $attr_name ] )
&&
isset( $attr_spec_list[ $this->rev_alternate_attr_name_lookup[ $attr_name ] ] )
);
if ( $is_allowed_alt_name_attr ) {
return true;
}

return false;
}

Expand Down
110 changes: 110 additions & 0 deletions includes/utils/class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ public static function get_dom( $document ) {

$dom = new DOMDocument();

// @todo In the future consider an AMP_DOMDocument subclass that does this automatically. See <https://github.com/Automattic/amp-wp/pull/895/files#r163825513>.
$document = self::convert_amp_bind_attributes( $document );

/*
* Wrap in dummy tags, since XML needs one parent node.
* It also makes it easier to loop through nodes.
Expand All @@ -74,6 +77,110 @@ public static function get_dom( $document ) {
return $dom;
}

/**
* Get attribute prefix for converted amp-bind attributes.
*
* This contains a random string to prevent HTML content containing this data- attribute
* originally from being mutated to contain an amp-bind attribute when attributes are restored.
*
* @since 0.7
* @see \AMP_DOM_Utils::convert_amp_bind_attributes()
* @see \AMP_DOM_Utils::restore_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @return string HTML5 data-* attribute name prefix for AMP binding attributes.
*/
public static function get_amp_bind_placeholder_prefix() {
static $attribute_prefix;
if ( ! isset( $attribute_prefix ) ) {
$attribute_prefix = sprintf( 'amp-binding-%s-', md5( wp_rand() ) );
}
return $attribute_prefix;
}

/**
* Replace AMP binding attributes with something that libxml can parse (as HTML5 data-* attributes).
*
* This is necessary because attributes in square brackets are not understood in PHP and
* get dropped with an error raised:
* > Warning: DOMDocument::loadHTML(): error parsing attribute name
* This is a reciprocal function of AMP_DOM_Utils::restore_amp_bind_attributes().
*
* @since 0.7
* @see \AMP_DOM_Utils::convert_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @param string $html HTML containing amp-bind attributes.
* @return string HTML with AMP binding attributes replaced with HTML5 data-* attributes.
*/
public static function convert_amp_bind_attributes( $html ) {
$amp_bind_attr_prefix = self::get_amp_bind_placeholder_prefix();

// Pattern for HTML attribute accounting for binding attr name, boolean attribute, single/double-quoted attribute value, and unquoted attribute values.
$attr_regex = '#^\s+(?P<name>\[?[a-zA-Z0-9_\-]+\]?)(?P<value>=(?:"[^"]*"|\'[^\']*\'|[^\'"\s]+))?#';

/**
* Replace callback.
*
* @param array $tag_matches Tag matches.
* @return string Replacement.
*/
$replace_callback = function( $tag_matches ) use ( $amp_bind_attr_prefix, $attr_regex ) {
$old_attrs = rtrim( $tag_matches['attrs'] );
$new_attrs = '';
$offset = 0;
while ( preg_match( $attr_regex, substr( $old_attrs, $offset ), $attr_matches ) ) {
$offset += strlen( $attr_matches[0] );

if ( '[' === $attr_matches['name'][0] ) {
$new_attrs .= ' ' . $amp_bind_attr_prefix . trim( $attr_matches['name'], '[]' );
if ( isset( $attr_matches['value'] ) ) {
$new_attrs .= $attr_matches['value'];
}
} else {
$new_attrs .= $attr_matches[0];
}
}

// Bail on parse error which occurs when the regex isn't able to consume the entire $new_attrs string.
if ( strlen( $old_attrs ) !== $offset ) {
return $tag_matches[0];
}

return '<' . $tag_matches['name'] . $new_attrs . '>';
};

$html = preg_replace_callback(
// Match all start tags that probably contain a binding attribute.
'#<(?P<name>[a-zA-Z0-9_\-]+)(?P<attrs>\s+[^>]+\]=[^>]+)\s*>#',
$replace_callback,
$html
);

return $html;
}

/**
* Convert AMP bind-attributes back to their original syntax.
*
* This is a reciprocal function of AMP_DOM_Utils::convert_amp_bind_attributes().
*
* @since 0.7
* @see \AMP_DOM_Utils::convert_amp_bind_attributes()
* @link https://www.ampproject.org/docs/reference/components/amp-bind
*
* @param string $html HTML with amp-bind attributes converted.
* @return string HTML with amp-bind attributes restored.
*/
public static function restore_amp_bind_attributes( $html ) {
$html = preg_replace(
'#\s' . self::get_amp_bind_placeholder_prefix() . '([a-zA-Z0-9_\-]+)#',
' [$1]',
$html
);
return $html;
}

/**
* Return a valid DOMDocument representing arbitrary HTML content passed as a parameter.
*
Expand Down Expand Up @@ -146,6 +253,7 @@ public static function get_content_from_dom( $dom ) {
* @see Called by function get_content_from_dom()
*
* @since 0.6
* @todo In the future consider an AMP_DOMDocument subclass that does this automatically at saveHTML(). See <https://github.com/Automattic/amp-wp/pull/895/files#r163825513>.
*
* @param DOMDocument $dom Represents an HTML document.
* @param DOMNode $node Represents an HTML element of the $dom from which to extract HTML content.
Expand Down Expand Up @@ -175,6 +283,8 @@ public static function get_content_from_dom_node( $dom, $node ) {
return '';
}

$html = self::restore_amp_bind_attributes( $html );

/*
* Travis w/PHP 7.1 generates <br></br> and <hr></hr> vs. <br/> and <hr/>, respectively.
* Travis w/PHP 7.x generates <source ...></source> vs. <source ... />. Etc.
Expand Down
2 changes: 1 addition & 1 deletion phpcs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</rule>

<!-- Include sniffs for PHP cross-version compatibility. -->
<config name="testVersion" value="5.2-99.0"/>
<config name="testVersion" value="5.3-99.0"/>
<rule ref="PHPCompatibility">
<exclude-pattern>bin/*</exclude-pattern>
</rule>
Expand Down
28 changes: 28 additions & 0 deletions tests/test-class-amp-dom-utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,32 @@ public function test__get_content_from_dom__br_no_closing_tag() {

$this->assertEquals( $expected, $actual );
}

/**
* Test convert_amp_bind_attributes.
*
* @covers \AMP_DOM_Utils::convert_amp_bind_attributes()
* @covers \AMP_DOM_Utils::restore_amp_bind_attributes()
* @covers \AMP_DOM_Utils::get_amp_bind_placeholder_prefix()
*/
public function test_amp_bind_conversion() {
$original = '<amp-img width=300 height="200" data-foo="bar" selected src="/img/dog.jpg" [src]="myAnimals[currentAnimal].imageUrl"></amp-img>';
$converted = AMP_DOM_Utils::convert_amp_bind_attributes( $original );
$this->assertNotEquals( $converted, $original );
$this->assertContains( AMP_DOM_Utils::get_amp_bind_placeholder_prefix() . 'src="myAnimals[currentAnimal].imageUrl"', $converted );
$this->assertContains( 'width=300 height="200" data-foo="bar" selected', $converted );
$restored = AMP_DOM_Utils::restore_amp_bind_attributes( $converted );
$this->assertEquals( $original, $restored );

// Test malformed.
$malformed_html = array(
'<amp-img width="123" [text]="..."</amp-img>',
'<amp-img width="123" [text] data-test="asd"></amp-img>',
'<amp-img width="123" [text]="..." *bad*></amp-img>',
);
foreach ( $malformed_html as $html ) {
$converted = AMP_DOM_Utils::convert_amp_bind_attributes( $html );
$this->assertNotContains( AMP_DOM_Utils::get_amp_bind_placeholder_prefix(), $converted );
}
}
}
Loading

0 comments on commit 4744f2d

Please sign in to comment.