diff --git a/src/DOMDocumentFactory.php b/src/DOMDocumentFactory.php index 9ab2a76..1447483 100644 --- a/src/DOMDocumentFactory.php +++ b/src/DOMDocumentFactory.php @@ -5,15 +5,16 @@ namespace SimpleSAML\XML; use DOMDocument; -use RuntimeException; use SimpleSAML\Assert\Assert; use SimpleSAML\XML\Exception\IOException; +use SimpleSAML\XML\Exception\RuntimeException; use SimpleSAML\XML\Exception\UnparseableXMLException; -use function defined; use function file_get_contents; +use function func_num_args; use function libxml_clear_errors; use function libxml_get_last_error; +use function libxml_set_external_entity_loader; use function libxml_use_internal_errors; use function sprintf; @@ -22,25 +23,41 @@ */ final class DOMDocumentFactory { + /** + * @var non-negative-int + * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available + */ + public const DEFAULT_OPTIONS = LIBXML_COMPACT | LIBXML_NONET | LIBXML_NSCLEAN; + + /** * @param string $xml - * @param non-empty-string $xml + * @param non-negative-int $options * * @return \DOMDocument */ - public static function fromString(string $xml): DOMDocument - { + public static function fromString( + string $xml, + int $options = self::DEFAULT_OPTIONS, + ): DOMDocument { + libxml_set_external_entity_loader(null); Assert::notWhitespaceOnly($xml); + Assert::notRegex( + $xml, + '/<(\s*)!(\s*)DOCTYPE/', + 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body', + RuntimeException::class, + ); $internalErrors = libxml_use_internal_errors(true); libxml_clear_errors(); - $domDocument = self::create(); - $options = LIBXML_DTDLOAD | LIBXML_DTDATTR | LIBXML_NONET | LIBXML_PARSEHUGE | LIBXML_NSCLEAN; - if (defined('LIBXML_COMPACT')) { - $options |= LIBXML_COMPACT; + // If LIBXML_NO_XXE is available and option not set + if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) { + $options != LIBXML_NO_XXE; } + $domDocument = self::create(); $loaded = $domDocument->loadXML($xml, $options); libxml_use_internal_errors($internalErrors); @@ -55,11 +72,11 @@ public static function fromString(string $xml): DOMDocument libxml_clear_errors(); foreach ($domDocument->childNodes as $child) { - if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { - throw new RuntimeException( - 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body', - ); - } + Assert::false( + $child->nodeType === XML_DOCUMENT_TYPE_NODE, + 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body', + RuntimeException::class, + ); } return $domDocument; @@ -68,10 +85,11 @@ public static function fromString(string $xml): DOMDocument /** * @param string $file + * @param non-negative-int $options * * @return \DOMDocument */ - public static function fromFile(string $file): DOMDocument + public static function fromFile(string $file, int $options = self::DEFAULT_OPTIONS): DOMDocument { error_clear_last(); $xml = @file_get_contents($file); @@ -83,7 +101,7 @@ public static function fromFile(string $file): DOMDocument } Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class); - return static::fromString($xml); + return (func_num_args() === 1) ? static::fromString($xml) : static::fromString($xml, $options); } diff --git a/tests/XML/DOMDocumentFactoryTest.php b/tests/XML/DOMDocumentFactoryTest.php index 2e01f03..79c4a6a 100644 --- a/tests/XML/DOMDocumentFactoryTest.php +++ b/tests/XML/DOMDocumentFactoryTest.php @@ -84,6 +84,19 @@ public function testStringThatContainsDocTypeIsNotAccepted(): void } + public function testStringThatContainsDocTypeIsNotAccepted2(): void + { + $xml = ' + %exfiltrate;]> + y'; + $this->expectException(RuntimeException::class); + $this->expectExceptionMessage( + 'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body', + ); + DOMDocumentFactory::fromString($xml); + } + + public function testEmptyFileIsNotValid(): void { $file = 'resources/xml/domdocument_empty.xml';