diff --git a/composer.json b/composer.json index 92dba2c..c649bfd 100644 --- a/composer.json +++ b/composer.json @@ -37,9 +37,24 @@ "require-dev": { "phpunit/phpunit": "^9", "amphp/php-cs-fixer-config": "^2", + "httpwg/structured-field-tests": "1.0", "league/uri": "^6.8 | ^7.1", "psalm/phar": "^5.4" }, + "repositories": [ + { + "type": "package", + "package": { + "name": "httpwg/structured-field-tests", + "version": "1.0", + "source": { + "url": "https://github.com/httpwg/structured-field-tests", + "type": "git", + "reference": "origin/main" + } + } + } + ], "scripts": { "test": "php -dzend.assertions=1 -dassert.exception=1 vendor/bin/phpunit", "code-style": "php vendor/bin/php-cs-fixer fix" diff --git a/src/StructuredFields/Boolean.php b/src/StructuredFields/Boolean.php new file mode 100644 index 0000000..e32a352 --- /dev/null +++ b/src/StructuredFields/Boolean.php @@ -0,0 +1,19 @@ + + */ +class Boolean extends Item +{ + /** + * @psalm-param Rfc8941Parameters $parameters + */ + public function __construct(bool $item, array $parameters) + { + parent::__construct($item, $parameters); + } +} diff --git a/src/StructuredFields/Bytes.php b/src/StructuredFields/Bytes.php new file mode 100644 index 0000000..1520068 --- /dev/null +++ b/src/StructuredFields/Bytes.php @@ -0,0 +1,19 @@ + + */ +class Bytes extends Item +{ + /** + * @psalm-param Rfc8941Parameters $parameters + */ + public function __construct(string $item, array $parameters) + { + parent::__construct($item, $parameters); + } +} diff --git a/src/StructuredFields/Date.php b/src/StructuredFields/Date.php new file mode 100644 index 0000000..18ae6fe --- /dev/null +++ b/src/StructuredFields/Date.php @@ -0,0 +1,19 @@ + + */ +class Date extends Item +{ + /** + * @psalm-param Rfc8941Parameters $parameters + */ + public function __construct(int $item, array $parameters) + { + parent::__construct($item, $parameters); + } +} diff --git a/src/StructuredFields/DisplayString.php b/src/StructuredFields/DisplayString.php new 
/**
 * Parser for HTTP Structured Field Values (RFC 8941): lists, dictionaries and items, including the
 * Date ("@...") and Display String ('%"..."') bare item types.
 *
 * Every parse* method signals a malformed input by returning null.
 *
 * NOTE(review): the generic arguments of the psalm types below were reconstructed from how the values
 * are built in this class — confirm them against the Item class.
 *
 * @psalm-type Rfc8941SingleItem = Item<int|float|string|bool>
 * @psalm-type Rfc8941ListItem = InnerList|Rfc8941SingleItem
 * @psalm-type Rfc8941BareItem = int|float|string|bool
 * @psalm-type Rfc8941Parameters = array<string, Rfc8941BareItem>
 */
class Rfc8941
{
    /**
     * Parses a structured-field list.
     *
     * @param string[]|string $value One field value, or several field lines which are joined with ",".
     * @psalm-return null|list<Rfc8941ListItem>
     *
     * @return array|null The list members in order, [] for an empty/blank value, null if malformed.
     */
    public static function parseList(array|string $value): ?array
    {
        $string = \is_array($value) ? \implode(",", $value) : $value;

        $i = \strspn($string, " ");
        if ($i === \strlen($string)) {
            return [];
        }

        $list = [];
        while (true) {
            $member = self::parseItemOrInnerList($string, $i);
            if ($member === null) {
                return null;
            }
            $list[] = $member;

            $more = self::advanceToNextMember($string, $i);
            if ($more !== true) {
                return $more === false ? $list : null;
            }
        }
    }

    /**
     * Parses a structured-field dictionary.
     *
     * A key without "=value" maps to a Boolean item holding true (plus any parameters that follow it).
     *
     * @param string[]|string $value One field value, or several field lines which are joined with ",".
     * @psalm-return array<string, Rfc8941ListItem>|null
     *
     * @return array|null Members indexed by key, [] for an empty/blank value, null if malformed.
     */
    public static function parseDictionary(array|string $value): ?array
    {
        $string = \is_array($value) ? \implode(",", $value) : $value;

        $i = \strspn($string, " ");
        $len = \strlen($string);
        if ($i === $len) {
            return [];
        }

        $values = [];
        while (true) {
            $key = self::parseKey($string, $i);
            if ($key === null) {
                return null;
            }

            if ($i < $len && $string[$i] === "=") {
                ++$i;
                $member = self::parseItemOrInnerList($string, $i);
                if ($member === null) {
                    return null;
                }
            } else {
                $parameters = self::parseParameters($string, $i);
                if ($parameters === null) {
                    return null;
                }
                $member = new Boolean(true, $parameters);
            }
            $values[$key] = $member;

            $more = self::advanceToNextMember($string, $i);
            if ($more !== true) {
                return $more === false ? $values : null;
            }
        }
    }

    /**
     * Parses a single item (bare item plus parameters); leading and trailing spaces are ignored.
     *
     * @psalm-return null|Rfc8941SingleItem
     */
    public static function parseItem(string $string): ?Item
    {
        $i = \strspn($string, " ");
        if ($i === \strlen($string)) {
            return null;
        }

        $parsed = self::parseItemInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses either an inner list "( item item ...)" or a single item starting at offset $i.
     *
     * @param int $i Offset to start at; advanced past the parsed member on success.
     * @psalm-return null|Rfc8941ListItem
     */
    private static function parseItemOrInnerList(string $string, int &$i): ?Item
    {
        $len = \strlen($string);
        if ($i >= $len) {
            // Input ended where a member was required (e.g. a dictionary ending in "key=").
            return null;
        }

        if ($string[$i] !== "(") {
            return self::parseItemInternal($string, $i);
        }

        $innerList = [];
        ++$i;
        while (true) {
            $i += \strspn($string, " ", $i);
            if ($i >= $len) {
                return null;
            }

            if ($string[$i] === ")") {
                ++$i;
                $parameters = self::parseParameters($string, $i);
                if ($parameters === null) {
                    return null;
                }
                return new InnerList($innerList, $parameters);
            }

            // Items of an inner list must be separated by at least one space.
            $previous = $string[$i - 1];
            if ($previous !== " " && $previous !== "(") {
                return null;
            }

            $item = self::parseItemInternal($string, $i);
            if ($item === null) {
                return null;
            }
            $innerList[] = $item;
        }
    }

    /**
     * Parses a bare item followed by its parameters and wraps both in the matching Item subclass.
     *
     * @param int $i Offset to start at; advanced past the item and its parameters on success.
     * @psalm-return null|Rfc8941SingleItem
     */
    private static function parseItemInternal(string $string, int &$i): ?Item
    {
        $value = self::parseBareItem($string, $i, $class);
        if ($value === null) {
            return null;
        }

        $parameters = self::parseParameters($string, $i);
        if ($parameters === null) {
            return null;
        }

        return new $class($value, $parameters);
    }

    /**
     * Parses a string consisting solely of an integer or decimal, optionally surrounded by spaces.
     */
    public static function parseIntegerOrDecimal(string $string): null|int|float
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i])) {
            return null;
        }

        $chr = \ord($string[$i]);
        if ($chr !== \ord("-") && ($chr < \ord('0') || $chr > \ord('9'))) {
            return null;
        }

        $parsed = self::parseIntegerOrDecimalInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses an integer (at most 15 digits) or a decimal (at most 12 integer and 1-3 fractional digits).
     *
     * @param int $i Offset of the first character, which must exist; advanced past the number on success.
     */
    private static function parseIntegerOrDecimalInternal(string $string, int &$i): null|int|float
    {
        $len = \strlen($string);

        $negative = $string[$i] === "-";
        if ($negative) {
            ++$i;
        }

        $integerDigits = \strspn($string, "0123456789", $i);
        if ($integerDigits < 1) {
            return null;
        }

        $dot = $i + $integerDigits;
        $fractionStart = $dot + 1;
        // A dot only makes this a decimal if at least one more character follows it; a dangling "."
        // is left unconsumed so the caller rejects it as trailing garbage.
        if ($fractionStart < $len && $string[$dot] === ".") {
            if ($integerDigits > 12) {
                return null;
            }
            $fractionDigits = \strspn($string, "0123456789", $fractionStart);
            if ($fractionDigits < 1 || $fractionDigits > 3) {
                return null;
            }
            $length = $integerDigits + 1 + $fractionDigits;
            $number = (float) \substr($string, $i, $length);
        } else {
            if ($integerDigits > 15) {
                return null;
            }
            $length = $integerDigits;
            $number = (int) \substr($string, $i, $length);
        }

        $i += $length;
        return $negative ? -$number : $number;
    }

    /**
     * Parses a string consisting solely of a quoted string, optionally surrounded by spaces.
     *
     * @return string|null The unescaped content, or null if malformed.
     */
    public static function parseString(string $string): ?string
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i]) || $string[$i++] !== '"') {
            return null;
        }

        $parsed = self::parseStringInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses the remainder of a quoted string.
     *
     * @param int $i Offset just after the opening quote; advanced past the closing quote on success.
     */
    private static function parseStringInternal(string $string, int &$i): ?string
    {
        $start = $i;
        $len = \strlen($string);
        $foundslash = false;
        while (true) {
            // Skip everything that needs no special treatment (printable ASCII except '"' and '\').
            $i += \strspn($string, " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~", $i);
            if ($i >= $len) {
                return null;
            }

            if ($string[$i] === '"') {
                $str = \substr($string, $start, $i++ - $start);
                // Only '\"' and '\\' are accepted below, so stripslashes() undoes exactly those.
                return $foundslash ? \stripslashes($str) : $str;
            }

            if ($string[$i] !== "\\") {
                return null;
            }
            if (++$i >= $len) {
                return null;
            }
            $foundslash = true;
            $chr = $string[$i++];
            if ($chr !== '"' && $chr !== "\\") {
                return null;
            }
        }
    }

    /**
     * Parses a string consisting solely of a token, optionally surrounded by spaces.
     */
    public static function parseToken(string $string): ?string
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i])) {
            return null;
        }

        $chr = \ord($string[$i]);
        $isTokenStart = $chr === \ord("*")
            || ($chr >= \ord('A') && $chr <= \ord("Z"))
            || ($chr >= \ord('a') && $chr <= \ord("z"));
        if (!$isTokenStart) {
            return null;
        }

        $parsed = self::parseTokenInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Consumes the run of token characters at offset $i; the caller has validated the first character.
     */
    private static function parseTokenInternal(string $string, int &$i): string
    {
        $length = \strspn($string, ":/!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", $i);
        $str = \substr($string, $i, $length);
        $i += $length;
        return $str;
    }

    /**
     * Parses a string consisting solely of a byte sequence (":base64:"), optionally surrounded by spaces.
     *
     * @return string|null The decoded bytes, or null if malformed.
     */
    public static function parseByteSequence(string $string): ?string
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i]) || $string[$i++] !== ':') {
            return null;
        }

        $parsed = self::parseByteSequenceInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses the remainder of a byte sequence.
     *
     * @param int $i Offset just after the opening colon; advanced past the closing colon on success.
     */
    private static function parseByteSequenceInternal(string $string, int &$i): ?string
    {
        $length = \strspn($string, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=", $i);
        $str = \base64_decode(\substr($string, $i, $length));
        $i += $length;
        if (!isset($string[$i]) || $string[$i++] !== ":") {
            return null;
        }
        return $str === false ? null : $str;
    }

    /**
     * Parses a string consisting solely of a boolean ("?0" or "?1"), optionally surrounded by spaces.
     */
    public static function parseBoolean(string $string): ?bool
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i]) || $string[$i++] !== '?') {
            return null;
        }

        $parsed = self::parseBooleanInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses the digit of a boolean.
     *
     * @param int $i Offset just after the "?"; advanced by one if a character is present.
     */
    private static function parseBooleanInternal(string $string, int &$i): ?bool
    {
        if (!isset($string[$i])) {
            return null;
        }

        $chr = $string[$i++];
        if ($chr === "0") {
            return false;
        }
        if ($chr === "1") {
            return true;
        }
        return null;
    }

    /**
     * Parses a string consisting solely of a date ("@" followed by an integer), optionally surrounded
     * by spaces.
     *
     * @return int|null The integer following the "@", or null if malformed.
     */
    public static function parseDate(string $string): ?int
    {
        $i = \strspn($string, " ");
        if (!isset($string[$i]) || $string[$i++] !== '@') {
            return null;
        }

        $parsed = self::parseDateInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses the integer of a date: an optional "-" followed by 1 to 15 digits.
     *
     * @param int $i Offset just after the "@"; advanced past the digits on success.
     */
    private static function parseDateInternal(string $string, int &$i): ?int
    {
        if (!isset($string[$i])) {
            return null;
        }

        $start = $i;
        if ($string[$i] === "-") {
            ++$i;
        }

        $digits = \strspn($string, "0123456789", $i);
        if ($digits < 1 || $digits > 15) {
            return null;
        }

        $i += $digits;
        // The slice starts at $start so that a leading "-" is part of the converted text.
        return (int) \substr($string, $start, $i - $start);
    }

    /**
     * Parses a string consisting solely of a display string ('%"..."' with %xx escapes), optionally
     * surrounded by spaces.
     *
     * @return string|null The decoded UTF-8 text, or null if malformed or not valid UTF-8.
     */
    public static function parseDisplayString(string $string): ?string
    {
        $i = \strspn($string, " ");
        // The shortest valid display string is '%""', i.e. three characters.
        if (\strlen($string) < $i + 3) {
            return null;
        }

        if ($string[$i++] !== '%' || $string[$i++] !== '"') {
            return null;
        }

        $parsed = self::parseDisplayStringInternal($string, $i);
        return self::hasOnlyTrailingSpaces($string, $i) ? $parsed : null;
    }

    /**
     * Parses the remainder of a display string.
     *
     * @param int $i Offset just after the opening '%"'; advanced past the closing quote on success.
     */
    private static function parseDisplayStringInternal(string $string, int &$i): ?string
    {
        $start = $i;
        $len = \strlen($string);
        $buf = "";
        while (true) {
            // Skip literal characters (printable ASCII except '"' and '%').
            $i += \strspn($string, " !#$&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\\", $i);
            if ($i >= $len) {
                return null;
            }

            if ($string[$i] === '%') {
                $buf .= \substr($string, $start, $i++ - $start);
                // Escapes are exactly two lowercase hex digits.
                if (\strspn($string, "0123456789abcdef", $i, 2) !== 2) {
                    return null;
                }
                $buf .= \chr(\hexdec(\substr($string, $i, 2)));
                $start = $i += 2;
                continue;
            }

            if ($string[$i] === '"') {
                $buf .= \substr($string, $start, $i++ - $start);
                // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
                return \preg_match('//u', $buf) ? $buf : null;
            }

            return null;
        }
    }

    /**
     * Parses a bare item of any type, dispatching on its first character.
     *
     * @param int $i Offset to start at; advanced past the bare item on success.
     * @param string|null $class Receives the Item subclass matching the detected type; left untouched
     *     when the first character does not start any bare item.
     *
     * @psalm-template TOutput of Rfc8941BareItem|null
     * @param-out (TOutput is null ? null : class-string<Item>) $class
     * @psalm-return TOutput
     */
    private static function parseBareItem(string $string, int &$i, ?string &$class = ""): null|int|float|string|bool
    {
        if (!isset($string[$i])) {
            // Input ended where a value was required (e.g. right after "key=").
            return null;
        }

        $chr = \ord($string[$i]);
        if ($chr === \ord("-") || ($chr >= \ord('0') && $chr <= \ord('9'))) {
            $class = Number::class;
            return self::parseIntegerOrDecimalInternal($string, $i);
        }
        if ($chr === \ord('"')) {
            $class = Str::class;
            ++$i;
            return self::parseStringInternal($string, $i);
        }
        if ($chr === \ord("*") || ($chr >= \ord('A') && $chr <= \ord("Z")) || ($chr >= \ord('a') && $chr <= \ord("z"))) {
            $class = Token::class;
            return self::parseTokenInternal($string, $i);
        }
        if ($chr === \ord(":")) {
            $class = Bytes::class;
            ++$i;
            return self::parseByteSequenceInternal($string, $i);
        }
        if ($chr === \ord("%")) {
            $class = DisplayString::class;
            if (!isset($string[++$i]) || $string[$i++] !== '"') {
                return null;
            }
            return self::parseDisplayStringInternal($string, $i);
        }
        if ($chr === \ord("?")) {
            $class = Boolean::class;
            ++$i;
            return self::parseBooleanInternal($string, $i);
        }
        if ($chr === \ord("@")) {
            $class = Date::class;
            ++$i;
            return self::parseDateInternal($string, $i);
        }
        return null;
    }

    /**
     * Parses zero or more ";key" / ";key=value" parameters starting at offset $i.
     *
     * A parameter without a value is stored as true.
     *
     * @param int $i Offset to start at; advanced past all consumed parameters.
     * @psalm-return null|Rfc8941Parameters
     *
     * @return array|null Parameters indexed by key ([] when none are present), null if malformed.
     */
    private static function parseParameters(string $string, int &$i): ?array
    {
        $parameters = [];
        $len = \strlen($string);
        while ($i < $len && $string[$i] === ";") {
            ++$i;
            $i += \strspn($string, " ", $i);
            if ($i >= $len) {
                return null;
            }

            $key = self::parseKey($string, $i);
            if ($key === null) {
                return null;
            }

            if ($i < $len && $string[$i] === "=") {
                ++$i;
                $item = self::parseBareItem($string, $i);
                if ($item === null) {
                    return null;
                }
                $parameters[$key] = $item;
            } else {
                $parameters[$key] = true;
            }
        }
        return $parameters;
    }

    /**
     * Parses a dictionary or parameter key: "*" or a lowercase letter, followed by any of
     * "*", ".", "-", "_", lowercase letters and digits.
     *
     * @param int $i Offset to start at; advanced past the key on success.
     */
    private static function parseKey(string $string, int &$i): ?string
    {
        if (!isset($string[$i])) {
            return null;
        }

        $chr = \ord($string[$i]);
        if ($chr !== \ord("*") && ($chr < \ord('a') || $chr > \ord('z'))) {
            return null;
        }

        $keystart = $i++;
        $i += \strspn($string, "*.-_abcdefghijklmnopqrstuvwxyz0123456789", $i);
        return \substr($string, $keystart, $i - $keystart);
    }

    /**
     * Consumes the optional whitespace and the comma separating two list or dictionary members.
     *
     * @param int $i Offset just after a member; advanced to the next member when one follows.
     *
     * @return bool|null true when another member starts at $i, false when the end of the input was
     *     reached, null when something other than "," follows or the input ends after the comma.
     */
    private static function advanceToNextMember(string $string, int &$i): ?bool
    {
        $len = \strlen($string);

        $i += \strspn($string, " \t", $i);
        if ($i >= $len) {
            return false;
        }
        if ($string[$i] !== ",") {
            return null;
        }

        ++$i;
        $i += \strspn($string, " \t", $i);
        return $i < $len ? true : null;
    }

    /**
     * Tells whether nothing but spaces remains in $string from offset $i on.
     */
    private static function hasOnlyTrailingSpaces(string $string, int $i): bool
    {
        return $i + \strspn($string, " ", $i) >= \strlen($string);
    }
}
"/../vendor/httpwg/structured-field-tests/*.json") as $file) { + foreach (\json_decode(\file_get_contents($file), true) as $case) { + + // We don't test merging two headers here + self::sanitize($case); + $cases[\basename($file) . ": {$case["name"]}"] = [$case]; + } + } + return $cases; + } + + public static function destructureItems(&$parsed) + { + if ($parsed instanceof Item) { + $parsed = [$parsed->item, $parsed->parameters]; + } + if (\is_array($parsed)) { + foreach ($parsed as &$value) { + self::destructureItems($value); + } + } + } + + public static function sanitize(&$case) + { + $expected = &$case["expected"]; + self::sanitizeType($expected); + if (\is_array($expected)) { + if ($case["header_type"] === "list" || $case["header_type"] === "dictionary") { + if ($case["header_type"] === "dictionary") { + self::recombineDictonary($expected); + } + foreach ($expected as &$item) { + if (\is_array($item[0])) { + foreach ($item[0] as &$innerItem) { + self::recombineDictonary($innerItem[1]); + } + } + self::recombineDictonary($item[1]); + } + } else { + self::recombineDictonary($expected[1]); + } + } + } + + public static function recombineDictonary(&$data) + { + $data = \array_combine(\array_column($data, 0), \array_column($data, 1)); + } + + public static function sanitizeType(&$data) + { + if (\is_array($data)) { + foreach ($data as &$value) { + if (isset($value["__type"])) { + if ($value["__type"] === "binary") { + // base32? wtf. 
+ $str = \rtrim($value["value"], "="); + $buf = ""; + $keys = \array_flip(\array_merge(\range('A', 'Z'), \range(2, 7))); + $byte = 0; + for ($i = 0, $len = \strlen($str); $i < $len; ++$i) { + $shift = (5 * ($i + 1)) % 8; + $byte = $byte << 5 | $keys[$str[$i]]; + if ($shift < 5) { + $buf .= \chr($byte >> $shift); + $byte &= (1 << $shift) - 1; + } + } + if ((5 * $i) % 8 >= 5) { + $buf .= \chr($byte << (8 - (5 * $i) % 8)); + } + $value = $buf; + } else { + $value = $value["value"]; + } + } else { + self::sanitizeType($value); + } + } + } + } +}