From 75a3436139ea8e616ed5f501c2bab84a68345627 Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:43:18 +0100 Subject: [PATCH 1/9] feat(parameters): create `parameters` package --- packages/parameters/.npmignore | 6 + packages/parameters/README.md | 4 + packages/parameters/package.json | 28 ++++ packages/parameters/src/index.ts | 210 +++++++++++++++++++++++++++++ packages/parameters/tsup.config.ts | 3 + 5 files changed, 251 insertions(+) create mode 100644 packages/parameters/.npmignore create mode 100644 packages/parameters/README.md create mode 100644 packages/parameters/package.json create mode 100644 packages/parameters/src/index.ts create mode 100644 packages/parameters/tsup.config.ts diff --git a/packages/parameters/.npmignore b/packages/parameters/.npmignore new file mode 100644 index 00000000..1f4f79ab --- /dev/null +++ b/packages/parameters/.npmignore @@ -0,0 +1,6 @@ +node_modules +pnpm-lock.yaml +rollup.config.js +src +tsconfig.json +CHANGELOG.md \ No newline at end of file diff --git a/packages/parameters/README.md b/packages/parameters/README.md new file mode 100644 index 00000000..4dd10f84 --- /dev/null +++ b/packages/parameters/README.md @@ -0,0 +1,4 @@ +# @otterhttp/parameters + +Utilities pertaining to [HTTP parameters](https://datatracker.ietf.org/doc/html/rfc9110#name-parameters), such as their +parsing and formatting. 
diff --git a/packages/parameters/package.json b/packages/parameters/package.json new file mode 100644 index 00000000..0a9dd530 --- /dev/null +++ b/packages/parameters/package.json @@ -0,0 +1,28 @@ +{ + "name": "@otterhttp/parameters", + "description": "Utilities pertaining to HTTP parameters", + "version": "0.0.0", + "license": "LGPL-3.0-or-later", + "homepage": "https://otterhttp.lordfirespeed.dev", + "funding": { + "type": "individual", + "url": "https://github.com/otterjs/otterhttp?sponsor=1" + }, + "repository": { + "type": "git", + "url": "https://github.com/otterjs/otterhttp.git", + "directory": "packages/parameters" + }, + "engines": { + "node": ">=20.16.0" + }, + "type": "module", + "types": "./dist/index.d.ts", + "exports": "./dist/index.js", + "files": ["dist"], + "scripts": { + "build": "tsup", + "prepack": "pnpm build" + }, + "dependencies": {} +} diff --git a/packages/parameters/src/index.ts b/packages/parameters/src/index.ts new file mode 100644 index 00000000..db7c3ff4 --- /dev/null +++ b/packages/parameters/src/index.ts @@ -0,0 +1,210 @@ +/** + * RegExp for values that can be escaped to create a valid quoted-string + * + * ``` + * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE + * ``` + */ +const TEXT_REGEXP = /^[\u0009\u0020-\u007e\u0080-\u00ff]+$/ + +/** + * RegExp to match a single whitespace character + * + * ``` + * WS = SP / HTAB + * ``` + */ +const WHITESPACE_CHAR_REGEXP = /[\u0009\u0020]/ + +/** + * RegExp to match a single token character + * + * ``` + * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" + * / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" + * / DIGIT / ALPHA + * ; any VCHAR, except delimiters + * ``` + */ +const TOKEN_CHAR_REGEXP = /[!#$%&'*+.^_`|~0-9A-Za-z-]/ + +/** + * RegExp to match values entirely consisting of token characters. 
+ * + * ``` + * token = 1*tchar + * ``` + */ +const TOKEN_REGEXP = /^[!#$%&'*+.^_`|~0-9A-Za-z-]+$/ + +/** + * RegExp to match qdtext chars in RFC 7231 sec 3.1.1.1 + * + * ``` + * qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text + * ``` + */ +const QUOTED_STRING_CONTENTS_REGEXP = /[\u0009\u0020\u0021\u0023-\u005b\u005d-\u007e\u0080-\u00ff]/ + +/** + * RegExp to match chars that can be quoted-pair in RFC 7230 sec 3.2.6 + * + * ``` + * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + * obs-text = %x80-FF + * ``` + */ +const CAN_QUOTE_REGEXP = /[\u0009\u0020-\u00ff]/ + +/** + * RegExp to match characters that must be quoted to be valid quoted-string content. + */ +const MUST_QUOTE_REGEXP = /([\\"])/g + +export function qstring(val: string) { + // no need to quote tokens + if (TOKEN_REGEXP.test(val)) return val + + if (val.length > 0 && !TEXT_REGEXP.test(val)) throw new TypeError('invalid parameter value') + + return `"${val.replace(MUST_QUOTE_REGEXP, '\\$1')}"` +} + +export function formatParameters(parameters: Record): string { + return Object.entries(parameters) + .sort() + .map(([parameterName, parameterValue]) => `; ${parameterName}=${qstring(parameterValue)}`) + .join('') +} + +/** + * Parser for parameters as defined in [RFC 9110]{@link https://datatracker.ietf.org/doc/html/rfc9110#name-parameters} + * + * ``` + * parameters = *( OWS ";" OWS [ parameter ] ) + * parameter = token "=" ( token / quoted-string ) + * ``` + */ +export function parseParameters(value: string): Record { + let currentIndex = 0 + let validIfTerminate = false + let semicolonIndex: number | undefined + let parameterNameIndex: number | undefined + let equalsIndex: number | undefined + + let quotedParameterValue: string[] | undefined + let currentCharEscaped = false + + const parsedParameters: Record = {} + + function reset() { + semicolonIndex = undefined + parameterNameIndex = undefined + equalsIndex = undefined + + quotedParameterValue = undefined + currentCharEscaped = false + } 
+ + function pop() { + if (parameterNameIndex == null) return + if (equalsIndex == null) throw new Error() + + const parameterName = value.slice(parameterNameIndex, equalsIndex).toLowerCase() + const parameterValue = + quotedParameterValue != null ? quotedParameterValue.join('') : value.slice(equalsIndex + 1, currentIndex) + parsedParameters[parameterName] = parameterValue + } + + for (; currentIndex < value.length; currentIndex++) { + const currentChar = value.charAt(currentIndex) + validIfTerminate = false + + // match whitespace until ";" + if (semicolonIndex == null) { + if (currentChar === ';') { + semicolonIndex = currentIndex + validIfTerminate = true + continue + } + if (!WHITESPACE_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + continue + } + + // match whitespace until token (parameter name) or semicolon + if (parameterNameIndex == null) { + if (TOKEN_CHAR_REGEXP.test(currentChar)) { + parameterNameIndex = currentIndex + continue + } + if (currentChar === ';') { + semicolonIndex = currentIndex + validIfTerminate = true + continue + } + if (!WHITESPACE_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + validIfTerminate = true + continue + } + + // match token until "=" + if (equalsIndex == null) { + if (currentChar === '=') { + equalsIndex = currentIndex + continue + } + if (!TOKEN_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + continue + } + + // initialize quotedParameterValue and consume the "\"" if the parameter value is quoted + if (equalsIndex === currentIndex - 1 && currentChar === '"') { + quotedParameterValue = [] + continue + } + + // match token until whitespace or semicolon + if (quotedParameterValue == null) { + if (currentChar === ';') { + pop() + reset() + semicolonIndex = currentIndex + validIfTerminate = true + continue + } + if (WHITESPACE_CHAR_REGEXP.test(currentChar)) { + pop() + reset() + continue + } + if 
(!TOKEN_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + validIfTerminate = true + continue + } + + // match quoted string contents until double quote + if (currentCharEscaped) { + if (!CAN_QUOTE_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + currentCharEscaped = false + quotedParameterValue.push(currentChar) + continue + } + if (currentChar === '"') { + pop() + reset() + validIfTerminate = true + continue + } + if (currentChar === '\\') { + currentCharEscaped = true + continue + } + if (!QUOTED_STRING_CONTENTS_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') + quotedParameterValue.push(currentChar) + } + + if (!validIfTerminate) throw new TypeError('invalid parameter format') + + pop() + return parsedParameters +} diff --git a/packages/parameters/tsup.config.ts b/packages/parameters/tsup.config.ts new file mode 100644 index 00000000..c2f66478 --- /dev/null +++ b/packages/parameters/tsup.config.ts @@ -0,0 +1,3 @@ +import { build } from '../../config/build' + +export default build() From 233924aa225cce40305016b06494a6ff58b7202a Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Thu, 15 Aug 2024 21:44:23 +0100 Subject: [PATCH 2/9] tooling: add testing path alias for `@otterhttp/parameters` --- vitest.config.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/vitest.config.ts b/vitest.config.ts index 9aaf888c..87d9ba08 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -29,6 +29,7 @@ export default defineConfig({ '@otterhttp/forwarded': relative('packages/forwarded/src'), '@otterhttp/ip-filter': relative('packages/ip-filter/src'), '@otterhttp/jsonp': relative('packages/jsonp/src'), + '@otterhttp/parameters': relative('packages/parameters/src'), '@otterhttp/proxy-address': relative('packages/proxy-address/src'), '@otterhttp/rate-limit': relative('packages/rate-limit/src'), '@otterhttp/request': relative('packages/request/src'), 
From f065f10249875d12fb8cb3008dacff6a709beeb7 Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:49:47 +0100 Subject: [PATCH 3/9] feat(parameters): add support for extended parameters (RFC 8187) --- packages/parameters/src/extended-fields.ts | 63 +++++++++++++++++++ packages/parameters/src/index.ts | 73 +++++++++++++++++----- 2 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 packages/parameters/src/extended-fields.ts diff --git a/packages/parameters/src/extended-fields.ts b/packages/parameters/src/extended-fields.ts new file mode 100644 index 00000000..09efb515 --- /dev/null +++ b/packages/parameters/src/extended-fields.ts @@ -0,0 +1,63 @@ +const NON_LATIN1_REGEXP = /[^\x20-\x7e\xa0-\xff]/g + +const HEX_ESCAPE_REPLACE_REGEXP = /%([0-9A-Fa-f]{2})/g + +const EXT_VALUE_REGEXP = + /^([A-Za-z0-9!#$%&+\-^_`{}~]+)'(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}|[A-Za-z]{4,8}|)'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)$/ + +const ENCODE_URL_ATTR_CHAR_REGEXP = /[\x00-\x20"'()*,/:;<=>?@[\\\]{}\x7f]/g + +export function getLatin1Fallback(val: unknown) { + // simple Unicode -> ISO-8859-1 transformation + return String(val).replace(NON_LATIN1_REGEXP, '?') +} + +function percentDecode(_str: string, hex: string) { + return String.fromCharCode(Number.parseInt(hex, 16)) +} + +function percentEncode(char: string) { + return `%${String(char).charCodeAt(0).toString(16).toUpperCase()}` +} + +/** + * @see https://datatracker.ietf.org/doc/html/rfc8187 + */ +export function encodeUtf8ExtendedFieldValue(val: unknown): string { + const str = String(val) + + // percent encode as UTF-8 + ENCODE_URL_ATTR_CHAR_REGEXP.lastIndex = 0 + const encoded = encodeURIComponent(str).replace(ENCODE_URL_ATTR_CHAR_REGEXP, percentEncode) + + return `utf-8''${encoded}` +} + +/** + * @see https://datatracker.ietf.org/doc/html/rfc8187 + */ +export function decodeExtendedFieldValue(str: string) { + const match = 
EXT_VALUE_REGEXP.exec(str) + if (!match) throw new TypeError('invalid extended field value') + + const charset = match[1].toLowerCase() + const encoded = match[2] + let value: string + switch (charset) { + case 'iso-8859-1': + HEX_ESCAPE_REPLACE_REGEXP.lastIndex = 0 + value = getLatin1Fallback(encoded.replace(HEX_ESCAPE_REPLACE_REGEXP, percentDecode)) + break + case 'utf-8': + try { + value = decodeURIComponent(encoded) + } catch { + throw new TypeError('invalid encoded utf-8') + } + break + default: + throw new TypeError('unsupported charset in extended field') + } + + return value +} diff --git a/packages/parameters/src/index.ts b/packages/parameters/src/index.ts index db7c3ff4..70ef1099 100644 --- a/packages/parameters/src/index.ts +++ b/packages/parameters/src/index.ts @@ -1,3 +1,5 @@ +import { encodeUtf8ExtendedFieldValue, getLatin1Fallback } from './extended-fields' + /** * RegExp for values that can be escaped to create a valid quoted-string * @@ -61,20 +63,46 @@ const CAN_QUOTE_REGEXP = /[\u0009\u0020-\u00ff]/ */ const MUST_QUOTE_REGEXP = /([\\"])/g -export function qstring(val: string) { +export function encodeFieldValue(val: string) { // no need to quote tokens if (TOKEN_REGEXP.test(val)) return val if (val.length > 0 && !TEXT_REGEXP.test(val)) throw new TypeError('invalid parameter value') + MUST_QUOTE_REGEXP.lastIndex = 0 return `"${val.replace(MUST_QUOTE_REGEXP, '\\$1')}"` } -export function formatParameters(parameters: Record): string { - return Object.entries(parameters) - .sort() - .map(([parameterName, parameterValue]) => `; ${parameterName}=${qstring(parameterValue)}`) - .join('') +function formatParameter([parameterName, encodedParameterValue]: [string, string]): string { + return `; ${parameterName}=${encodedParameterValue}` +} + +export function formatParameters( + parameters: Record, + { addFallbacks = true }: { addFallbacks?: boolean } = {} +): string { + const expandedParameters: Map = new Map() + + for (const [parameterName, parameterValue] 
of Object.entries(parameters)) { + if (!parameterName.endsWith('*')) { + expandedParameters.set(parameterName, encodeFieldValue(parameterValue)) + continue + } + + expandedParameters.set(parameterName, encodeUtf8ExtendedFieldValue(parameterValue)) + if (!addFallbacks) continue + const fallbackParameterName = parameterName.slice(0, -1) + if (Object.prototype.hasOwnProperty.call(parameters, fallbackParameterName)) continue + const fallbackValue = getLatin1Fallback(parameterValue) + expandedParameters.set(fallbackParameterName, fallbackValue) + } + + return Array.from(expandedParameters.entries()).sort().map(formatParameter).join('') +} + +export function validateParameterNames(parameterNames: readonly string[]): void { + if (parameterNames.every((parameterName) => TOKEN_REGEXP.test(parameterName))) return + throw new TypeError('invalid parameter name') } /** @@ -93,6 +121,7 @@ export function parseParameters(value: string): Record { let equalsIndex: number | undefined let quotedParameterValue: string[] | undefined + let extendedParameterValue: string[] | undefined let currentCharEscaped = false const parsedParameters: Record = {} @@ -103,16 +132,24 @@ export function parseParameters(value: string): Record { equalsIndex = undefined quotedParameterValue = undefined + extendedParameterValue = undefined currentCharEscaped = false } + function getParameterValue() { + if (quotedParameterValue != null) return quotedParameterValue.join('') + if (extendedParameterValue != null) return extendedParameterValue.join('') + if (equalsIndex == null) throw new Error() + return value.slice(equalsIndex + 1, currentIndex) + } + function pop() { if (parameterNameIndex == null) return - if (equalsIndex == null) throw new Error() const parameterName = value.slice(parameterNameIndex, equalsIndex).toLowerCase() - const parameterValue = - quotedParameterValue != null ? 
quotedParameterValue.join('') : value.slice(equalsIndex + 1, currentIndex) + const parameterValue = getParameterValue() + if (Object.prototype.hasOwnProperty.call(parsedParameters, parameterName)) + throw new TypeError('duplicate parameter name') parsedParameters[parameterName] = parameterValue } @@ -151,18 +188,22 @@ export function parseParameters(value: string): Record { if (equalsIndex == null) { if (currentChar === '=') { equalsIndex = currentIndex + + const previousIsAsterisk = value.charAt(currentIndex - 1) === '*' + const nextIsDoubleQuote = value.charAt(currentIndex + 1) === '"' + + if (previousIsAsterisk && nextIsDoubleQuote) throw new TypeError('invalid extended parameter value') + if (nextIsDoubleQuote) { + quotedParameterValue = [] + currentIndex++ + } + continue } if (!TOKEN_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid parameter format') continue } - // initialize quotedParameterValue and consume the "\"" if the parameter value is quoted - if (equalsIndex === currentIndex - 1 && currentChar === '"') { - quotedParameterValue = [] - continue - } - // match token until whitespace or semicolon if (quotedParameterValue == null) { if (currentChar === ';') { @@ -208,3 +249,5 @@ export function parseParameters(value: string): Record { pop() return parsedParameters } + +export * from './extended-fields' From 722595472385bd9d4d678c093497bff32b651f8b Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:50:16 +0100 Subject: [PATCH 4/9] chore(deps): `content-disposition` and `content-type` depend on `parameters` --- packages/content-disposition/package.json | 4 +++- packages/content-type/package.json | 4 +++- pnpm-lock.yaml | 14 ++++++++++++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/packages/content-disposition/package.json b/packages/content-disposition/package.json index deb1ec09..e5b012b5 100644 --- a/packages/content-disposition/package.json +++ 
b/packages/content-disposition/package.json @@ -24,5 +24,7 @@ "build": "tsup", "prepack": "pnpm build" }, - "dependencies": {} + "dependencies": { + "@otterhttp/parameters": "workspace:*" + } } diff --git a/packages/content-type/package.json b/packages/content-type/package.json index b2d1c901..11adbd7a 100644 --- a/packages/content-type/package.json +++ b/packages/content-type/package.json @@ -24,5 +24,7 @@ "build": "tsup", "prepack": "pnpm build" }, - "dependencies": {} + "dependencies": { + "@otterhttp/parameters": "workspace:*" + } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8e4a7baa..d862e71d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -94,9 +94,17 @@ importers: specifier: ^2.0.2 version: 2.0.2 - packages/content-disposition: {} + packages/content-disposition: + dependencies: + '@otterhttp/parameters': + specifier: workspace:* + version: link:../parameters - packages/content-type: {} + packages/content-type: + dependencies: + '@otterhttp/parameters': + specifier: workspace:* + version: link:../parameters packages/cookie: {} @@ -128,6 +136,8 @@ importers: specifier: workspace:* version: link:../app + packages/parameters: {} + packages/proxy-address: dependencies: '@otterhttp/forwarded': From 785c042e420562fa792c901c1e9644535b47bd27 Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:52:21 +0100 Subject: [PATCH 5/9] refactor(content-type): use `@otterhttp/parameters` to parse params --- packages/content-type/src/index.ts | 228 +++++++++++++++++------------ tests/modules/content-type.test.ts | 121 ++++++++------- 2 files changed, 204 insertions(+), 145 deletions(-) diff --git a/packages/content-type/src/index.ts b/packages/content-type/src/index.ts index 9cb9d883..696ff22c 100644 --- a/packages/content-type/src/index.ts +++ b/packages/content-type/src/index.ts @@ -1,4 +1,5 @@ import type { IncomingHttpHeaders, OutgoingHttpHeaders } from 'node:http' +import { formatParameters, 
parseParameters, validateParameterNames } from '@otterhttp/parameters' type Request = { headers: IncomingHttpHeaders } type Response = { getHeader: (name: HeaderName) => OutgoingHttpHeaders[HeaderName] } @@ -6,57 +7,46 @@ export type TypeParseableObject = Request | Response export type TypeParseable = string | TypeParseableObject /** - * RegExp to match *( ";" parameter ) in RFC 7231 sec 3.1.1.1 + * RegExp to match a single whitespace character * - * parameter = token "=" ( token / quoted-string ) - * token = 1*tchar + * ``` + * WS = SP / HTAB + * ``` + */ +const WHITESPACE_CHAR_REGEXP = /[\u0009\u0020]/ + +/** + * RegExp to match a single token character + * + * ``` * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" * / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" * / DIGIT / ALPHA * ; any VCHAR, except delimiters - * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE - * qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text - * obs-text = %x80-FF - * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) + * ``` */ -const PARAM_REGEXP = - /; *([!#$%&'*+.^_`|~0-9A-Za-z-]+) *= *("(?:[\u0009\u0020\u0021\u0023-\u005b\u005d-\u007e\u0080-\u00ff]|\\[\u0009\u0020-\u00ff])*"|[!#$%&'*+.^_`|~0-9A-Za-z-]+) */g -const TEXT_REGEXP = /^[\u0009\u0020-\u007e\u0080-\u00ff]+$/ -const TOKEN_REGEXP = /^[!#$%&'*+.^_`|~0-9A-Za-z-]+$/ +const TOKEN_CHAR_REGEXP = /[!#$%&'*+.^_`|~0-9A-Za-z-]/ /** - * RegExp to match quoted-pair in RFC 7230 sec 3.2.6 + * RegExp to match values entirely consisting of token characters. 
* - * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) - * obs-text = %x80-FF - */ -const QESC_REGEXP = /\\([\u0009\u0020-\u00ff])/g - -/** - * RegExp to match chars that must be quoted-pair in RFC 7230 sec 3.2.6 + * ``` + * token = 1*tchar + * ``` */ -const QUOTE_REGEXP = /([\\"])/g +const TOKEN_REGEXP = /^[!#$%&'*+.^_`|~0-9A-Za-z-]+$/ /** * RegExp to match type in RFC 7231 sec 3.1.1.1 * + * ``` * media-type = type "/" subtype * type = token * subtype = token + * ``` */ const TYPE_REGEXP = /^[!#$%&'*+.^_`|~0-9A-Za-z-]+\/[!#$%&'*+.^_`|~0-9A-Za-z-]+$/ -function qstring(val: unknown) { - const str = String(val) - - // no need to quote tokens - if (TOKEN_REGEXP.test(str)) return str - - if (str.length > 0 && !TEXT_REGEXP.test(str)) throw new TypeError('invalid parameter value') - - return `"${str.replace(QUOTE_REGEXP, '\\$1')}"` -} - function getContentType(obj: TypeParseableObject) { let header: number | string | string[] | undefined @@ -76,38 +66,89 @@ function getContentType(obj: TypeParseableObject) { } /** - * Class to represent a content type. + * Representation of a parsed MIME type. */ -class ContentType { - parameters: Record - type: string - constructor(type: string) { +export class ContentType { + /** + * The top-level media type into which the data type falls, such as `video` or `text`. + * e.g. in `application/json`, the type is `application`. + */ + readonly type: string + + /** + * The whole subtype, such as `manifest+json` or `plain`. + * e.g. in `text/conf+plain`, the subtype is `conf+plain`. + */ + readonly subtype: string + + /** + * The subtype suffix, such as `json` or `plain`. + * e.g. in `text/conf+plain`, the subtype suffix is `plain`. + */ + readonly subtypeSuffix: string + + /** + * Optional parameters added to provide additional details. + * For example, the `charset` parameter is often provided in HTTP contexts, e.g. 
+ * `Content-Type: application/json; charset=utf-8` + */ + parameters: Record + + static parse(contentType: string): ContentType { + return parse(contentType) + } + + /** + * @internal + */ + static fromValidatedInput(type: string, subtype: string, subtypeSuffix: string) { + return new ContentType(type, subtype, subtypeSuffix) + } + + protected constructor(type: string, subtype: string, subtypeSuffix: string) { this.parameters = {} this.type = type + this.subtype = subtype + this.subtypeSuffix = subtypeSuffix + } + + toString() { + return `${this.type}/${this.subtype}${formatParameters(this.parameters)}` + } + + hasWildcard() { + return this.type.indexOf('*') !== -1 || this.subtype.indexOf('*') !== -1 + } + + isPlainText() { + return isPlainText(this) + } + + /** + * The whole media type excluding parameters, such as `application/json` or `text/plain`. + */ + get mediaType() { + return `${this.type}/${this.subtype}` } } /** * Format object to media type. */ -export function format(obj: { type: string; parameters?: Record }) { +export function format(obj: { type: string; subtype: string; parameters?: Record }) { if (!obj || typeof obj !== 'object') throw new TypeError('argument obj is required') - const { parameters, type } = obj + const { parameters, type, subtype } = obj - if (!type || !TYPE_REGEXP.test(type)) throw new TypeError('invalid type') + if (!type || !subtype) throw new TypeError('invalid type') - let string = type + let string = `${type}/${subtype}` + if (!TYPE_REGEXP.test(string)) throw new TypeError('invalid type') // append parameters if (parameters && typeof parameters === 'object') { - const params = Object.keys(parameters).sort() - - for (const param of params) { - if (!TOKEN_REGEXP.test(param)) throw new TypeError('invalid parameter name') - - string += `; ${param}=${qstring(parameters[param])}` - } + validateParameterNames(Object.keys(parameters)) + string += formatParameters(parameters) } return string @@ -117,47 +158,56 @@ export function 
format(obj: { type: string; parameters?: Record * Parse media type to object. */ export function parse(value: TypeParseable): ContentType { - if (!value) throw new TypeError('argument string is required') + if (!value) throw new TypeError('argument `value` is required') // support req/res-like objects as argument - const header = typeof value === 'object' ? getContentType(value) : value - - if (typeof header !== 'string') throw new TypeError('argument string is required to be a string') - - let index = header.indexOf(';') - const type = index !== -1 ? header.slice(0, index).trim() : header.trim() - - if (!TYPE_REGEXP.test(type)) throw new TypeError('invalid media type') - - const obj = new ContentType(type.toLowerCase()) - - // parse parameters - if (index !== -1) { - let key: string - let match: RegExpExecArray | null - let value: string - - PARAM_REGEXP.lastIndex = index - - while ((match = PARAM_REGEXP.exec(header))) { - if (match.index !== index) throw new TypeError('invalid parameter format') - - index += match[0].length - key = match[1].toLowerCase() - value = match[2] - - if (value[0] === '"') { - // remove quotes and escapes - value = value.slice(1, value.length - 1).replace(QESC_REGEXP, '$1') - } - - obj.parameters[key] = value + let header = typeof value === 'object' ? 
getContentType(value) : value + + if (typeof header !== 'string') throw new TypeError('argument `value` must be string, request-like or response-like') + header = header.trim() + + let currentIndex = 0 + let slashIndex: number | undefined + for (; currentIndex < header.length; ++currentIndex) { + const currentChar = header.charAt(currentIndex) + if (currentChar === '/') { + slashIndex = currentIndex + break } + if (!TOKEN_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid media type') + } - if (index !== header.length) throw new TypeError('invalid parameter format') + if (typeof slashIndex === 'undefined') throw new TypeError('invalid media type') + if (slashIndex === 0) throw new TypeError('invalid media type') + + currentIndex += 1 + let plusIndex: number | undefined + let endIndex: number | undefined + for (; currentIndex < header.length; ++currentIndex) { + const currentChar = header.charAt(currentIndex) + if (currentChar === ';' || WHITESPACE_CHAR_REGEXP.test(currentChar)) { + if (currentIndex === slashIndex + 1) throw new TypeError('invalid media type') + endIndex = currentIndex + break + } + if (currentChar === '+') { + if (currentIndex === slashIndex + 1) throw new TypeError('invalid media type') + plusIndex = currentIndex + continue + } + if (!TOKEN_CHAR_REGEXP.test(currentChar)) throw new TypeError('invalid media type') } - return obj + const lowercaseHeader = header.toLowerCase() + const type = lowercaseHeader.slice(0, slashIndex) + const subtype = lowercaseHeader.slice(slashIndex + 1, endIndex) + const subtypeSuffix = plusIndex == null ? 
subtype : lowercaseHeader.slice(plusIndex + 1, endIndex) + + const parsedRepresentation = ContentType.fromValidatedInput(type, subtype, subtypeSuffix) + if (endIndex === undefined) return parsedRepresentation + + parsedRepresentation.parameters = parseParameters(header.slice(endIndex)) + return parsedRepresentation } /** @@ -173,14 +223,8 @@ const applicationPlaintextWhitelist = new Set([ 'node' ]) -export function isPlainText({ type }: ContentType) { - if (type.startsWith('text/')) return true - if (!type.startsWith('application/')) return false - let index = 12 - let start = index - for (; index < type.length; ++index) { - if (type.charAt(index) === '+') start = index + 1 - } - const subtype = type.slice(start) - return applicationPlaintextWhitelist.has(subtype) +export function isPlainText({ type, subtypeSuffix }: ContentType) { + if (type === 'text') return true + if (type !== 'application') return false + return applicationPlaintextWhitelist.has(subtypeSuffix) } diff --git a/tests/modules/content-type.test.ts b/tests/modules/content-type.test.ts index 71e73584..4c787743 100644 --- a/tests/modules/content-type.test.ts +++ b/tests/modules/content-type.test.ts @@ -3,24 +3,21 @@ import { describe, expect, it } from 'vitest' import * as contentType from '@/packages/content-type/src' import { isPlainText, parse } from '@/packages/content-type/src' -it('should help me', () => { - const parsed = parse('application/manifest+json') -}) - describe('format', () => { it('should format basic type', () => { - const str = contentType.format({ type: 'text/html' }) + const str = contentType.format({ type: 'text', subtype: 'html' }) expect(str).toEqual('text/html') }) it('should format type with suffix', () => { - const str = contentType.format({ type: 'image/svg+xml' }) + const str = contentType.format({ type: 'image', subtype: 'svg+xml' }) expect(str).toBe('image/svg+xml') }) it('should format type with parameter', () => { const str = contentType.format({ - type: 'text/html', 
+ type: 'text', + subtype: 'html', parameters: { charset: 'utf-8' } }) expect(str).toBe('text/html; charset=utf-8') @@ -28,7 +25,8 @@ describe('format', () => { it('should format type with parameter that needs quotes', () => { const str = contentType.format({ - type: 'text/html', + type: 'text', + subtype: 'html', parameters: { foo: 'bar or "baz"' } }) expect(str).toBe('text/html; foo="bar or \\"baz\\""') @@ -36,7 +34,8 @@ describe('format', () => { it('should format type with parameter with empty value', () => { const str = contentType.format({ - type: 'text/html', + type: 'text', + subtype: 'html', parameters: { foo: '' } }) expect(str).toBe('text/html; foo=""') @@ -44,7 +43,8 @@ describe('format', () => { it('should format type with multiple parameters', () => { const str = contentType.format({ - type: 'text/html', + type: 'text', + subtype: 'html', parameters: { charset: 'utf-8', foo: 'bar', bar: 'baz' } }) expect(str).toBe('text/html; bar=baz; charset=utf-8; foo=bar') @@ -70,25 +70,25 @@ describe('format', () => { it('should reject invalid type', () => { expect(() => { - contentType.format({ type: 'text/' } as any) + contentType.format({ type: 'text/', subtype: '' }) }).toThrow(/invalid type/) }) it('should reject invalid type with LWS', () => { expect(() => { - contentType.format({ type: ' text/html' } as any) + contentType.format({ type: ' text', subtype: 'html' }) }).toThrow(/invalid type/) }) it('should reject invalid parameter name', () => { expect(() => { - contentType.format({ type: 'image/svg', parameters: { 'foo/': 'bar' } }) + contentType.format({ type: 'image', subtype: 'svg', parameters: { 'foo/': 'bar' } }) }).toThrow(/invalid parameter name/) }) it('should reject invalid parameter value', () => { expect(() => { - contentType.format({ type: 'image/svg', parameters: { foo: 'bar\u0000' } }) + contentType.format({ type: 'image', subtype: 'svg', parameters: { foo: 'bar\u0000' } }) }).toThrow(/invalid parameter value/) }) }) @@ -97,78 +97,90 @@ 
describe('parse', () => { describe('contentType.parse(string)', () => { it('should parse basic type', () => { const type = contentType.parse('text/html') - expect(type.type).toBe('text/html') + expect(type).toMatchObject({ type: 'text', subtype: 'html' }) }) it('should parse with suffix', () => { const type = contentType.parse('image/svg+xml') - expect(type.type).toBe('image/svg+xml') + expect(type).toMatchObject({ type: 'image', subtype: 'svg+xml' }) }) it('should parse basic type with surrounding OWS', () => { const type = contentType.parse(' text/html ') - expect(type.type).toBe('text/html') + expect(type).toMatchObject({ type: 'text', subtype: 'html' }) }) it('should parse parameters', () => { const type = contentType.parse('text/html; charset=utf-8; foo=bar') - expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - charset: 'utf-8', - foo: 'bar' + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + charset: 'utf-8', + foo: 'bar' + } }) }) it('should parse parameters with extra LWS', () => { const type = contentType.parse('text/html ; charset=utf-8 ; foo=bar') - expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - charset: 'utf-8', - foo: 'bar' + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + charset: 'utf-8', + foo: 'bar' + } }) }) it('should lower-case type', () => { const type = contentType.parse('IMAGE/SVG+XML') - expect(type.type).toBe('image/svg+xml') + expect(type).toMatchObject({ type: 'image', subtype: 'svg+xml' }) }) it('should lower-case parameter names', () => { const type = contentType.parse('text/html; Charset=UTF-8') - expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - charset: 'UTF-8' + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + charset: 'UTF-8' + } }) }) it('should unquote parameter values', () => { const type = contentType.parse('text/html; charset="UTF-8"') - 
expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - charset: 'UTF-8' + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + charset: 'UTF-8' + } }) }) it('should unquote parameter values with escapes', () => { - const type = contentType.parse('text/html; charset = "UT\\F-\\\\\\"8\\""') - expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - charset: 'UTF-\\"8"' + const type = contentType.parse('text/html; charset="UT\\F-\\\\\\"8\\""') + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + charset: 'UTF-\\"8"' + } }) }) it('should handle balanced quotes', () => { const type = contentType.parse('text/html; param="charset=\\"utf-8\\"; foo=bar"; bar=foo') - expect(type.type).toBe('text/html') - - expect(type.parameters).toEqual({ - param: 'charset="utf-8"; foo=bar', - bar: 'foo' + expect(type).toMatchObject({ + type: 'text', + subtype: 'html', + parameters: { + param: 'charset="utf-8"; foo=bar', + bar: 'foo' + } }) }) @@ -183,7 +195,8 @@ describe('parse', () => { 'text/p£ain', 'text/(plain)', 'text/@plain', - 'text/plain,wrong' + 'text/plain,wrong', + 'text/+plain' ] describe.each(invalidTypes)("'invalid media type '%s'", (type: string) => { @@ -197,7 +210,9 @@ describe('parse', () => { const incorrectlyFormattedTypes = [ 'text/plain; foo="bar', 'text/plain; profile=http://localhost; foo=bar', - 'text/plain; profile=http://localhost' + 'text/plain; profile=http://localhost', + 'text/plain; charset =utf-8', + 'text/plain; charset= utf-8' ] it.each(incorrectlyFormattedTypes)("should throw on invalid parameter format '%s'", (type: string) => { @@ -209,13 +224,13 @@ describe('parse', () => { it('should require argument', () => { expect(() => { contentType.parse(undefined as any) - }).toThrow(/string.*required/) + }).toThrow(/argument.*is required/) }) it('should reject non-strings', () => { expect(() => { contentType.parse(7 as any) - }).toThrow(/string.*required/) + 
}).toThrow(/argument.*must be string/) }) }) @@ -223,7 +238,7 @@ describe('parse', () => { it('should parse content-type header', () => { const req = { headers: { 'content-type': 'text/html' } } const type = contentType.parse(req) - expect(type.type).toBe('text/html') + expect(type).toMatchObject({ type: 'text', subtype: 'html' }) }) it('should reject objects without either `headers` or `getHeaders` property', () => { @@ -244,7 +259,7 @@ describe('parse', () => { headers: { 'content-type': 'text/html' } } const type = contentType.parse(res) - expect(type.type).toBe('text/html') + expect(type).toMatchObject({ type: 'text', subtype: 'html' }) }) }) @@ -262,7 +277,7 @@ describe('parse', () => { } } const type = contentType.parse(res) - expect(type.type).toBe('text/html') + expect(type).toMatchObject({ type: 'text', subtype: 'html' }) }) }) }) From 3dd5ff1f009e04672e635b7bb47ff5c4885d5e82 Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:52:46 +0100 Subject: [PATCH 6/9] refactor(content-disposition): use `@otterhttp/parameters` to parse params --- packages/content-disposition/src/index.ts | 182 +++++----------------- tests/modules/content-disposition.test.ts | 63 ++++---- 2 files changed, 72 insertions(+), 173 deletions(-) diff --git a/packages/content-disposition/src/index.ts b/packages/content-disposition/src/index.ts index 03406bd0..325dcc22 100644 --- a/packages/content-disposition/src/index.ts +++ b/packages/content-disposition/src/index.ts @@ -1,51 +1,26 @@ -const ENCODE_URL_ATTR_CHAR_REGEXP = /[\x00-\x20"'()*,/:;<=>?@[\\\]{}\x7f]/g - -const HEX_ESCAPE_REGEXP = /%[0-9A-Fa-f]{2}/ -const HEX_ESCAPE_REPLACE_REGEXP = /%([0-9A-Fa-f]{2})/g +import { + decodeExtendedFieldValue, + formatParameters, + parseParameters, + validateParameterNames +} from '@otterhttp/parameters' const NON_LATIN1_REGEXP = /[^\x20-\x7e\xa0-\xff]/g -const QESC_REGEXP = /\\([\u0000-\u007f])/g - -const QUOTE_REGEXP = /([\\"])/g 
- -const PARAM_REGEXP = - /;[\x09\x20]*([!#$%&'*+.0-9A-Z^_`a-z|~-]+)[\x09\x20]*=[\x09\x20]*("(?:[\x20!\x23-\x5b\x5d-\x7e\x80-\xff]|\\[\x20-\x7e])*"|[!#$%&'*+.0-9A-Z^_`a-z|~-]+)[\x09\x20]*/g const TEXT_REGEXP = /^[\x20-\x7e\x80-\xff]+$/ const TOKEN_REGEXP = /^[!#$%&'*+.0-9A-Z^_`a-z|~-]+$/ -const EXT_VALUE_REGEXP = - /^([A-Za-z0-9!#$%&+\-^_`{}~]+)'(?:[A-Za-z]{2,3}(?:-[A-Za-z]{3}){0,3}|[A-Za-z]{4,8}|)'((?:%[0-9A-Fa-f]{2}|[A-Za-z0-9!#$&+.^_`|~-])+)$/ - -const DISPOSITION_TYPE_REGEXP = /^([!#$%&'*+.0-9A-Z^_`a-z|~-]+)[\x09\x20]*(?:$|;)/ - -const getlatin1 = (val: unknown) => { - // simple Unicode -> ISO-8859-1 transformation - return String(val).replace(NON_LATIN1_REGEXP, '?') -} +const DISPOSITION_TYPE_REGEXP = /^([!#$%&'*+.0-9A-Z^_`a-z|~-]+)(?:$|[\x09\x20]*;)/ export class ContentDisposition { type: string - parameters: Record - constructor(type: string, parameters: Record) { + parameters: Record + constructor(type: string, parameters: Record) { this.type = type this.parameters = parameters } } -const qstring = (val: unknown) => `"${String(val).replace(QUOTE_REGEXP, '\\$1')}"` - -const pencode = (char: string) => `%${String(char).charCodeAt(0).toString(16).toUpperCase()}` - -function ustring(val: unknown): string { - const str = String(val) - - // percent encode as UTF-8 - const encoded = encodeURIComponent(str).replace(ENCODE_URL_ATTR_CHAR_REGEXP, pencode) - - return `UTF-8''${encoded}` -} - const basename = (str: string) => str.slice(str.lastIndexOf('/') + 1) /** @@ -54,37 +29,27 @@ const basename = (str: string) => str.slice(str.lastIndexOf('/') + 1) export function format({ parameters, type -}: Partial<{ - parameters: Record +}: { + parameters?: Record type: string | boolean | undefined -}>) { +}) { if (type == null || typeof type !== 'string' || !TOKEN_REGEXP.test(type)) { throw new TypeError('invalid type') } - // start with normalized type let string = String(type).toLowerCase() // append parameters if (parameters && typeof parameters === 'object') { - const 
params = Object.keys(parameters).sort() - - for (const param of params) { - const val = param.slice(-1) === '*' ? ustring(parameters[param]) : qstring(parameters[param]) - - string += `; ${param}=${val}` - } + validateParameterNames(Object.keys(parameters)) + string += formatParameters(parameters) } return string } -function createParams(filename?: string, fallback?: string | boolean): Record { +function createParams(filename?: string, fallback?: string): Record { if (filename == null) return {} - const params: Record = {} - - // fallback defaults to true - if (fallback == null) fallback = true if (typeof fallback === 'string' && NON_LATIN1_REGEXP.test(fallback)) { throw new TypeError('fallback must be ISO-8859-1 string') } @@ -92,28 +57,26 @@ function createParams(filename?: string, fallback?: string | boolean): Record String.fromCharCode(Number.parseInt(hex, 16)) - /** * Create an attachment Content-Disposition header. * @@ -123,38 +86,13 @@ const pdecode = (_str: string, hex: string) => String.fromCharCode(Number.parseI export function contentDisposition( filename?: string, - options: Partial<{ - type: string - fallback: string | boolean - }> = {} + options?: { + type?: string + fallback?: string + } ): string { // format into string - return format(new ContentDisposition(options.type || 'attachment', createParams(filename, options.fallback))) -} - -function decodefield(str: string) { - const match = EXT_VALUE_REGEXP.exec(str) - if (!match) throw new TypeError('invalid extended field value') - - const charset = match[1].toLowerCase() - const encoded = match[2] - let value: string - switch (charset) { - case 'iso-8859-1': - value = getlatin1(encoded.replace(HEX_ESCAPE_REPLACE_REGEXP, pdecode)) - break - case 'utf-8': - try { - value = decodeURIComponent(encoded) - } catch { - throw new TypeError('invalid encoded utf-8') - } - break - default: - throw new TypeError('unsupported charset in extended field') - } - - return value + return format(new 
ContentDisposition(options?.type ?? 'attachment', createParams(filename, options?.fallback))) } /** @@ -162,58 +100,22 @@ function decodefield(str: string) { * @param header string */ export function parse(header: string): ContentDisposition { - let match = DISPOSITION_TYPE_REGEXP.exec(header) + const match = DISPOSITION_TYPE_REGEXP.exec(header) if (!match) throw new TypeError('invalid type format') // normalize type - let index = match[0].length + const index = match[0].length const type = match[1].toLowerCase() - let key: string - const names: string[] = [] - const params: Record = {} - let value: string | string[] - // calculate index to start at - index = PARAM_REGEXP.lastIndex = match[0].slice(-1) === ';' ? index - 1 : index - - // match parameters - while ((match = PARAM_REGEXP.exec(header))) { - if (match.index !== index) throw new TypeError('invalid parameter format') - - index += match[0].length - key = match[1].toLowerCase() - value = match[2] - - if (names.indexOf(key) !== -1) { - throw new TypeError('invalid duplicate parameter') - } - - names.push(key) - - if (key.indexOf('*') + 1 === key.length) { - // decode extended value - key = key.slice(0, -1) - value = decodefield(value) - - // overwrite existing value - params[key] = value - continue - } - - if (typeof params[key] === 'string') continue - - if (value[0] === '"') { - value = value.slice(1, value.length - 1).replace(QESC_REGEXP, '$1') - } - - params[key] = value - } + if (!match[0].endsWith(';')) return new ContentDisposition(type, {}) - if (index !== -1 && index !== header.length) { - throw new TypeError('invalid parameter format') + const parameters = parseParameters(header.slice(index - 1)) + for (const [parameterName, parameterValue] of Object.entries(parameters)) { + if (!parameterName.endsWith('*')) continue + parameters[parameterName] = decodeExtendedFieldValue(parameterValue) } - return new ContentDisposition(type, params) + return new ContentDisposition(type, parameters) } diff --git 
a/tests/modules/content-disposition.test.ts b/tests/modules/content-disposition.test.ts index 3c9790f5..a72008f3 100644 --- a/tests/modules/content-disposition.test.ts +++ b/tests/modules/content-disposition.test.ts @@ -10,11 +10,11 @@ describe('contentDisposition()', () => { describe('contentDisposition(filename)', () => { it('should create a header with file name', () => { - expect(contentDisposition('plans.pdf')).toBe('attachment; filename="plans.pdf"') + expect(contentDisposition('plans.pdf')).toBe('attachment; filename=plans.pdf') }) it('should use the basename of the string', () => { - expect(contentDisposition('/path/to/plans.pdf')).toBe('attachment; filename="plans.pdf"') + expect(contentDisposition('/path/to/plans.pdf')).toBe('attachment; filename=plans.pdf') }) it('should throw an error when non latin fallback is used', () => { expect(() => { @@ -23,17 +23,17 @@ describe('contentDisposition(filename)', () => { }) it('should use hasfallback', () => { expect(contentDisposition('index.ht', { type: 'html', fallback: 'html' })).toEqual( - `html; filename="html"; filename*=UTF-8''index.ht` + `html; filename*=utf-8''index.ht; filename=html` ) }) it('should use pencode fn', () => { expect(contentDisposition('inde(x.ht', { type: 'html', fallback: 'html' })).toEqual( - `html; filename="html"; filename*=UTF-8\'\'inde%28x.ht` + `html; filename*=utf-8\'\'inde%28x.ht; filename=html` ) }) it('should use fallback when file ext is non ascii', () => { expect(contentDisposition('index.ĄÇḐȨĢ', { type: 'html', fallback: 'html' })).toEqual( - `html; filename="html"; filename*=UTF-8\'\'index.%C4%84%C3%87%E1%B8%90%C8%A8%C4%A2` + `html; filename*=utf-8\'\'index.%C4%84%C3%87%E1%B8%90%C8%A8%C4%A2; filename=html` ) }) it('should throw an error when non string options.type is used', () => { @@ -43,7 +43,7 @@ describe('contentDisposition(filename)', () => { }) describe('when "filename" is US-ASCII', () => { it('should only include filename parameter', () => { - 
expect(contentDisposition('plans.pdf')).toBe('attachment; filename="plans.pdf"') + expect(contentDisposition('plans.pdf')).toBe('attachment; filename=plans.pdf') }) it('should escape quotes', () => { @@ -59,10 +59,10 @@ describe('parse(string)', () => { parse('"attachment"') }).toThrow('invalid type format') }) - it('should throw on trailing semi', () => { + it('should not throw on trailing semi', () => { expect(() => { parse('attachment;') - }).toThrow('invalid parameter format') + }).not.toThrow() }) it('should parse "attachment"', () => { expect(parse('attachment')).toStrictEqual(new ContentDisposition('attachment', {})) @@ -73,18 +73,20 @@ describe('parse(string)', () => { it('should parse "form-data"', () => { expect(parse('form-data')).toStrictEqual(new ContentDisposition('form-data', {})) }) - it('should parse with trailing LWS', () => { - expect(parse('attachment \t ')).toStrictEqual(new ContentDisposition('attachment', {})) + it('should throw with trailing LWS', () => { + expect(() => { + parse('attachment \t ') + }).toThrow() }) it('should normalize to lower-case', () => { expect(parse('ATTACHMENT')).toStrictEqual(new ContentDisposition('attachment', {})) }) }) describe('with parameters', () => { - it('should throw on trailing semi', () => { + it('should not throw on trailing semi', () => { expect(() => { parse('attachment; filename="rates.pdf";') - }).toThrow('invalid parameter format') + }).not.toThrow() }) it('should throw on invalid param name', () => { expect(() => { @@ -112,7 +114,7 @@ describe('parse(string)', () => { it('should reject duplicate parameters', () => { expect(() => { parse('attachment; filename=foo; filename=bar') - }).toThrow(/invalid duplicate parameter/) + }).toThrow(/duplicate parameter/) }) it.each(['filename="plans.pdf"', '; filename="plans.pdf"'])('should reject missing type', (value: string) => { @@ -188,25 +190,25 @@ describe('parse(string)', () => { it('should parse UTF-8 extended parameter value', () => { 
expect(parse("attachment; filename*=UTF-8''%E2%82%AC%20rates.pdf")).toEqual({ type: 'attachment', - parameters: { filename: '€ rates.pdf' } + parameters: { 'filename*': '€ rates.pdf' } }) }) it('should parse ISO-8859-1 extended parameter value', () => { expect(parse("attachment; filename*=ISO-8859-1''%A3%20rates.pdf")).toEqual({ type: 'attachment', - parameters: { filename: '£ rates.pdf' } + parameters: { 'filename*': '£ rates.pdf' } }) expect(parse("attachment; filename*=ISO-8859-1''%82%20rates.pdf")).toEqual({ type: 'attachment', - parameters: { filename: '? rates.pdf' } + parameters: { 'filename*': '? rates.pdf' } }) }) it('should not be case-sensitive for charser', () => { expect(parse("attachment; filename*=utf-8''%E2%82%AC%20rates.pdf")).toEqual({ type: 'attachment', - parameters: { filename: '€ rates.pdf' } + parameters: { 'filename*': '€ rates.pdf' } }) }) @@ -219,18 +221,18 @@ describe('parse(string)', () => { it('should parse with embedded language', () => { expect(parse("attachment; filename*=UTF-8'en'%E2%82%AC%20rates.pdf")).toEqual({ type: 'attachment', - parameters: { filename: '€ rates.pdf' } + parameters: { 'filename*': '€ rates.pdf' } }) }) - it('should prefer extended parameter value', () => { + it('should keep both extended and fallback parameter value', () => { expect(parse('attachment; filename="EURO rates.pdf"; filename*=UTF-8\'\'%E2%82%AC%20rates.pdf')).toEqual({ type: 'attachment', - parameters: { filename: '€ rates.pdf' } + parameters: { 'filename*': '€ rates.pdf', filename: 'EURO rates.pdf' } }) expect(parse('attachment; filename*=UTF-8\'\'%E2%82%AC%20rates.pdf; filename="EURO rates.pdf"')).toEqual({ type: 'attachment', - parameters: { filename: '€ rates.pdf' } + parameters: { 'filename*': '€ rates.pdf', filename: 'EURO rates.pdf' } }) }) }) @@ -367,16 +369,16 @@ describe('parse(string)', () => { }).toThrow(/invalid parameter format/) }) - it('should reject "attachment; filename=foo.html ;"', () => { + it('should accept "attachment; 
filename=foo.html ;"', () => { expect(() => { parse('attachment; filename=foo.html ;') - }).toThrow(/invalid parameter format/) + }).not.toThrow() }) - it('should reject "attachment; ;filename=foo"', () => { + it('should accept "attachment; ;filename=foo"', () => { expect(() => { parse('attachment; ;filename=foo') - }).toThrow(/invalid parameter format/) + }).not.toThrow() }) it('should reject "attachment; filename=foo bar.html"', () => { @@ -448,17 +450,12 @@ describe('parse(string)', () => { }) }) - it('should parse "attachment; filename ="foo.html""', () => { - expect(parse('attachment; filename ="foo.html"')).toEqual({ - type: 'attachment', - parameters: { filename: 'foo.html' } - }) + it('should reject "attachment; filename ="foo.html""', () => { + expect(() => parse('attachment; filename ="foo.html"')).toThrow(/invalid parameter format/) }) it('should reject "attachment; filename="foo.html"; filename="bar.html"', () => { - expect(parse.bind(null, 'attachment; filename="foo.html"; filename="bar.html"')).toThrow( - /invalid duplicate parameter/ - ) + expect(parse.bind(null, 'attachment; filename="foo.html"; filename="bar.html"')).toThrow(/duplicate parameter/) }) it('should reject "attachment; filename=foo[1](2).html"', () => { From 7ed7f476534889667f465152dce808b7e5b77996 Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:53:20 +0100 Subject: [PATCH 7/9] test(res): adjust tests for behavioural changes --- tests/modules/res.test.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/modules/res.test.ts b/tests/modules/res.test.ts index 1e6842c0..b8b33bcf 100644 --- a/tests/modules/res.test.ts +++ b/tests/modules/res.test.ts @@ -220,7 +220,7 @@ describe('Response extensions', () => { res.attachment(path.resolve(import.meta.dirname, '..', 'fixtures', 'favicon.ico')).end() }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; 
filename="favicon.ico"') + await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=favicon.ico') }) }) describe('res.download(filename)', () => { @@ -229,14 +229,14 @@ describe('Response extensions', () => { await res.download(path.resolve(import.meta.dirname, '..', 'fixtures', 'favicon.ico')) }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename="favicon.ico"') + await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=favicon.ico') }) it('should set Content-Disposition based on filename', async () => { const app = runServer(async (_req, res) => { await res.download(path.resolve(import.meta.dirname, '..', 'fixtures', 'favicon.ico'), 'favicon.icon') }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename="favicon.icon"') + await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=favicon.icon') }) it('should raise errors without closing response socket', async () => { const app = runServer(async (_req, res) => { @@ -245,7 +245,7 @@ describe('Response extensions', () => { ).rejects.toThrow(/EISDIR/) }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename="some_file.png"') + await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=some_file.png') }) it('should set "root" from options', async () => { const app = runServer(async (_req, res) => { @@ -254,7 +254,7 @@ describe('Response extensions', () => { }) }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename="favicon.ico"') + await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=favicon.ico') }) it(`'should pass options to sendFile's ReadStream'`, async () => { const app = runServer(async (_req, res) => { @@ -263,7 +263,7 @@ describe('Response extensions', () => { }) }) - await makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename="favicon.ico"') + await 
makeFetch(app)('/').expect('Content-Disposition', 'attachment; filename=favicon.ico') }) it('should set headers from options', async () => { const app = runServer(async (_req, res) => { @@ -275,7 +275,7 @@ describe('Response extensions', () => { }) await makeFetch(app)('/') - .expect('Content-Disposition', 'attachment; filename="favicon.ico"') + .expect('Content-Disposition', 'attachment; filename=favicon.ico') .expect('X-Custom-Header', 'Value') }) }) From 4fea22650f20bbbf60417e27250f1568f519014c Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:54:20 +0100 Subject: [PATCH 8/9] refactor(type-is): use `ContentType` to do heavy lifting of parsing/matching --- packages/request/src/util/request-type-is.ts | 4 +- packages/type-is/src/index.ts | 80 +++++++++----------- tests/modules/type-is.test.ts | 32 ++++---- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/packages/request/src/util/request-type-is.ts b/packages/request/src/util/request-type-is.ts index 4eac876b..995c3141 100644 --- a/packages/request/src/util/request-type-is.ts +++ b/packages/request/src/util/request-type-is.ts @@ -1,6 +1,6 @@ -import { typeIs } from '@otterhttp/type-is' +import { type ContentType, typeIs } from '@otterhttp/type-is' import type { HasHeaders } from '../types' -export function requestTypeIs(req: HasHeaders, types: readonly string[]) { +export function requestTypeIs(req: HasHeaders, types: readonly (string | ContentType)[]) { return Boolean(typeIs(req.headers['content-type'], types)) } diff --git a/packages/type-is/src/index.ts b/packages/type-is/src/index.ts index c53aa98e..6e97d813 100644 --- a/packages/type-is/src/index.ts +++ b/packages/type-is/src/index.ts @@ -1,66 +1,50 @@ -import { type TypeParseable, format as formatType, parse as parseType } from '@otterhttp/content-type' +import { ContentType, type TypeParseable, parse as parseType } from '@otterhttp/content-type' import mime from 'mime' 
-function normalizeType(value: TypeParseable) { - // parse the type - const type = parseType(value) - type.parameters = {} - // reformat it - return formatType(type) -} - -function tryNormalizeType(value: TypeParseable | undefined) { +function tryParseType(value: TypeParseable | undefined) { if (!value) return null try { - return normalizeType(value) + return parseType(value) } catch (err) { return null } } -function mimeMatch(expected: string | null, actual: string | null): boolean { - // invalid type - if (expected == null) return false +function mimeMatch(actual: ContentType | null, expected: ContentType): boolean { + // invalid types if (actual == null) return false + if (expected == null) return false - // split types - const actualParts = actual.split('/') - const expectedParts = expected.split('/') - - // invalid format - if (actualParts.length !== 2 || expectedParts.length !== 2) return false - - // validate type - if (expectedParts[0] !== '*' && expectedParts[0] !== actualParts[0]) return false + // ensure top-level type matches + if (expected.type !== '*' && expected.type !== actual.type) return false - // validate suffix wildcard - if (expectedParts[1].slice(0, 2) === '*+') - return ( - expectedParts[1].length <= actualParts[1].length + 1 && - expectedParts[1].slice(1) === actualParts[1].slice(1 - expectedParts[1].length) - ) + // check for suffix wildcards & match + if (expected.subtype.startsWith('*+')) return actual.subtypeSuffix === expected.subtypeSuffix // validate subtype - if (expectedParts[1] !== '*' && expectedParts[1] !== actualParts[1]) return false + if (expected.subtype !== '*' && expected.subtype !== actual.subtype) return false return true } -function normalize(type: string): string | null { +function normalize(type: string): ContentType | null { // invalid type if (typeof type !== 'string') return null switch (type) { case 'urlencoded': - return 'application/x-www-form-urlencoded' + return 
ContentType.parse('application/x-www-form-urlencoded') case 'multipart': - return 'multipart/*' + return ContentType.parse('multipart/*') } // "+json" -> "*/*+json" expando - if (type[0] === '+') return `*/*${type}` + if (type[0] === '+') return ContentType.parse(`*/*${type}`) - return type.indexOf('/') === -1 ? mime.getType(type) : type + if (type.indexOf('/') !== -1) return ContentType.parse(type) + const inferredType = mime.getType(type) + if (inferredType == null) return null + return ContentType.parse(inferredType) } /** @@ -69,24 +53,34 @@ function normalize(type: string): string | null { * a special shortcut like `multipart` or `urlencoded`, * or a mime type. */ -export function typeIs(value: TypeParseable | undefined, types?: readonly string[]) { +export function typeIs( + value: TypeParseable | undefined, + types?: readonly (string | ContentType)[] +): ContentType | false { let i: number // remove parameters and normalize - const val = tryNormalizeType(value) + const parsedValue = tryParseType(value) // no type or invalid - if (!val) return false + if (!parsedValue) return false // no types, return the content type - if (!types || !types.length) return val + if (!types || !types.length) return parsedValue - let type: string + let type: ContentType | string + let normalizedType: ContentType | null = null for (i = 0; i < types.length; i++) { - if (mimeMatch(normalize((type = types[i])), val)) { - return type[0] === '+' || type.indexOf('*') !== -1 ? val : type - } + type = types[i] + normalizedType = typeof type === 'string' ? 
normalize(type) : type + if (normalizedType == null) continue + if (!mimeMatch(parsedValue, normalizedType)) continue + if (type[0] === '+') return parsedValue + if (normalizedType.hasWildcard()) return parsedValue + return normalizedType } // no matches return false } + +export type { ContentType } diff --git a/tests/modules/type-is.test.ts b/tests/modules/type-is.test.ts index 1d33f55a..17f1bbe3 100644 --- a/tests/modules/type-is.test.ts +++ b/tests/modules/type-is.test.ts @@ -8,19 +8,23 @@ describe('typeIs', () => { }) it('should return value if types are empty', () => { - expect(typeIs('application/json')).toBe('application/json') + expect(typeIs('application/json')).toMatchObject({ mediaType: 'application/json' }) }) it("shouldn't depend on case", () => { - expect(typeIs('Application/Json')).toBe('application/json') + expect(typeIs('Application/Json')).toMatchObject({ mediaType: 'application/json' }) }) - it('should return value if types are empty', () => { - expect(typeIs('application/json', ['application/json'])).toBe('application/json') + it('should return first matched value', () => { + expect(typeIs('application/json', ['application/json'])).toMatchObject({ mediaType: 'application/json' }) + }) + + it('should return value if matched type starts with plus', () => { + expect(typeIs('application/json', ['+json'])).toMatchObject({ mediaType: 'application/json' }) }) it('should return value if matched type starts with plus', () => { - expect(typeIs('application/ld+json', ['+json'])).toBe('application/ld+json') + expect(typeIs('application/ld+json', ['+json'])).toMatchObject({ mediaType: 'application/ld+json' }) }) it('should return false if there is no match', () => { @@ -32,11 +36,13 @@ describe('typeIs', () => { }) it('should return matched value for urlencoded shorthand', () => { - expect(typeIs('application/x-www-form-urlencoded', ['urlencoded'])).toBe('urlencoded') + expect(typeIs('application/x-www-form-urlencoded', ['urlencoded'])).toMatchObject({ + 
mediaType: 'application/x-www-form-urlencoded' + }) }) - it('should return matched value for urlencoded shorthand', () => { - expect(typeIs('multipart/form-data', ['multipart'])).toBe('multipart') + it('should return matched value for multipart shorthand', () => { + expect(typeIs('multipart/form-data', ['multipart'])).toMatchObject({ mediaType: 'multipart/form-data' }) }) it.each(['', false, null, undefined])( @@ -46,14 +52,14 @@ describe('typeIs', () => { } ) - it('should return false if expected type has wrong format', () => { - expect(typeIs('multipart/form-data', ['application/javascript/wrong'])).toBe(false) + it('should throw error if expected type has wrong format', () => { + expect(() => { + typeIs('multipart/form-data', ['application/javascript/wrong']) + }).toThrowError() }) + it('should return false if the input is not a string', () => { const value: Record = { 1: 'test' } expect(typeIs(value as any)).toBe(false) }) - it('should return the same type as input if the type is not normalized', () => { - expect(typeIs('text/html', ['file.html'])).toBe('file.html') - }) }) From 109be5fa30e560bcca82e99a600f2218da1e1d2a Mon Sep 17 00:00:00 2001 From: Lordfirespeed <28568841+Lordfirespeed@users.noreply.github.com> Date: Fri, 16 Aug 2024 01:57:41 +0100 Subject: [PATCH 9/9] chore: create changeset --- .changeset/nice-taxis-hear.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .changeset/nice-taxis-hear.md diff --git a/.changeset/nice-taxis-hear.md b/.changeset/nice-taxis-hear.md new file mode 100644 index 00000000..38c15632 --- /dev/null +++ b/.changeset/nice-taxis-hear.md @@ -0,0 +1,9 @@ +--- +"@otterhttp/content-disposition": minor +"@otterhttp/content-type": minor +"@otterhttp/parameters": minor +"@otterhttp/type-is": major +--- + +Rely on `@otterhttp/parameters` for parsing HTTP header parameters +Refactor type-is to allow `@otterhttp/content-type` to do more heavy lifting; prefer returning `ContentType`s to strings