Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "decompress" response utility #3423

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
6 changes: 6 additions & 0 deletions lib/core/constants.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
'use strict'

// Status codes for which `decompress` returns the response body untouched
// (presumably because such responses carry no body — confirm against the
// Fetch spec's "null body status" definition).
const nullBodyStatus = /** @type {const} */ ([101, 204, 205, 304])

// Status codes treated as redirects. `decompress` skips decoding when one of
// these is combined with a `Location` header and a request whose redirect
// mode is 'follow'.
const redirectStatus = /** @type {const} */ ([301, 302, 303, 307, 308])

/**
* @see https://developer.mozilla.org/docs/Web/HTTP/Headers
*/
Expand Down Expand Up @@ -137,6 +141,8 @@ for (let i = 0; i < wellknownHeaderNames.length; ++i) {
}

module.exports = {
nullBodyStatus,
redirectStatus,
wellknownHeaderNames,
headerNameLowerCasedRecord,
getHeaderNameAsBuffer
Expand Down
173 changes: 171 additions & 2 deletions lib/core/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,19 @@ const assert = require('node:assert')
const { kDestroyed, kBodyUsed, kListeners, kBody } = require('./symbols')
const { IncomingMessage } = require('node:http')
const stream = require('node:stream')
const zlib = require('node:zlib')
const net = require('node:net')
const { Blob } = require('node:buffer')
const nodeUtil = require('node:util')
const { stringify } = require('node:querystring')
const { EventEmitter: EE } = require('node:events')
const { InvalidArgumentError } = require('./errors')
const { headerNameLowerCasedRecord, getHeaderNameAsBuffer } = require('./constants')
const {
headerNameLowerCasedRecord,
getHeaderNameAsBuffer,
nullBodyStatus,
redirectStatus
} = require('./constants')
const { tree } = require('./tree')

const [nodeMajor, nodeMinor] = process.versions.node.split('.').map(v => Number(v))
Expand Down Expand Up @@ -882,6 +888,167 @@ const normalizedMethodRecords = {
Object.setPrototypeOf(normalizedMethodRecordsBase, null)
Object.setPrototypeOf(normalizedMethodRecords, null)

/**
 * Produce the decoded body of a response, based on its `Content-Encoding`
 * header. The body is handed back untouched when there is nothing to decode:
 * a `null` body, a HEAD or CONNECT request, a null-body status code, or a
 * redirect response that is going to be followed.
 * @param {Request} request
 * @param {Response} response
 * @returns {ReadableStream<Uint8Array> | null}
 */
function decompress (request, response) {
  const { body } = response

  if (body === null) {
    return body
  }

  // A redirect is only followed when the response names a location, the
  // request opted into following, and the status is a redirect status.
  const isFollowedRedirect =
    response.headers.get('location') &&
    request.redirect === 'follow' &&
    redirectStatus.includes(response.status)

  const { method } = request
  const hasNothingToDecode =
    method === 'HEAD' ||
    method === 'CONNECT' ||
    nullBodyStatus.includes(response.status) ||
    isFollowedRedirect

  return hasNothingToDecode
    ? body
    : decompressStream(body, response.headers.get('content-encoding'))
}

/**
 * Split a `Content-Encoding` header value into its individual codings,
 * lower-cased and stripped of surrounding whitespace, in header order.
 * @param {string} contentEncoding
 * @returns {string[]}
 */
function getCodings (contentEncoding) {
  // Content-coding values are case-insensitive, so compare in lower case:
  // https://www.rfc-editor.org/rfc/rfc7231#section-3.1.2.1
  const codings = []

  for (const token of contentEncoding.toLowerCase().split(',')) {
    codings.push(token.trim())
  }

  return codings
}

// Transform stream that inflates data without knowing up front whether it
// is a zlib stream or a raw deflate stream. The choice is made on the first
// non-empty chunk: when the low four bits of its first byte equal 0x08 the
// data is fed to zlib.createInflate(), otherwise to zlib.createInflateRaw().
class InflateStream extends stream.Transform {
  #zlibOptions

  /** @param {zlib.ZlibOptions} [zlibOptions] options handed to the zlib inflater */
  constructor (zlibOptions) {
    super()
    this.#zlibOptions = zlibOptions
  }

  _transform (chunk, encoding, callback) {
    // The inflater already exists: just keep feeding it.
    if (this._inflateStream) {
      this._inflateStream.write(chunk, encoding, callback)
      return
    }

    // An empty chunk has no first byte to inspect; wait for real data.
    if (chunk.length === 0) {
      callback()
      return
    }

    const hasZlibHeader = (chunk[0] & 0x0F) === 0x08
    const inflater = hasZlibHeader
      ? zlib.createInflate(this.#zlibOptions)
      : zlib.createInflateRaw(this.#zlibOptions)

    // Relay the inner stream's output, end and errors through this stream.
    inflater.on('data', (data) => this.push(data))
    inflater.on('end', () => this.push(null))
    inflater.on('error', (err) => this.destroy(err))

    this._inflateStream = inflater
    inflater.write(chunk, encoding, callback)
  }

  _final (callback) {
    const inflater = this._inflateStream

    if (inflater) {
      inflater.end()
      this._inflateStream = null
    }

    callback()
  }
}

/**
 * Build a new InflateStream.
 * @param {zlib.ZlibOptions} [zlibOptions] forwarded unchanged to InflateStream
 * @returns {InflateStream}
 */
function createInflate (zlibOptions) {
  const inflateStream = new InflateStream(zlibOptions)
  return inflateStream
}

/**
 * Return the list of transform streams necessary
 * to decode a body stream of the given content encoding.
 *
 * Decoders are listed in the order they must be applied, i.e. the reverse of
 * the order the codings appear in the header. If any coding is unsupported
 * the result is an empty list and the body is meant to be left as-is.
 *
 * Every coding is validated before any stream is constructed, so a header
 * containing an unsupported coding never allocates zlib/brotli streams that
 * would be discarded immediately without being closed.
 * @param {string} contentEncoding
 * @returns {import('node:stream').Transform[]}
 */
function getDecoders (contentEncoding) {
  const codings = getCodings(contentEncoding)
  const factories = []

  for (let i = codings.length - 1; i >= 0; --i) {
    const factory = getDecoderFactory(codings[i])

    if (factory === null) {
      // One unsupported coding makes the whole chain undecodable.
      return []
    }

    factories.push(factory)
  }

  return factories.map((createDecoder) => createDecoder())
}

/**
 * Look up how to build the decoder for a single normalized content coding.
 * @param {string} coding
 * @returns {(() => import('node:stream').Transform) | null}
 * A factory for the decoder, or `null` when the coding is not supported.
 */
function getDecoderFactory (coding) {
  switch (coding) {
    // https://www.rfc-editor.org/rfc/rfc9112.html#section-7.2
    case 'x-gzip':
    case 'gzip':
      return () => zlib.createGunzip({
        // Be less strict when decoding compressed responses, since sometimes
        // servers send slightly invalid responses that are still accepted
        // by common browsers.
        // Always using Z_SYNC_FLUSH is what cURL does.
        flush: zlib.constants.Z_SYNC_FLUSH,
        finishFlush: zlib.constants.Z_SYNC_FLUSH
      })
    case 'deflate':
      return () => createInflate({
        flush: zlib.constants.Z_SYNC_FLUSH,
        finishFlush: zlib.constants.Z_SYNC_FLUSH
      })
    case 'br':
      return () => zlib.createBrotliDecompress({
        flush: zlib.constants.BROTLI_OPERATION_FLUSH,
        finishFlush: zlib.constants.BROTLI_OPERATION_FLUSH
      })
    default:
      return null
  }
}

/**
 * Decode a body stream according to a `Content-Encoding` header value.
 * The input is returned as-is when the header value is missing or empty, or
 * when `getDecoders` yields no decoders for it.
 * @param {ReadableStream<Uint8Array>} input
 * @param {string | null} contentEncoding The value of the "Content-Encoding" response header.
 * @returns The untouched `input`, or the stream returned by `stream.pipeline`.
 */
function decompressStream (input, contentEncoding) {
  const decoders = contentEncoding ? getDecoders(contentEncoding) : []

  // Nothing to decode with: hand the stream back untouched.
  if (decoders.length === 0) {
    return input
  }

  // The completion callback is deliberately a no-op.
  // NOTE(review): callers presumably listen for 'error' on the returned
  // stream to observe failures — confirm at each call site.
  return stream.pipeline(input, ...decoders, () => {})
}

module.exports = {
kEnumerableProperty,
isDisturbed,
Expand Down Expand Up @@ -925,5 +1092,7 @@ module.exports = {
nodeMajor,
nodeMinor,
safeHTTPMethods: ['GET', 'HEAD', 'OPTIONS', 'TRACE'],
wrapRequestBody
wrapRequestBody,
decompress,
decompressStream
}
5 changes: 2 additions & 3 deletions lib/web/fetch/constants.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
'use strict'

const { nullBodyStatus, redirectStatus } = require('../../core/constants')

const corsSafeListedMethods = /** @type {const} */ (['GET', 'HEAD', 'POST'])
const corsSafeListedMethodsSet = new Set(corsSafeListedMethods)

const nullBodyStatus = /** @type {const} */ ([101, 204, 205, 304])

const redirectStatus = /** @type {const} */ ([301, 302, 303, 307, 308])
const redirectStatusSet = new Set(redirectStatus)

/**
Expand Down
75 changes: 17 additions & 58 deletions lib/web/fetch/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ const {
} = require('./response')
const { HeadersList } = require('./headers')
const { Request, cloneRequest, getRequestDispatcher, getRequestState } = require('./request')
const zlib = require('node:zlib')
const {
bytesMatch,
makePolicyContainer,
Expand Down Expand Up @@ -44,7 +43,6 @@ const {
clampAndCoarsenConnectionTimingInfo,
simpleRangeHeaderValue,
buildContentRange,
createInflate,
extractMimeType
} = require('./util')
const assert = require('node:assert')
Expand All @@ -57,9 +55,17 @@ const {
subresourceSet
} = require('./constants')
const EE = require('node:events')
const { Readable, pipeline, finished, isErrored, isReadable } = require('node:stream')
const { addAbortListener, bufferToLowerCasedHeaderName } = require('../../core/util')
const { dataURLProcessor, serializeAMimeType, minimizeSupportedMimeType } = require('./data-url')
const { Readable, finished, isErrored, isReadable } = require('node:stream')
const {
addAbortListener,
bufferToLowerCasedHeaderName,
decompress
} = require('../../core/util')
const {
dataURLProcessor,
serializeAMimeType,
minimizeSupportedMimeType
} = require('./data-url')
const { getGlobalDispatcher } = require('../../global')
const { webidl } = require('./webidl')
const { STATUS_CODES } = require('node:http')
Expand Down Expand Up @@ -2105,73 +2111,26 @@ async function httpNetworkFetch (
}

/** @type {string[]} */
let codings = []
let location = ''

const headersList = new HeadersList()

for (let i = 0; i < rawHeaders.length; i += 2) {
headersList.append(bufferToLowerCasedHeaderName(rawHeaders[i]), rawHeaders[i + 1].toString('latin1'), true)
}
const contentEncoding = headersList.get('content-encoding', true)
if (contentEncoding) {
// https://www.rfc-editor.org/rfc/rfc7231#section-3.1.2.1
// "All content-coding values are case-insensitive..."
codings = contentEncoding.toLowerCase().split(',').map((x) => x.trim())
}
location = headersList.get('location', true)

this.body = new Readable({ read: resume })

const decoders = []

const willFollow = location && request.redirect === 'follow' &&
redirectStatusSet.has(status)

// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
if (codings.length !== 0 && request.method !== 'HEAD' && request.method !== 'CONNECT' && !nullBodyStatus.includes(status) && !willFollow) {
for (let i = codings.length - 1; i >= 0; --i) {
const coding = codings[i]
// https://www.rfc-editor.org/rfc/rfc9112.html#section-7.2
if (coding === 'x-gzip' || coding === 'gzip') {
decoders.push(zlib.createGunzip({
// Be less strict when decoding compressed responses, since sometimes
// servers send slightly invalid responses that are still accepted
// by common browsers.
// Always using Z_SYNC_FLUSH is what cURL does.
flush: zlib.constants.Z_SYNC_FLUSH,
finishFlush: zlib.constants.Z_SYNC_FLUSH
}))
} else if (coding === 'deflate') {
decoders.push(createInflate({
flush: zlib.constants.Z_SYNC_FLUSH,
finishFlush: zlib.constants.Z_SYNC_FLUSH
}))
} else if (coding === 'br') {
decoders.push(zlib.createBrotliDecompress({
flush: zlib.constants.BROTLI_OPERATION_FLUSH,
finishFlush: zlib.constants.BROTLI_OPERATION_FLUSH
}))
} else {
decoders.length = 0
break
}
}
}

const onError = this.onError.bind(this)

resolve({
status,
statusText,
headersList,
body: decoders.length
? pipeline(this.body, ...decoders, (err) => {
if (err) {
this.onError(err)
}
}).on('error', onError)
: this.body.on('error', onError)
body: decompress(request, {
status,
statusText,
headers: headersList,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

headersList is not an instance of Headers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically it isn't, but its API is compatible as far as I can see. At least, the entire test suite hasn't proven me wrong.

I can construct Headers instance out of headersList but it may have performance implications. Would you advise me to do that?

body: this.body
}).on('error', onError)
})

return true
Expand Down
Loading
Loading