From 229ad1bb2ccf27eaee7fb0ebec5f4f2471041667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Ribaudo?= Date: Tue, 27 Aug 2024 17:58:04 +0200 Subject: [PATCH] Use the URL global instead of the deprecated url.parse The Node.js url.parse API (https://nodejs.org/api/url.html#urlparseurlstring-parsequerystring-slashesdenotehost) is deprecated because it's prone to security issues (to the point that Node.js doesn't even publish CVEs for it anymore). The official reccomendation is to instead use the global URL constructor, available both in Node.js and in browsers. Node.js filesystem APIs accept URL objects as parameter, so this also avoids a few URL->filepath conversions. --- src/display/node_stream.js | 71 ++++++-------------- test/test.mjs | 9 +-- test/unit/node_stream_spec.js | 8 +-- test/webserver.mjs | 119 ++++++++++++++++------------------ 4 files changed, 83 insertions(+), 124 deletions(-) diff --git a/src/display/node_stream.js b/src/display/node_stream.js index 3ffabffe0df13..d61b9a5850951 100644 --- a/src/display/node_stream.js +++ b/src/display/node_stream.js @@ -26,29 +26,20 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) { ); } -const fileUriRegex = /^file:\/\/\/[a-zA-Z]:\//; +const urlRegex = /^[a-z][a-z0-9\-+.]+:/i; -function parseUrl(sourceUrl) { - const url = NodePackages.get("url"); - const parsedUrl = url.parse(sourceUrl); - if (parsedUrl.protocol === "file:" || parsedUrl.host) { - return parsedUrl; - } - // Prepending 'file:///' to Windows absolute path. - if (/^[a-z]:[/\\]/i.test(sourceUrl)) { - return url.parse(`file:///${sourceUrl}`); +function parseUrlOrPath(sourceUrl) { + if (urlRegex.test(sourceUrl)) { + return new URL(sourceUrl); } - // Changes protocol to 'file:' if url refers to filesystem. - if (!parsedUrl.host) { - parsedUrl.protocol = "file:"; - } - return parsedUrl; + const url = NodePackages.get("url"); + return new URL(url.pathToFileURL(sourceUrl)); } class PDFNodeStream { constructor(source) { this.source = source; - this.url = parseUrl(source.url); + this.url = parseUrlOrPath(source.url); this.isHttp = this.url.protocol === "http:" || this.url.protocol === "https:"; // Check if url refers to filesystem. @@ -287,18 +278,6 @@ class BaseRangeReader { } } -function createRequestOptions(parsedUrl, headers) { - return { - protocol: parsedUrl.protocol, - auth: parsedUrl.auth, - host: parsedUrl.hostname, - port: parsedUrl.port, - path: parsedUrl.path, - method: "GET", - headers, - }; -} - class PDFNodeStreamFullReader extends BaseFullReader { constructor(stream) { super(stream); @@ -337,13 +316,15 @@ class PDFNodeStreamFullReader extends BaseFullReader { if (this._url.protocol === "http:") { const http = NodePackages.get("http"); this._request = http.request( - createRequestOptions(this._url, stream.httpHeaders), + this._url, + { headers: stream.httpHeaders }, handleResponse ); } else { const https = NodePackages.get("https"); this._request = https.request( - createRequestOptions(this._url, stream.httpHeaders), + this._url, + { headers: stream.httpHeaders }, handleResponse ); } @@ -386,13 +367,15 @@ class PDFNodeStreamRangeReader extends BaseRangeReader { if (this._url.protocol === "http:") { const http = NodePackages.get("http"); this._request = http.request( - createRequestOptions(this._url, this._httpHeaders), + this._url, + { headers: this._httpHeaders }, handleResponse ); } else { const https = NodePackages.get("https"); this._request = https.request( - createRequestOptions(this._url, this._httpHeaders), + this._url, + { headers: this._httpHeaders }, handleResponse ); } @@ -408,25 +391,18 @@ class PDFNodeStreamFsFullReader extends BaseFullReader { constructor(stream) { super(stream); - let path = decodeURIComponent(this._url.path); - - // Remove the extra slash to get right path from url like `file:///C:/` - if (fileUriRegex.test(this._url.href)) { - path = path.replace(/^\//, ""); - } - const fs = NodePackages.get("fs"); - fs.promises.lstat(path).then( + fs.promises.lstat(this._url).then( stat => { // Setting right content length. this._contentLength = stat.size; - this._setReadableStream(fs.createReadStream(path)); + this._setReadableStream(fs.createReadStream(this._url)); this._headersCapability.resolve(); }, error => { if (error.code === "ENOENT") { - error = new MissingPDFException(`Missing PDF "${path}".`); + error = new MissingPDFException(`Missing PDF "${this._url}".`); } this._storedError = error; this._headersCapability.reject(error); @@ -439,15 +415,10 @@ class PDFNodeStreamFsRangeReader extends BaseRangeReader { constructor(stream, start, end) { super(stream); - let path = decodeURIComponent(this._url.path); - - // Remove the extra slash to get right path from url like `file:///C:/` - if (fileUriRegex.test(this._url.href)) { - path = path.replace(/^\//, ""); - } - const fs = NodePackages.get("fs"); - this._setReadableStream(fs.createReadStream(path, { start, end: end - 1 })); + this._setReadableStream( + fs.createReadStream(this._url, { start, end: end - 1 }) + ); } } diff --git a/test/test.mjs b/test/test.mjs index 0d23a76f0e7e2..cabda05812a38 100644 --- a/test/test.mjs +++ b/test/test.mjs @@ -26,7 +26,6 @@ import path from "path"; import puppeteer from "puppeteer"; import readline from "readline"; import { translateFont } from "./font/ttxdriver.mjs"; -import url from "url"; import { WebServer } from "./webserver.mjs"; import yargs from "yargs"; @@ -670,8 +669,7 @@ function checkRefTestResults(browser, id, results) { }); } -function refTestPostHandler(req, res) { - var parsedUrl = url.parse(req.url, true); +function refTestPostHandler(parsedUrl, req, res) { var pathname = parsedUrl.pathname; if ( pathname !== "/tellMeToQuit" && @@ -691,7 +689,7 @@ function refTestPostHandler(req, res) { var session; if (pathname === "/tellMeToQuit") { - session = getSession(parsedUrl.query.browser); + session = getSession(parsedUrl.searchParams.get("browser")); monitorBrowserTimeout(session, null); closeSession(session.name); return; @@ -821,8 +819,7 @@ async function startIntegrationTest() { await Promise.all(sessions.map(session => closeSession(session.name))); } -function unitTestPostHandler(req, res) { - var parsedUrl = url.parse(req.url); +function unitTestPostHandler(parsedUrl, req, res) { var pathname = parsedUrl.pathname; if ( pathname !== "/tellMeToQuit" && diff --git a/test/unit/node_stream_spec.js b/test/unit/node_stream_spec.js index f3a2daa754545..775131caa3f50 100644 --- a/test/unit/node_stream_spec.js +++ b/test/unit/node_stream_spec.js @@ -24,17 +24,13 @@ if (!isNodeJS) { ); } -const path = await __non_webpack_import__("path"); const url = await __non_webpack_import__("url"); describe("node_stream", function () { let tempServer = null; - const pdf = url.parse( - encodeURI( - "file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf") - ) - ).href; + const cwdURL = url.pathToFileURL(process.cwd()) + "/"; + const pdf = new URL("./test/pdfs/tracemonkey.pdf", cwdURL).href; const pdfLength = 1016315; beforeAll(function () { diff --git a/test/webserver.mjs b/test/webserver.mjs index d597aa2512d7d..e0295f3a2865e 100644 --- a/test/webserver.mjs +++ b/test/webserver.mjs @@ -21,6 +21,7 @@ import fs from "fs"; import fsPromises from "fs/promises"; import http from "http"; import path from "path"; +import { pathToFileURL } from "url"; const MIME_TYPES = { ".css": "text/css", @@ -42,7 +43,8 @@ const DEFAULT_MIME_TYPE = "application/octet-stream"; class WebServer { constructor({ root, host, port, cacheExpirationTime }) { - this.root = root || "."; + const cwdURL = pathToFileURL(process.cwd()) + "/"; + this.rootURL = new URL(`${root || "."}/`, cwdURL); this.host = host || "localhost"; this.port = port || 0; this.server = null; @@ -82,27 +84,10 @@ class WebServer { } async #handler(request, response) { - // Validate and parse the request URL. - const url = request.url.replaceAll("//", "/"); - const urlParts = /([^?]*)((?:\?(.*))?)/.exec(url); - let pathPart; - try { - // Guard against directory traversal attacks such as - // `/../../../../../../../etc/passwd`, which let you make GET requests - // for files outside of `this.root`. - pathPart = path.normalize(decodeURI(urlParts[1])); - // `path.normalize` returns a path on the basis of the current platform. - // Windows paths cause issues in `checkRequest` and underlying methods. - // Converting to a Unix path avoids platform checks in said functions. - pathPart = pathPart.replaceAll("\\", "/"); - } catch { - // If the URI cannot be decoded, a `URIError` is thrown. This happens for - // malformed URIs such as `http://localhost:8888/%s%s` and should be - // handled as a bad request. - response.writeHead(400); - response.end("Bad request", "utf8"); - return; - } + // URLs are normalized and automatically disallow directory traversal + // attacks. For example, http://HOST:PORT/../../../../../../../etc/passwd + // is equivalent to http://HOST:PORT/etc/passwd. + const url = new URL(`http://${this.host}:${this.port}${request.url}`); // Validate the request method and execute method hooks. const methodHooks = this.hooks[request.method]; @@ -111,24 +96,34 @@ class WebServer { response.end("Unsupported request method", "utf8"); return; } - const handled = methodHooks.some(hook => hook(request, response)); + const handled = methodHooks.some(hook => hook(url, request, response)); if (handled) { return; } // Check the request and serve the file/folder contents. - if (pathPart === "/favicon.ico") { - pathPart = "test/resources/favicon.ico"; + if (url.pathname === "/favicon.ico") { + url.pathname = "/test/resources/favicon.ico"; } - await this.#checkRequest(request, response, url, urlParts, pathPart); + await this.#checkRequest(request, response, url); } - async #checkRequest(request, response, url, urlParts, pathPart) { + async #checkRequest(request, response, url) { + const localURL = new URL(`.${url.pathname}`, this.rootURL); + // Check if the file/folder exists. - let filePath; try { - filePath = await fsPromises.realpath(path.join(this.root, pathPart)); - } catch { + await fsPromises.realpath(localURL); + } catch (e) { + if (e instanceof URIError) { + // If the URI cannot be decoded, a `URIError` is thrown. This happens + // for malformed URIs such as `http://localhost:8888/%s%s` and should be + // handled as a bad request. + response.writeHead(400); + response.end("Bad request", "utf8"); + return; + } + response.writeHead(404); response.end(); if (this.verbose) { @@ -140,7 +135,7 @@ class WebServer { // Get the properties of the file/folder. let stats; try { - stats = await fsPromises.stat(filePath); + stats = await fsPromises.stat(localURL); } catch { response.writeHead(500); response.end(); @@ -150,15 +145,14 @@ class WebServer { const isDir = stats.isDirectory(); // If a folder is requested, serve the directory listing. - if (isDir && !/\/$/.test(pathPart)) { - response.setHeader("Location", `${pathPart}/${urlParts[2]}`); + if (isDir && !/\/$/.test(url.pathname)) { + response.setHeader("Location", `${url.pathname}/${url.search}`); response.writeHead(301); response.end("Redirected", "utf8"); return; } if (isDir) { - const queryPart = urlParts[3]; - await this.#serveDirectoryIndex(response, pathPart, queryPart, filePath); + await this.#serveDirectoryIndex(response, url, localURL); return; } @@ -182,7 +176,7 @@ class WebServer { } this.#serveFileRange( response, - filePath, + localURL, fileSize, start, isNaN(end) ? fileSize : end + 1 @@ -194,19 +188,19 @@ class WebServer { if (this.verbose) { console.log(url); } - this.#serveFile(response, filePath, fileSize); + this.#serveFile(response, localURL, fileSize); } - async #serveDirectoryIndex(response, pathPart, queryPart, directory) { + async #serveDirectoryIndex(response, url, localUrl) { response.setHeader("Content-Type", "text/html"); response.writeHead(200); - if (queryPart === "frame") { + if (url.searchParams.has("frame")) { response.end( ` - + `, "utf8" @@ -216,7 +210,7 @@ class WebServer { let files; try { - files = await fsPromises.readdir(directory); + files = await fsPromises.readdir(localUrl); } catch { response.end(); return; @@ -228,13 +222,13 @@ class WebServer { -

Index of ${pathPart}

` +

Index of ${url.pathname}

` ); - if (pathPart !== "/") { + if (url.pathname !== "/") { response.write('..
'); } - const all = queryPart === "all"; + const all = url.searchParams.has("all"); const escapeHTML = untrusted => // Escape untrusted input so that it can safely be used in a HTML response // in HTML and in HTML attributes. @@ -247,13 +241,13 @@ class WebServer { for (const file of files) { let stat; - const item = pathPart + file; + const item = url.pathname + file; let href = ""; let label = ""; let extraAttributes = ""; try { - stat = fs.statSync(path.join(directory, file)); + stat = fs.statSync(new URL(file, localUrl)); } catch (ex) { href = encodeURI(item); label = `${file} (${ex})`; @@ -284,7 +278,7 @@ class WebServer { if (files.length === 0) { response.write("

No files found

"); } - if (!all && queryPart !== "side") { + if (!all && !url.searchParams.has("side")) { response.write( '

(only PDF files are shown, show all)

' ); @@ -292,8 +286,8 @@ class WebServer { response.end(""); } - #serveFile(response, filePath, fileSize) { - const stream = fs.createReadStream(filePath, { flags: "rs" }); + #serveFile(response, fileURL, fileSize) { + const stream = fs.createReadStream(fileURL, { flags: "rs" }); stream.on("error", error => { response.writeHead(500); response.end(); @@ -302,7 +296,7 @@ class WebServer { if (!this.disableRangeRequests) { response.setHeader("Accept-Ranges", "bytes"); } - response.setHeader("Content-Type", this.#getContentType(filePath)); + response.setHeader("Content-Type", this.#getContentType(fileURL)); response.setHeader("Content-Length", fileSize); if (this.cacheExpirationTime > 0) { const expireTime = new Date(); @@ -313,8 +307,8 @@ class WebServer { stream.pipe(response); } - #serveFileRange(response, filePath, fileSize, start, end) { - const stream = fs.createReadStream(filePath, { + #serveFileRange(response, fileURL, fileSize, start, end) { + const stream = fs.createReadStream(fileURL, { flags: "rs", start, end: end - 1, @@ -325,7 +319,7 @@ class WebServer { }); response.setHeader("Accept-Ranges", "bytes"); - response.setHeader("Content-Type", this.#getContentType(filePath)); + response.setHeader("Content-Type", this.#getContentType(fileURL)); response.setHeader("Content-Length", end - start); response.setHeader( "Content-Range", @@ -335,8 +329,8 @@ class WebServer { stream.pipe(response); } - #getContentType(filePath) { - const extension = path.extname(filePath).toLowerCase(); + #getContentType(fileURL) { + const extension = path.extname(fileURL.pathname).toLowerCase(); return MIME_TYPES[extension] || DEFAULT_MIME_TYPE; } } @@ -345,13 +339,14 @@ class WebServer { // It is here instead of test.js so that when the test will still complete as // expected if the user does "gulp server" and then visits // http://localhost:8888/test/unit/unit_test.html?spec=Cross-origin -function crossOriginHandler(request, response) { - if (request.url === "/test/pdfs/basicapi.pdf?cors=withCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); - response.setHeader("Access-Control-Allow-Credentials", "true"); - } - if (request.url === "/test/pdfs/basicapi.pdf?cors=withoutCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); +function crossOriginHandler(url, request, response) { + if (url.pathname === "/test/pdfs/basicapi.pdf") { + if (url.searchParams.get("cors") === "withCredentials") { + response.setHeader("Access-Control-Allow-Origin", request.headers.origin); + response.setHeader("Access-Control-Allow-Credentials", "true"); + } else if (url.searchParams.get("cors") === "withoutCredentials") { + response.setHeader("Access-Control-Allow-Origin", request.headers.origin); + } } }