From cdd33ea37eb1deaa45a70476d70cf7e3cda912b3 Mon Sep 17 00:00:00 2001 From: TheGiddyLimit Date: Mon, 2 Sep 2024 11:21:45 +0100 Subject: [PATCH] test: add un-sanitized HTML check --- _node/clean-html.js | 71 +------------ _node/html-cleaner-test-worker.js | 47 +++++++++ _node/html-cleaner.js | 165 ++++++++++++++++++++++++++++++ _test/test-html.js | 16 +++ package.json | 3 +- 5 files changed, 232 insertions(+), 70 deletions(-) create mode 100644 _node/html-cleaner-test-worker.js create mode 100644 _node/html-cleaner.js create mode 100644 _test/test-html.js diff --git a/_node/clean-html.js b/_node/clean-html.js index c2e7acdab7..17eae3c8ae 100644 --- a/_node/clean-html.js +++ b/_node/clean-html.js @@ -1,70 +1,3 @@ -import {ObjectWalker, Uf, Um, getCleanJson} from "5etools-utils"; -import sanitizeHtml from 'sanitize-html'; -import he from 'he'; -import fs from "fs"; +import {BrewCleanerHtml} from "./html-cleaner.js"; -class BrewCleanerHtml { - static _LOG_TAG = `HTML`; - - static _OPTS_SANITIZE = { - allowedTags: [ - // region Custom things which look like tags - "<$name$>", - // endregion - ], - allowedAttributes: {}, - }; - - static async _pUpdateDir (dir) { - Uf.listJsonFiles(dir) - .forEach(file => { - const fileData = Uf.readJsonSync(file); - - const {_meta, _test} = fileData; - delete fileData._meta; - delete fileData._test; - - const fileOut = ObjectWalker.walk({ - obj: fileData, - filePath: file, - primitiveHandlers: { - string: (str, {filePath}) => { - const clean = he.unescape( - sanitizeHtml( - str, - this._OPTS_SANITIZE, - ), - ); - - if (clean !== str) Um.info(this._LOG_TAG, `Sanitized:\n${str}\n${clean}`); - - return clean; - } - }, - isModify: true, - }); - - const out = {$schema: fileOut.$schema, _meta, _test}; - Object.assign(out, fileOut); - - fs.writeFileSync(file, getCleanJson(out)); - }); - } - - static async pRun () { - await Uf.pRunOnDirs( - async (dir) => { - Um.info(this._LOG_TAG, `Sanitizing HTML in dir "${dir}"...`); - await this._pUpdateDir(dir); - }, - { - isSerial: true, - }, - ); - Um.info(this._LOG_TAG, "Done!"); - } -} - -BrewCleanerHtml.pRun(); - -export {BrewCleanerHtml}; +await BrewCleanerHtml.pRun(); diff --git a/_node/html-cleaner-test-worker.js b/_node/html-cleaner-test-worker.js new file mode 100644 index 0000000000..b024b8b06e --- /dev/null +++ b/_node/html-cleaner-test-worker.js @@ -0,0 +1,47 @@ +import {isMainThread, parentPort} from "worker_threads"; +import {BrewCleanerHtml} from "./html-cleaner.js"; + +if (isMainThread) throw new Error(`Worker must not be started in main thread!`); + +let isCancelled = false; + +parentPort + .on("message", async msg => { + switch (msg.type) { + case "init": { + parentPort.postMessage({ + type: "ready", + payload: {}, + }); + + break; + } + + case "cancel": { + isCancelled = true; + break; + } + + case "work": { + if (isCancelled) { + parentPort.postMessage({ + type: "done", + payload: {}, + }); + return; + } + + const {messages = []} = BrewCleanerHtml.getFileMessages({file: msg.payload.file}); + + parentPort.postMessage({ + type: "done", + payload: { + isError: !!messages.length, + messages, + }, + }); + + break; + } + } + }); diff --git a/_node/html-cleaner.js b/_node/html-cleaner.js new file mode 100644 index 0000000000..e17689b98b --- /dev/null +++ b/_node/html-cleaner.js @@ -0,0 +1,165 @@ +import {getCleanJson, ObjectWalker, Uf, Um} from "5etools-utils"; +import he from "he"; +import sanitizeHtml from "sanitize-html"; +import fs from "fs"; +import os from "os"; +import path from "path"; +import url from "url"; +import {Worker} from "worker_threads"; +import {Deferred, WorkerList} from "5etools-utils/lib/WorkerList.js"; + +const __dirname = url.fileURLToPath(new URL(".", import.meta.url)); + +export class BrewCleanerHtml { + static _LOG_TAG = `HTML`; + + static _OPTS_SANITIZE = { + allowedTags: [ + // region Custom things which look like tags + "<$name$>", + // endregion + ], + allowedAttributes: {}, + }; + + static _getCleanFileMeta ({file}) { + const fileData = Uf.readJsonSync(file); + + const messages = []; + + const {_meta, _test} = fileData; + delete fileData._meta; + delete fileData._test; + + const fileOut = ObjectWalker.walk({ + obj: fileData, + filePath: file, + primitiveHandlers: { + string: (str, {filePath}) => { + const clean = he.unescape( + sanitizeHtml( + str, + this._OPTS_SANITIZE, + ), + ); + + if (clean !== str) { + const msg = `Sanitized:\n${str}\n${clean}`; + messages.push(msg); + Um.info(this._LOG_TAG, msg); + } + + return clean; + } + }, + isModify: true, + }); + + const out = {$schema: fileOut.$schema, _meta, _test}; + Object.assign(out, fileOut); + + return { + messages, + out, + }; + } + + static async _pUpdateDir (dir) { + Uf.listJsonFiles(dir) + .forEach(file => { + const {messages, out} = this._getCleanFileMeta({file}) + if (!messages?.length) return; + + messages.forEach(msg => Um.info(this._LOG_TAG, msg)); + + fs.writeFileSync(file, getCleanJson(out)); + }); + } + + static async pRun () { + await Uf.pRunOnDirs( + async (dir) => { + Um.info(this._LOG_TAG, `Sanitizing HTML in dir "${dir}"...`); + await this._pUpdateDir(dir); + }, + { + isSerial: true, + }, + ); + Um.info(this._LOG_TAG, "Done!"); + } + + static getFileMessages ({file}) { + return this._getCleanFileMeta({file}); + } + + static async pGetErrorsOnDirsWorkers ({isFailFast = false} = {}) { + Um.info(this._LOG_TAG, `Testing for HTML...`); + + const cntWorkers = Math.max(1, os.cpus().length - 1); + + const messages = []; + + const fileQueue = []; + Uf.runOnDirs((dir) => fileQueue.push(...Uf.listJsonFiles(dir))); + + const workerList = new WorkerList(); + + let cntFailures = 0; + const workers = [...new Array(cntWorkers)] + .map(() => { + // Relative `Worker` paths do not function in packages, so give an exact path + const worker = new Worker(path.join(__dirname, "html-cleaner-test-worker")); + + worker.on("message", (msg) => { + switch (msg.type) { + case "ready": + case "done": { + if (msg.payload.isError) { + messages.push(...msg.payload.messages); + + if (isFailFast) workers.forEach(worker => worker.postMessage({type: "cancel"})); + } + + if (worker.dIsActive) worker.dIsActive.resolve(); + workerList.add(worker); + + break; + } + } + }); + + worker.on("error", e => { + console.error(e); + cntFailures++; + }); + + worker.postMessage({ + type: "init", + payload: {}, + }); + + return worker; + }); + + while (fileQueue.length) { + if (isFailFast && messages.length) break; + + const file = fileQueue.shift(); + const worker = await workerList.get(); + + worker.dIsActive = new Deferred(); + worker.postMessage({ + type: "work", + payload: { + file, + }, + }); + } + + await Promise.all(workers.map(it => it.dIsActive?.promise)); + await Promise.all(workers.map(it => it.terminate())); + + return {messages, isUnknownError: !!cntFailures}; + } +} diff --git a/_test/test-html.js b/_test/test-html.js new file mode 100644 index 0000000000..1f3d6e859d --- /dev/null +++ b/_test/test-html.js @@ -0,0 +1,16 @@ +import {BrewCleanerHtml} from "../_node/html-cleaner.js"; +import {Um} from "5etools-utils"; + +const {messages, isUnknownError = false} = await BrewCleanerHtml.pGetErrorsOnDirsWorkers(); + +if (messages.length) { + console.error(`HTML test failed (${messages.length} failure${messages.length === 1 ? "" : "s"}).`); + process.exit(1); +} + +if (isUnknownError) { + console.error(`Unknown error when testing! (See above logs)`); + process.exit(1); +} + +if (!messages.length) Um.info("HTML", `HTML test passed.`); diff --git a/package.json b/package.json index a6005c893d..a3e6e01dfa 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,8 @@ "test:file-locations": "test-file-locations", "test:file-names": "test-file-names", "test:img-directories": "node _test/test-img-dir.js", - "test": "npm run build:clean && npm run build:index && npm run test:json && npm run test:file-locations && npm run test:file-names && npm run test:img-directories && npm run test:file-contents" + "test:html": "node _test/test-html.js", + "test": "npm run build:clean && npm run build:index && npm run test:json && npm run test:file-locations && npm run test:file-names && npm run test:img-directories && npm run test:file-contents && npm run test:html" }, "repository": { "type": "git",