Skip to content

Commit

Permalink
test: add un-sanitized HTML check
Browse files Browse the repository at this point in the history
  • Loading branch information
TheGiddyLimit committed Sep 2, 2024
1 parent 5504a83 commit cdd33ea
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 70 deletions.
71 changes: 2 additions & 69 deletions _node/clean-html.js
Original file line number Diff line number Diff line change
@@ -1,70 +1,3 @@
import {ObjectWalker, Uf, Um, getCleanJson} from "5etools-utils";
import sanitizeHtml from 'sanitize-html';
import he from 'he';
import fs from "fs";
import {BrewCleanerHtml} from "./html-cleaner.js";

/**
 * Strips HTML out of every string value found in the JSON files of each
 * directory visited by `Uf.pRunOnDirs`, rewriting the files in place.
 */
class BrewCleanerHtml {
	static _LOG_TAG = `HTML`;

	static _OPTS_SANITIZE = {
		allowedTags: [
			// region Custom things which look like tags
			"<$name$>",
			// endregion
		],
		allowedAttributes: {},
	};

	/**
	 * Sanitize and rewrite every JSON file listed for `dir`.
	 *
	 * @param {string} dir Directory passed to `Uf.listJsonFiles`.
	 */
	static async _pUpdateDir (dir) {
		// Shared string handler: sanitize, then decode the entities the sanitizer produced
		const sanitizeString = (raw, {filePath}) => {
			const stripped = sanitizeHtml(raw, this._OPTS_SANITIZE);
			const decoded = he.unescape(stripped);

			if (decoded === raw) return decoded;

			Um.info(this._LOG_TAG, `Sanitized:\n${raw}\n${decoded}`);
			return decoded;
		};

		for (const jsonPath of Uf.listJsonFiles(dir)) {
			const json = Uf.readJsonSync(jsonPath);

			// Pull the metadata keys aside so they are not walked
			const {_meta, _test} = json;
			delete json._meta;
			delete json._test;

			const walked = ObjectWalker.walk({
				obj: json,
				filePath: jsonPath,
				primitiveHandlers: {string: sanitizeString},
				isModify: true,
			});

			// `$schema`, `_meta` and `_test` lead the output; the walked keys follow
			const ordered = {$schema: walked.$schema, _meta, _test, ...walked};

			fs.writeFileSync(jsonPath, getCleanJson(ordered));
		}
	}

	/** Run the sanitizer over each directory, one directory at a time. */
	static async pRun () {
		const pSanitizeDir = async (dir) => {
			Um.info(this._LOG_TAG, `Sanitizing HTML in dir "${dir}"...`);
			await this._pUpdateDir(dir);
		};

		await Uf.pRunOnDirs(pSanitizeDir, {isSerial: true});

		Um.info(this._LOG_TAG, "Done!");
	}
}

// NOTE(review): `pRun` is invoked twice in this span (un-awaited here, awaited below).
// This looks like the removed and added lines of a diff shown together - confirm which one is live.
BrewCleanerHtml.pRun();

export {BrewCleanerHtml};
// Script entry point: sanitize all directories, waiting for the run to finish.
await BrewCleanerHtml.pRun();
47 changes: 47 additions & 0 deletions _node/html-cleaner-test-worker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import {isMainThread, parentPort} from "worker_threads";
import {BrewCleanerHtml} from "./html-cleaner.js";

// This module only makes sense as a worker: all communication goes through `parentPort`.
if (isMainThread) throw new Error(`Worker must not be started in main thread!`);

// Set by a "cancel" message; subsequent "work" requests are acknowledged without being processed.
let isCancelled = false;

/**
 * Message protocol (parent -> worker), keyed on `msg.type`:
 * - "init":   replies `{type: "ready"}`.
 * - "cancel": flags the worker as cancelled; no reply.
 * - "work":   checks `msg.payload.file` and replies `{type: "done", payload: {isError, messages}}`
 *             (or an empty payload when cancelled).
 *
 * Every "work" request is answered with exactly one "done", even when the check itself
 * throws, because the parent only hands out this worker again after seeing "done".
 */
parentPort
	.on("message", msg => {
		switch (msg.type) {
			case "init": {
				parentPort.postMessage({
					type: "ready",
					payload: {},
				});

				break;
			}

			case "cancel": {
				isCancelled = true;
				break;
			}

			case "work": {
				if (isCancelled) {
					parentPort.postMessage({
						type: "done",
						payload: {},
					});
					return;
				}

				const file = msg.payload?.file;

				let messages;
				try {
					({messages = []} = BrewCleanerHtml.getFileMessages({file}));
				} catch (e) {
					// Surface the failure as a test message instead of crashing the worker,
					// which would leave the parent waiting for a "done" that never arrives.
					console.error(e);
					messages = [`Failed to check "${file}": ${e?.message ?? e}`];
				}

				parentPort.postMessage({
					type: "done",
					payload: {
						isError: !!messages.length,
						messages,
					},
				});

				break;
			}
		}
	});
165 changes: 165 additions & 0 deletions _node/html-cleaner.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import {getCleanJson, ObjectWalker, Uf, Um} from "5etools-utils";
import he from "he";
import sanitizeHtml from "sanitize-html";
import fs from "fs";
import os from "os";
import path from "path";
import url from "url";
import {Worker} from "worker_threads";
import {Deferred, WorkerList} from "5etools-utils/lib/WorkerList.js";

// Directory containing this module, derived from `import.meta.url`.
const __dirname = url.fileURLToPath(new URL(".", import.meta.url));

/**
 * Finds and strips HTML in the string values of JSON files.
 *
 * Two entry points share the same per-file logic:
 * - `pRun` rewrites offending files in place.
 * - `pGetErrorsOnDirsWorkers` only reports them, spreading the files over worker threads.
 */
export class BrewCleanerHtml {
	static _LOG_TAG = `HTML`;

	static _OPTS_SANITIZE = {
		allowedTags: [
			// region Custom things which look like tags
			"<$name$>",
			// endregion
		],
		allowedAttributes: {},
	};

	/**
	 * Read one JSON file and sanitize every string value in it.
	 *
	 * Each string is passed through `sanitizeHtml` and then `he.unescape`; whenever the
	 * result differs from the input, a message is recorded and logged.
	 *
	 * @param {object} opts
	 * @param {string} opts.file Path of the JSON file to read.
	 * @returns {{messages: string[], out: object}} One message per changed string, and the
	 * cleaned data with `$schema`, `_meta` and `_test` placed first.
	 */
	static _getCleanFileMeta ({file}) {
		const fileData = Uf.readJsonSync(file);

		const messages = [];

		// Keep the metadata keys out of the walk; they are re-attached untouched below
		const {_meta, _test} = fileData;
		delete fileData._meta;
		delete fileData._test;

		const fileOut = ObjectWalker.walk({
			obj: fileData,
			filePath: file,
			primitiveHandlers: {
				string: (str) => {
					const clean = he.unescape(
						sanitizeHtml(
							str,
							this._OPTS_SANITIZE,
						),
					);

					if (clean !== str) {
						const msg = `Sanitized:\n${str}\n${clean}`;
						messages.push(msg);
						Um.info(this._LOG_TAG, msg);
					}

					return clean;
				},
			},
			isModify: true,
		});

		const out = {$schema: fileOut.$schema, _meta, _test};
		Object.assign(out, fileOut);

		return {
			messages,
			out,
		};
	}

	/**
	 * Sanitize every JSON file listed for `dir`, rewriting only those that changed.
	 *
	 * @param {string} dir Directory passed to `Uf.listJsonFiles`.
	 */
	static async _pUpdateDir (dir) {
		Uf.listJsonFiles(dir)
			.forEach(file => {
				const {messages, out} = this._getCleanFileMeta({file});
				if (!messages?.length) return;

				// The messages were already logged by `_getCleanFileMeta`; logging them
				// again here would print every one twice.
				fs.writeFileSync(file, getCleanJson(out));
			});
	}

	/** Sanitize all directories visited by `Uf.pRunOnDirs`, one directory at a time. */
	static async pRun () {
		await Uf.pRunOnDirs(
			async (dir) => {
				Um.info(this._LOG_TAG, `Sanitizing HTML in dir "${dir}"...`);
				await this._pUpdateDir(dir);
			},
			{
				isSerial: true,
			},
		);
		Um.info(this._LOG_TAG, "Done!");
	}

	/**
	 * Check a single file without writing anything back.
	 *
	 * @param {object} opts
	 * @param {string} opts.file Path of the JSON file to check.
	 * @returns {{messages: string[], out: object}} Same shape as `_getCleanFileMeta`.
	 */
	static getFileMessages ({file}) {
		return this._getCleanFileMeta({file});
	}

	/**
	 * Check every JSON file for HTML using a pool of worker threads.
	 *
	 * @param {object} [opts]
	 * @param {boolean} [opts.isFailFast=false] Stop handing out files (and cancel the
	 * workers) as soon as any file reports a message.
	 * @returns {Promise<{messages: string[], isUnknownError: boolean}>} All messages
	 * reported by the workers; `isUnknownError` is true if any worker emitted "error".
	 */
	static async pGetErrorsOnDirsWorkers ({isFailFast = false} = {}) {
		Um.info(this._LOG_TAG, `Testing for HTML...`);

		// Leave one core free for the main thread, but always run at least one worker
		const cntWorkers = Math.max(1, os.cpus().length - 1);

		const messages = [];

		const fileQueue = [];
		Uf.runOnDirs((dir) => fileQueue.push(...Uf.listJsonFiles(dir)));

		const workerList = new WorkerList();

		let cntFailures = 0;
		// Resolved once every worker has crashed, so the dispatch loop below cannot wait
		// forever for a free worker that will never be handed back.
		const dAllWorkersFailed = new Deferred();

		const workers = [...new Array(cntWorkers)]
			.map(() => {
				// Relative `Worker` paths do not function in packages, so give an exact path
				const worker = new Worker(path.join(__dirname, "html-cleaner-test-worker"));

				worker.on("message", (msg) => {
					switch (msg.type) {
						case "ready":
						case "done": {
							if (msg.payload.isError) {
								messages.push(...msg.payload.messages);

								if (isFailFast) workers.forEach(worker => worker.postMessage({type: "cancel"}));
							}

							// Mark the in-flight job (if any) as finished and hand the worker back to the pool
							if (worker.dIsActive) worker.dIsActive.resolve();
							workerList.add(worker);

							break;
						}
					}
				});

				worker.on("error", e => {
					console.error(e);
					cntFailures++;

					// A crashed worker never posts "done"; release its pending job so the
					// final wait below does not hang on it.
					if (worker.dIsActive) worker.dIsActive.resolve();
					if (cntFailures >= cntWorkers) dAllWorkersFailed.resolve();
				});

				worker.postMessage({
					type: "init",
					payload: {},
				});

				return worker;
			});

		while (fileQueue.length) {
			if (isFailFast && messages.length) break;

			const worker = await Promise.race([
				workerList.get(),
				dAllWorkersFailed.promise.then(() => null),
			]);
			// Nobody left to do the work; the failure is reported via `isUnknownError`
			if (!worker) break;

			const file = fileQueue.shift();

			worker.dIsActive = new Deferred();
			worker.postMessage({
				type: "work",
				payload: {
					file,
				},
			});
		}

		// Let in-flight jobs finish before shutting the pool down
		await Promise.all(workers.map(it => it.dIsActive?.promise));
		await Promise.all(workers.map(it => it.terminate()));

		return {messages, isUnknownError: !!cntFailures};
	}
}
16 changes: 16 additions & 0 deletions _test/test-html.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import {BrewCleanerHtml} from "../_node/html-cleaner.js";
import {Um} from "5etools-utils";

// Run the HTML check and translate its result into a process exit status.
const {
	messages: failures,
	isUnknownError: hasUnknownError = false,
} = await BrewCleanerHtml.pGetErrorsOnDirsWorkers();

const cntFailures = failures.length;

if (cntFailures) {
	const plural = cntFailures === 1 ? "" : "s";
	console.error(`HTML test failed (${cntFailures} failure${plural}).`);
	process.exit(1);
}

if (hasUnknownError) {
	console.error(`Unknown error when testing! (See above logs)`);
	process.exit(1);
}

// Only reachable with zero failures: both branches above terminate the process
Um.info("HTML", `HTML test passed.`);
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"test:file-locations": "test-file-locations",
"test:file-names": "test-file-names",
"test:img-directories": "node _test/test-img-dir.js",
"test": "npm run build:clean && npm run build:index && npm run test:json && npm run test:file-locations && npm run test:file-names && npm run test:img-directories && npm run test:file-contents"
"test:html": "node _test/test-html.js",
"test": "npm run build:clean && npm run build:index && npm run test:json && npm run test:file-locations && npm run test:file-names && npm run test:img-directories && npm run test:file-contents && npm run test:html"
},
"repository": {
"type": "git",
Expand Down

0 comments on commit cdd33ea

Please sign in to comment.