Skip to content

Commit

Permalink
Support custom JS preprocessing for db reports
Browse files Browse the repository at this point in the history
  • Loading branch information
siddharthvp committed Apr 13, 2024
1 parent a00fc83 commit cee29ed
Show file tree
Hide file tree
Showing 10 changed files with 209 additions and 21 deletions.
2 changes: 1 addition & 1 deletion botbase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export function emailOnError(err: Error, taskname: string, isFatal = true) {
sendMail(`${taskname} error`, `${taskname} task resulted in the error:\n\n${err.stack}\n`);
// exit normally
}
export function logFullError(err: Error, isFatal = true) {
export function logFullError(err: any, isFatal = true) {
if (err.request?.headers?.Authorization) {
err.request.headers.Authorization = '***';
}
Expand Down
5 changes: 2 additions & 3 deletions db-tabulator/MariadbMetadataStore.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import {TOOLS_DB_HOST, toolsdb} from "../db";
import {toolsdb} from "../db";
import {fetchQueriesForPage, Query} from "./app";
import {MetadataStore} from "./MetadataStore";
import {createLocalSSHTunnel, setDifference} from "../utils";
import {setDifference} from "../utils";
import * as crypto from "crypto";

export class MariadbMetadataStore implements MetadataStore {
db: toolsdb;

async init() {
await createLocalSSHTunnel(TOOLS_DB_HOST);
this.db = new toolsdb('dbreports_p');
}

Expand Down
43 changes: 34 additions & 9 deletions db-tabulator/app.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import { argv, bot, emailOnError, log, Mwn, TextExtractor } from "../botbase";
import { enwikidb, SQLError } from "../db";
import { Template } from "../../mwn/build/wikitext";
import { arrayChunk, createLogStream, lowerFirst, readFile, writeFile } from "../utils";
import { arrayChunk, createLogStream, lowerFirst, readFile, stripOuterNowikis, writeFile } from "../utils";
import {NS_CATEGORY, NS_FILE, NS_MAIN} from "../namespaces";
import { formatSummary } from "../reports/commons";
import {MetadataStore} from "./MetadataStore";
import {HybridMetadataStore} from "./HybridMetadataStore";
import {applyJsPreprocessing, processQueriesExternally} from "./preprocess";

export const BOT_NAME = 'SDZeroBot';
export const TEMPLATE = 'Database report';
Expand Down Expand Up @@ -40,18 +41,27 @@ export function getQueriesFromText(text: string, title: string): Query[] {
log(`[E] Failed to find template on ${title}`);
return [];
}
return templates.map((template, idx) => new Query(template, title, idx + 1));
return templates.map((template, idx) =>
new Query(template, title, idx + 1, !!template.getValue('js_preprocess')?.trim()));
}

// Called from the cronjob
export async function processQueries(allQueries: Record<string, Query[]>) {
await bot.batchOperation(Object.entries(allQueries), async ([page, queries]) => {
log(`[+] Processing page ${page}`);
await processQueriesForPage(queries);
if (queries.filter(q => q.needsExternalRun).length > 0) {
// Needs an external process for security
log(`[+] Processing page ${page} using child process`);
await processQueriesExternally(page);
} else {
log(`[+] Processing page ${page}`);
await processQueriesForPage(queries);
}
}, CONCURRENCY);
}

export async function fetchQueriesForPage(page: string): Promise<Query[]> {
if (argv.fake) {
return getQueriesFromText(readFile(FAKE_INPUT_FILE), 'Fake-Configs');
}
let text = (await bot.read(page, { redirects: false }))?.revisions?.[0]?.content;
if (!text) {
return [];
Expand Down Expand Up @@ -120,13 +130,18 @@ export class Query {
/** Invocation mode */
context: string;

/** Internal tracking for edit summary */
/** Internal tracking: for edit summary */
endNotFound = false;

constructor(template: Template, page: string, idxOnPage: number) {
/** Internal tracking: for queries with JS preprocessing enabled */
needsExternalRun = false;
needsForceKill = false;

constructor(template: Template, page: string, idxOnPage: number, external?: boolean) {
this.page = page;
this.template = template;
this.idx = idxOnPage;
this.needsExternalRun = external;
this.context = getContext();
}

Expand Down Expand Up @@ -155,8 +170,7 @@ export class Query {
getSql() {
let sql = this.getTemplateValue('sql');
if (/^\s*<nowiki ?>/.test(sql)) {
return sql.replace(/^\s*<nowiki ?>/, '')
.replace(/<\/nowiki ?>\s*$/, '');
return stripOuterNowikis(sql);
} else {
// @deprecated
return sql
Expand Down Expand Up @@ -238,6 +252,8 @@ export class Query {
}

this.config.silent = !!this.getTemplateValue('silent');

return this;
}

async runQuery() {
Expand Down Expand Up @@ -372,6 +388,15 @@ export class Query {
return String(value);
});
}
if (this.getTemplateValue('js_preprocess')) {
const jsCode = stripOuterNowikis(this.getTemplateValue('js_preprocess'));
try {
result = await applyJsPreprocessing(result, jsCode, this.toString(), this);
} catch (e) {
log(`[E] Error in applyJsPreprocessing`);
log(e);
}
}

// Add excerpts
for (let {srcIndex, destIndex, namespace, charLimit, charHardLimit} of this.config.excerpts) {
Expand Down
24 changes: 24 additions & 0 deletions db-tabulator/external-update.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import {argv, bot, emailOnError} from "../botbase";
import {metadataStore, fetchQueriesForPage, processQueriesForPage} from "./app";

/**
* Entry point invoked in a child Node.js process for queries
* with custom JS preprocessing enabled.
*/
(async function () {

process.chdir(__dirname);

await Promise.all([
bot.getTokensAndSiteInfo(),
metadataStore.init()
]);

const queries = await fetchQueriesForPage(argv.page);
await processQueriesForPage(queries);

if (queries.filter(q => q.needsForceKill).length > 0) {
process.send({ code: 'catastrophic-error' });
}

})().catch(e => emailOnError(e, 'db-tabulator-child'));
4 changes: 1 addition & 3 deletions db-tabulator/main.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import { argv, bot, emailOnError, log } from "../botbase";
import { closeTunnels, createLocalSSHTunnel, writeFile } from "../utils";
import { closeTunnels, writeFile } from "../utils";
import { checkShutoff, FAKE_OUTPUT_FILE, fetchQueries, processQueries, metadataStore } from "./app";
import { ENWIKI_DB_HOST } from "../db";

/**
* Specs:
Expand Down Expand Up @@ -37,7 +36,6 @@ import { ENWIKI_DB_HOST } from "../db";
await Promise.all([
bot.getTokensAndSiteInfo(),
metadataStore.init(),
createLocalSSHTunnel(ENWIKI_DB_HOST),
]);

if (argv.fake) {
Expand Down
117 changes: 117 additions & 0 deletions db-tabulator/preprocess.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import {argv, log} from "../botbase";
import {sleep} from "../../mwn/build/utils";
import {fork} from "child_process";

const softTimeout = 1000;
const hardTimeout = 1500;
const processTimeout = 30000;

interface PreprocessContext {
warnings: Array<string>;
needsForceKill: boolean;
}

export async function processQueriesExternally(page: string) {
const controller = new AbortController();
await Promise.race([
new Promise<void>((resolve, reject) => {
const { signal } = controller;
const child = fork(
'external-update.js',
[page].concat(argv.fake ? ['--fake'] : []),
{
execArgv: ['--no-node-snapshot'], // required for node 20+
signal
}
);
child.on('message', (message: any) => {
if (message.code === 'catastrophic-error') {
controller.abort(); // This triggers exit event
}
});
child.on('error', (err) => {
log(`[E] Error from child process`);
log(err);
reject();
})
child.on('exit', () => resolve());
}),
sleep(processTimeout).then(() => {
log(`[E] Aborting child process as it took more than 30 seconds`);
// FIXME? looks necessary as some errors in child process cause it to never resolve/reject
controller.abort();
})
]);
}

export async function applyJsPreprocessing(rows: Record<string, string>[], jsCode: string, queryId: string,
ctx: PreprocessContext): Promise<Record<string, any>[]> {
log(`[+] Applying JS preprocessing for ${queryId}`);
let startTime = process.hrtime.bigint();

// Import dynamically as this has native dependencies
let {Isolate} = await import('isolated-vm');

const isolate = new Isolate({
memoryLimit: 16,
onCatastrophicError(msg) {
log(`[E] Catastrophic error in isolated-vm: ${msg}`);
ctx.needsForceKill = true;
}
});
const context = await isolate.createContext();
const jail = context.global;
await jail.set('__dbQueryResult', JSON.stringify(rows));

let result = rows;
let preProcessingComplete = false;
let doPreprocessing = async () => {
try {
// jsCode is expected to declare function preprocess(rows) {...}
let userCode = await isolate.compileScript(jsCode +
'\n ; JSON.stringify(preprocess(JSON.parse(__dbQueryResult))); \n');

let userCodeResult = await userCode.run(context, { timeout: softTimeout });
try {
if (typeof userCodeResult === 'string') { // returns undefined if non-transferable
let userCodeResultParsed = JSON.parse(userCodeResult);
if (Array.isArray(userCodeResultParsed)) {
result = userCodeResultParsed;
} else {
log(`[E] JS preprocessing for ${queryId} returned a non-array: ${userCodeResult.slice(0, 100)} ... Ignoring.`);
ctx.warnings.push(`JS preprocessing didn't return an array of rows, will be ignored`);
}
} else {
log(`[E] JS preprocessing for ${queryId} has an invalid return value: ${userCodeResult}. Ignoring.`);
ctx.warnings.push(`JS preprocessing must have a transferable return value`);
}
} catch (e) { // Shouldn't occur as we are the ones doing the JSON.stringify
log(`[E] JS preprocessing for ${queryId} returned a non-JSON: ${userCodeResult.slice(0, 100)}. Ignoring.`);
}
} catch (e) {
log(`[E] JS preprocessing for ${queryId} failed: ${e.toString()}`);
log(e);
ctx.warnings.push(`JS preprocessing failed: ${e.toString()}`);
} finally {
preProcessingComplete = true;
}
}

await Promise.race([
doPreprocessing(),

// In case isolated-vm timeout doesn't work
sleep(hardTimeout).then(() => {
if (!preProcessingComplete) {
log(`[E] Past ${hardTimeout/1000} second timeout, force-disposing isolate`);
isolate.dispose();
}
})
]);

let endTime = process.hrtime.bigint();
let timeTaken = Number(endTime - startTime) / 1e9;
log(`[+] JS preprocessing for ${queryId} took ${timeTaken.toFixed(3)} seconds, cpuTime: ${isolate.cpuTime}, wallTime: ${isolate.wallTime}.`);

return result;
}
12 changes: 12 additions & 0 deletions db-tabulator/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import assert = require("assert");
import {NoMetadataStore} from "./NoMetadataStore";
import {Template} from "../../mwn/build/wikitext";
import {MwnDate} from "../../mwn";
import {applyJsPreprocessing} from "./preprocess";

describe('db-tabulator', () => {

Expand All @@ -27,4 +28,15 @@ describe('db-tabulator', () => {
assert.strictEqual(isUpdateDue(new bot.date().subtract(40, 'hour'), 2), true);
});

it('applyJsPreprocessing', async () => {
console.log(await applyJsPreprocessing(
[{id: '1', name: 'Main Page'}, {id: '2', name: "Talk:Main Page"}],
`function preprocess(rows) {
rows.forEach(row => {
row.id = parseInt(row.id) + 100;
})
return rows;
}`, 'test', { warnings: [], needsForceKill: false }));
})

});
14 changes: 11 additions & 3 deletions db-tabulator/web-endpoint.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import * as express from "express";
import {checkShutoff, fetchQueriesForPage, metadataStore, processQueriesForPage, SHUTOFF_PAGE, TEMPLATE, SUBSCRIPTIONS_CATEGORY} from "./app";
import {
checkShutoff,
fetchQueriesForPage,
metadataStore,
SHUTOFF_PAGE,
TEMPLATE,
SUBSCRIPTIONS_CATEGORY,
processQueries
} from "./app";
import { createLogStream, mapPath } from "../utils";
import {bot, enwikidb} from "../botbase";
import {getRedisInstance} from "../redis";
Expand Down Expand Up @@ -78,8 +86,8 @@ router.get('/', async function (req, res, next) {
});
if (queries) {
log(`Started processing ${page}`);
try { // should never throw but still ...
await processQueriesForPage(queries);
try {
await processQueries({[page]: queries});
} finally {
redis.srem(redisKey, pgKey).catch(handleRedisError);
}
Expand Down
2 changes: 1 addition & 1 deletion jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
- {"schedule": "5 0 * * *", "name": "job-g13watch", mem: "256Mi", "command": "~/SDZeroBot/job reports/g13-watch/g13-watch.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "15,45 * * * *", "name": "stream-check", mem: "128Mi", "command": "~/SDZeroBot/job eventstream-router/check.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "20 * * * *", "name": "bot-monitor", mem: "256Mi", "command": "~/SDZeroBot/job bot-monitor/main.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "25 1,5,9,13,17,21 * * *", "name": "db-tabulator", mem: "256Mi", "command": "~/SDZeroBot/job db-tabulator/main.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "25 1,5,9,13,17,21 * * *", "name": "db-tabulator", mem: "512Mi", "command": "~/SDZeroBot/job db-tabulator/main.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "8 16 * * *", "name": "gans-list", mem: "256Mi", "command": "~/SDZeroBot/job most-gans/gans-lister.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "0 3 1 * *", "name": "gans-users", mem: "256Mi", "command": "~/SDZeroBot/job most-gans/update-entries.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "0 4 * * *", "name": "shells", mem: "128Mi", "command": "~/SDZeroBot/job terminate-shell-pods.js", "image": "node18", "emails": "onfailure"}
7 changes: 6 additions & 1 deletion utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { spawn } from "child_process";
import { ENWIKI_DB_HOST, TOOLS_DB_HOST } from "./db";
import { REDIS_HOST } from "./redis";

export function readFile(file) {
export function readFile(file: string) {
try {
return fs.readFileSync(file).toString();
} catch (e) {
Expand Down Expand Up @@ -109,6 +109,11 @@ export function stringifyObject(obj) {
}
}

export function stripOuterNowikis(str: string): string {
return str.replace(/^\s*<nowiki ?>/, '')
.replace(/<\/nowiki ?>\s*$/, '');
}

export function makeSentence(list: string[]) {
var text = '';
for (let i = 0; i < list.length; i++) {
Expand Down

0 comments on commit cee29ed

Please sign in to comment.