Skip to content

Commit

Permalink
add generalised purge task, archive afc-specific purge task
Browse files Browse the repository at this point in the history
  • Loading branch information
siddharthvp committed Jan 1, 2024
1 parent 9023daa commit d812883
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/restart-services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
update:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: garygrossgarten/github-action-ssh@915e492551885a89131e21d85f2e043c96abff80
with:
command: >-
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/toolforge-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: garygrossgarten/github-action-ssh@915e492551885a89131e21d85f2e043c96abff80
with:
command: >-
Expand Down
2 changes: 2 additions & 0 deletions eventstream-router/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@ import dbTabulator from "../db-tabulator/eventstream-trigger";
import dbTabulatorMetadata from "../db-tabulator/eventstream-metadata-maintainer";
import shutoffsMonitor from "./routes/shutoffs-monitor";
import dykCountsTask from "./routes/dyk-counts";
import purger from "./routes/purger"

// Route modules gathered into a single list.
// NOTE(review): how this list is consumed lies outside the visible hunk — presumably each
// entry is instantiated and registered with the event stream router; confirm in the rest of main.ts.
const routeClasses = [
gans,
dykCountsTask,
botActivityMonitor,
dbTabulatorMetadata,
shutoffsMonitor,
// generalised purge task, default export of ./routes/purger
purger,
];

// debugging a single route example: -r "./test"
Expand Down
127 changes: 127 additions & 0 deletions eventstream-router/routes/purger.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import {Route} from "../app";
import {RecentChangeStreamEvent} from "../RecentChangeStreamEvent";
import {bot} from "../../botbase";
import {arrayChunk, setDifference} from "../../utils";
import {Template} from "mwn/build/wikitext";
import {ActionQueue, BufferedQueue} from "../../queue";
import {ApiPurgeParams} from "mwn/build/api_params";

/**
 * Route that purges pages listed on {@link Purger.CONF_PAGE}.
 *
 * Every `{{/purge}}` template on the configuration page becomes a PurgeEntry.
 * Entries with a usable interval are purged periodically; entries without one are
 * purged once whenever the configuration page changes. Individual purges are first
 * collected by `purgeBatchQueue` and then sent to the API one request at a time
 * through `purgeRequestQueue`.
 */
export default class Purger extends Route {
    readonly name = "purger";

    readonly CONF_PAGE = "User:SDZeroBot/Purge list";

    /** Entries that are purged periodically, mapped to the timer driving them. */
    scheduledPurges: Map<PurgeEntry, NodeJS.Timeout> = new Map();

    /** Collects entries pushed close together in time (2000 ms quiet period) into one batch. */
    purgeBatchQueue = new BufferedQueue<PurgeEntry>(2000, this.queuePurgeRequest.bind(this));
    /** Executes the prepared purge requests with a concurrency of 1. */
    purgeRequestQueue = new ActionQueue<ApiPurgeParams>(1, this.executePurgeRequest.bind(this));

    /**
     * Reads the configuration page and schedules all periodic purges.
     * One-off entries are not purged on (re)start.
     */
    async init() {
        // Awaiting is a no-op if the base implementation is synchronous.
        await super.init();
        const entries = await this.parseEntries();
        this.registerChanges(entries, new Set(), true);
    }

    /** Only edits to the configuration page on enwiki are of interest. */
    filter(data: RecentChangeStreamEvent): boolean {
        return data.wiki === 'enwiki' && data.title === this.CONF_PAGE;
    }

    /**
     * Re-reads the configuration page and applies the difference between the
     * entries now listed and the entries currently scheduled.
     */
    async worker(data: RecentChangeStreamEvent) {
        const entries = await this.parseEntries();

        const scheduled = new Set(this.scheduledPurges.keys());
        const addedEntries = setDifference(entries, scheduled);
        const removedEntries = setDifference(scheduled, entries);

        this.registerChanges(addedEntries, removedEntries);
    }

    /**
     * Cancels the timers of removed entries and schedules (or immediately purges)
     * added entries.
     *
     * @param addedEntries entries that are not scheduled yet
     * @param removedEntries scheduled entries which are no longer listed
     * @param onRestart when true, entries without an interval are NOT purged
     */
    registerChanges(addedEntries: Set<PurgeEntry>, removedEntries: Set<PurgeEntry>, onRestart = false) {
        for (const entry of removedEntries) {
            clearInterval(this.scheduledPurges.get(entry));
            this.scheduledPurges.delete(entry);
        }
        for (const entry of addedEntries) {
            const intervalMs = entry.intervalDays * 8.64e7;
            if (Number.isFinite(intervalMs) && intervalMs > 0) {
                this.scheduledPurges.set(entry, this.schedulePeriodicPurge(entry, intervalMs));
                continue;
            }
            if (!Number.isNaN(entry.intervalDays)) {
                // An interval was given but it is zero/negative/infinite. Passing it to a timer
                // would make it fire every millisecond, so treat the entry as having no interval.
                this.log(`[W] Ignoring unusable interval ${entry.intervalDays} for ${entry.page}`);
            }
            if (!onRestart) {
                // no interval, so trigger a one-off purge
                this.purgeBatchQueue.push(entry);
            }
        }
        // NOTE: entries without an interval are never stored in scheduledPurges, so worker()
        // sees them as "added" on every edit of the configuration page and purges them again
        // each time.
    }

    /**
     * Starts a repeating timer which pushes `entry` to the batch queue every `intervalMs`.
     *
     * Node timers reset any delay above 2^31-1 ms (about 24.8 days) to 1 ms. Longer intervals
     * are therefore split into equal ticks which each fit in a timer, and the purge is
     * triggered on every Nth tick.
     */
    private schedulePeriodicPurge(entry: PurgeEntry, intervalMs: number): NodeJS.Timeout {
        const MAX_TIMER_DELAY_MS = 2147483647;
        const ticksPerPurge = Math.ceil(intervalMs / MAX_TIMER_DELAY_MS);
        const tickMs = intervalMs / ticksPerPurge;
        let ticksElapsed = 0;
        return setInterval(() => {
            ticksElapsed++;
            if (ticksElapsed >= ticksPerPurge) {
                ticksElapsed = 0;
                this.purgeBatchQueue.push(entry);
            }
        }, tickMs);
    }

    /**
     * Converts a batch of entries into API purge requests. Entries are grouped by their
     * combination of link-update flags (as the flags apply to a whole request), and each
     * group is split into requests of at most 100 titles.
     */
    async queuePurgeRequest(entries: Array<PurgeEntry>) {
        // 4 permutations
        const groups = [
            entries.filter(e => e.forceLinkUpdate && e.forceRecursiveLinkUpdate),
            entries.filter(e => e.forceLinkUpdate && !e.forceRecursiveLinkUpdate),
            entries.filter(e => !e.forceLinkUpdate && !e.forceRecursiveLinkUpdate),
            entries.filter(e => !e.forceLinkUpdate && e.forceRecursiveLinkUpdate),
        ];
        for (const group of groups) {
            for (const subBatch of arrayChunk(group, 100)) {
                if (!subBatch.length) {
                    // Nothing to purge for this flag combination.
                    continue;
                }
                this.purgeRequestQueue.push({
                    action: 'purge',
                    titles: subBatch.map(e => e.page),
                    forcelinkupdate: subBatch[0].forceLinkUpdate,
                    forcerecursivelinkupdate: subBatch[0].forceRecursiveLinkUpdate
                });
            }
        }
    }

    /**
     * Sends one purge request. Failures are logged and not rethrown, so that the
     * remaining requests in the queue are still processed.
     */
    async executePurgeRequest(purgeParams: ApiPurgeParams) {
        try {
            await bot.request(purgeParams);
            this.log(`[V] Purged titles ${purgeParams.titles}`);
            this.log(`[+] Purged batch of ${purgeParams.titles.length} pages`);
            await bot.sleep(2000); // Sleep interval between successive purges
        } catch (e) {
            this.log(`[V] Failed to purge titles ${purgeParams.titles}`);
            this.log(`[E] Failed to purge batch of ${purgeParams.titles.length} pages`);
            this.log(e);
        }
    }

    /**
     * Reads the configuration page and returns the set of entries listed on it.
     *
     * For an entry that is already scheduled, the existing object is returned rather than
     * a new one, so that set differences can be computed by reference. Templates that
     * cannot be turned into an entry are logged and skipped, and repeated identical
     * templates yield a single entry.
     */
    async parseEntries() {
        const text = (await bot.read(this.CONF_PAGE)).revisions[0].content;
        const entries = bot.Wikitext.parseTemplates(text, {
            namePredicate: name => name === '/purge'
        });
        this.log(`[V] Parsed ${entries.length} titles from ${this.CONF_PAGE}`);

        const knownEntries = new Map<string, PurgeEntry>();
        for (const existing of this.scheduledPurges.keys()) {
            knownEntries.set(existing.serialize(), existing);
        }

        const result = new Set<PurgeEntry>();
        for (const template of entries) {
            let entry: PurgeEntry;
            try {
                entry = new PurgeEntry(template);
            } catch (e) {
                // One broken template must not prevent the other entries from being processed.
                this.log(`[E] Skipping malformed entry on ${this.CONF_PAGE}`);
                this.log(e);
                continue;
            }
            const key = entry.serialize();
            if (!knownEntries.has(key)) {
                knownEntries.set(key, entry);
            }
            // return reference to existing entry if present, as that facilitates easy setDifference
            result.add(knownEntries.get(key));
        }
        return result;
    }

}

/**
 * A single page to be purged, as described by one template on the configuration page.
 */
class PurgeEntry {
    /** Title of the page to purge (first unnamed template parameter). */
    page: string;
    /** Purge interval in whole days; NaN when no usable interval was given. */
    intervalDays: number;
    forceLinkUpdate: boolean;
    forceRecursiveLinkUpdate: boolean;

    /**
     * @param entry parsed template to read the parameters from
     * @throws Error if the template has no first unnamed parameter (the page title)
     */
    constructor(entry: Template) {
        const pageParam = entry.getParam(1);
        if (pageParam == null) {
            throw new Error('Purge entry is missing the page title (first unnamed parameter)');
        }
        this.page = pageParam.value;

        // Explicit radix so that values such as "0x10" are not read as hexadecimal.
        const interval = parseInt(entry.getParam('interval')?.value ?? '', 10);
        // Zero or negative intervals cannot be scheduled; treat them like a missing interval.
        this.intervalDays = interval > 0 ? interval : NaN;

        // any non-empty value represents true!
        this.forceLinkUpdate = Boolean(entry.getParam('forcelinkupdate')?.value);
        this.forceRecursiveLinkUpdate = Boolean(entry.getParam('forcerecursivelinkupdate')?.value);
    }

    /** String form of all fields; two entries with equal fields serialize identically. */
    serialize() {
        return `${this.page}__${this.intervalDays}__${this.forceLinkUpdate}__${this.forceRecursiveLinkUpdate}`;
    }
}
File renamed without changes.
60 changes: 60 additions & 0 deletions queue.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
 * Queue for actions to be performed with a limited concurrency.
 *
 * Elements are processed in the order they were pushed. At most `parallelism`
 * invocations of `action` are in flight at any time. A failing action (rejected
 * promise or synchronous throw) is reported to `onError` and does not stop the
 * processing of the remaining elements.
 */
export class ActionQueue<T> {
    action: (e: T) => Promise<any>;
    parallelism: number;
    pendingQueue: Array<T> = [];
    running = 0;
    onError: (err: unknown, e: T) => void;

    /**
     * @param parallelism maximum number of actions running at the same time
     * @param action function invoked for every element pushed
     * @param onError called with the error and the element when an action fails;
     *   logs to the console by default
     */
    constructor(
        parallelism: number,
        action: (e: T) => Promise<any>,
        onError: (err: unknown, e: T) => void = (err) => console.error(err)
    ) {
        this.parallelism = parallelism;
        this.action = action;
        this.onError = onError;
    }

    /** Adds an element and starts processing it right away if a slot is free. */
    push(e: T) {
        this.pendingQueue.push(e);
        this.trigger();
    }

    /** Starts pending actions until the queue is empty or all slots are in use. */
    trigger() {
        while (this.running < this.parallelism && this.pendingQueue.length) {
            const element = this.pendingQueue.shift() as T;
            this.running++;

            let outcome: Promise<any>;
            try {
                outcome = Promise.resolve(this.action(element));
            } catch (err) {
                // A synchronous throw must release the slot just like a rejection does,
                // else `running` stays incremented and the queue stalls.
                outcome = Promise.reject(err);
            }

            outcome
                // Without a rejection handler, the promise returned by finally() would be
                // an unhandled rejection.
                .catch(err => this.onError(err, element))
                .finally(() => {
                    this.running--;
                    this.trigger();
                });
        }
    }

}

/**
 * Queue for items occurring together in time to be grouped into batches.
 *
 * Every push (re)starts a timer of `duration` milliseconds. When the timer expires
 * without a further push, all items collected so far are handed to `batchConsumer`
 * as one batch. A failing consumer (rejected promise or synchronous throw) is
 * reported to `onError`.
 */
export class BufferedQueue<T> {
    duration: number;
    currentBatch: Array<T> = [];
    currentBatchTimeout: NodeJS.Timeout | undefined;
    batchConsumer: (batch: Array<T>) => Promise<any>;
    onError: (err: unknown, batch: Array<T>) => void;

    /**
     * @param duration quiet period in milliseconds after which a batch is finalized
     * @param batchConsumer function receiving each finalized batch
     * @param onError called with the error and the batch when the consumer fails;
     *   logs to the console by default
     */
    constructor(
        duration: number,
        batchConsumer: (batch: Array<T>) => Promise<any>,
        onError: (err: unknown, batch: Array<T>) => void = (err) => console.error(err)
    ) {
        this.duration = duration;
        this.batchConsumer = batchConsumer;
        this.onError = onError;
    }

    /** Adds an item to the current batch and restarts the quiet-period timer. */
    push(e: T) {
        this.currentBatch.push(e);
        if (this.currentBatchTimeout) {
            clearTimeout(this.currentBatchTimeout);
        }
        this.currentBatchTimeout = setTimeout(this.finalizeBatch.bind(this), this.duration)
    }

    /** Hands the collected items to the consumer and starts a new, empty batch. */
    finalizeBatch() {
        clearTimeout(this.currentBatchTimeout);
        this.currentBatchTimeout = undefined;

        // Detach the batch before invoking the consumer, so that items pushed while the
        // consumer runs go to the next batch, and so that a throwing consumer cannot cause
        // the same items to be delivered again.
        const batch = this.currentBatch;
        this.currentBatch = [];
        if (!batch.length) {
            return;
        }

        let outcome: Promise<any>;
        try {
            outcome = Promise.resolve(this.batchConsumer(batch));
        } catch (err) {
            outcome = Promise.reject(err);
        }
        // Attach a handler so that a failing consumer is not an unhandled rejection.
        outcome.catch(err => this.onError(err, batch));
    }
}

0 comments on commit d812883

Please sign in to comment.