Skip to content

Commit

Permalink
add web endpoint and cronjob to track category sizes
Browse files Browse the repository at this point in the history
  • Loading branch information
siddharthvp committed Nov 4, 2024
1 parent c05478d commit d8b3114
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 0 deletions.
43 changes: 43 additions & 0 deletions category-counts/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import {bot, log} from "../botbase";
import {ApiQueryCategoryInfoParams} from "types-mediawiki/api_params";
import {ElasticDataStore} from "../elasticsearch";
import {getKey, normalizeCategory} from "./util";

/**
 * Cron entry point. Reads the tracked categories from the on-wiki page
 * "User:SDZeroBot/Category counter", queries their current sizes through
 * prop=categoryinfo, and appends today's count for each category to the
 * 'category-counts-enwiki' data store.
 */
(async function () {
    const countStore = new ElasticDataStore('category-counts-enwiki');
    await bot.getTokensAndSiteInfo();

    const configPage = await bot.read('User:SDZeroBot/Category counter');
    const text = configPage.revisions[0].content;

    const templates = new bot.Wikitext(text).parseTemplates({
        namePredicate: name => name === 'User:SDZeroBot/Category counter/cat',
    });

    // A template used without a first parameter has nothing to look up;
    // drop it here instead of crashing on `.value` of a missing parameter.
    const names = templates
        .map(t => t.getParam(1))
        .filter(param => param && param.value)
        .map(param => param.value);

    // Normalise, drop invalid titles, and de-duplicate so each category is queried once.
    const namesNorm = Array.from(new Set(
        names.map(name => normalizeCategory(name)).filter(Boolean)
    ));
    if (namesNorm.length === 0) {
        log(`[W] No valid categories found to count`);
        return;
    }

    // Computed once so every count recorded by this run is filed under the same day.
    const date = new bot.Date().format('YYYY-MM-DD', 'utc');

    for await (const json of bot.massQueryGen({
        action: 'query',
        titles: namesNorm,
        prop: 'categoryinfo'
    } as ApiQueryCategoryInfoParams)) {

        for (const page of json.query.pages) {
            if (page.missing) continue;

            // NOTE(review): the API can omit `categoryinfo` (e.g. non-category titles,
            // possibly empty categories - confirm); skip rather than abort the whole run.
            if (!page.categoryinfo) {
                log(`[W] No categoryinfo returned for ${page.title}, skipping`);
                continue;
            }

            const count = page.categoryinfo.size;
            const key = getKey(page.title);
            try {
                await countStore.append(key, {
                    [date]: count
                });
            } catch (e) {
                // One failed write should not stop the remaining categories from being recorded.
                log(`[E] Failed to insert count of ${count} for ${key}`);
                log(e);
            }
        }
    }
})();
20 changes: 20 additions & 0 deletions category-counts/util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import {bot} from "../botbase";
import {NS_CATEGORY} from "../namespaces";

/**
 * Converts a user-supplied category name into its canonical title text,
 * parsing it with the Category namespace as the default namespace.
 *
 * @param name Raw category name, possibly empty or malformed.
 * @returns The title text, or null when the name is empty or cannot be parsed.
 */
export function normalizeCategory(name: string) {
    const parsed = name ? bot.Title.newFromText(name, NS_CATEGORY) : null;
    return parsed ? parsed.toText() : null;
}

/**
 * Derives the data-store key for a category from its title, using
 * whatever Title.getMain() returns for it.
 *
 * Pass in validated category names only: the title is not null-checked,
 * so an unparseable name makes this throw.
 */
export function getKey(category: string) {
    const title = bot.Title.newFromText(category, NS_CATEGORY);
    return title.getMain();
}
30 changes: 30 additions & 0 deletions category-counts/web-endpoint.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import * as express from "express";
import 'express-async-errors';
import {ElasticDataStore} from "../elasticsearch";
import {getKey, normalizeCategory} from "./util";

const router = express.Router();

const countStore = new ElasticDataStore('category-counts-enwiki');

/**
 * GET /raw?category=<name>
 *
 * Responds with the stored document for the given category as JSON.
 * Responds 400 when the "category" parameter is absent, not a plain string,
 * or not a parseable title, and 404 when nothing is stored for it.
 */
router.get('/raw', async (req, res) => {
    // Query values are user-controlled and may arrive as arrays or nested objects
    // (e.g. ?category[]=x); only a plain string is handed to the title parser.
    const rawCategory = req.query.category;
    const category = typeof rawCategory === 'string' ? normalizeCategory(rawCategory) : null;
    if (!category) {
        return res.status(400).render('webservice/views/oneline', {
            text: 'Missing URL parameter "category"'
        });
    }
    const key = getKey(category);

    if (!await countStore.exists(key)) { // TODO: optimize away this query
        return res.status(404).render('webservice/views/oneline', {
            text: 'No data found for [[' + category + ']]'
        });
    }

    const result = await countStore.get(key);

    return res.status(200).type('json').send(result);
});

export default router;
65 changes: 65 additions & 0 deletions elasticsearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import {Client} from "@elastic/elasticsearch";
import {onToolforge} from "./utils";
import {AuthManager} from "./botbase";
import * as RequestParams from "@elastic/elasticsearch/api/requestParams";

// Evaluated once: decides between the Toolforge-hosted endpoints and local ones.
const isToolforge = onToolforge();

/** Client used by ElasticDataStore; credentials come from AuthManager when on Toolforge. */
export const elastic = new Client({
    node: isToolforge
        ? 'http://elasticsearch.svc.tools.eqiad1.wikimedia.cloud:80'
        : 'http://localhost:9200/',
    auth: isToolforge ? AuthManager.get('elasticsearch') : {},
});

/** Client pointed at cloudelastic.wikimedia.org on Toolforge, or a local port otherwise; no credentials configured. */
export const cirrus = new Client({
    node: isToolforge
        ? 'https://cloudelastic.wikimedia.org:8243/'
        : 'http://localhost:4719',
});

/**
 * Small document store on top of a single Elasticsearch index, using the
 * shared `elastic` client. Documents are addressed by their id.
 */
export class ElasticDataStore {
    private readonly index: string;

    /**
     * @param index Name of the Elasticsearch index all operations act on.
     */
    constructor(index: string) {
        this.index = index;
    }

    /**
     * Fetches the `_source` of a document.
     *
     * @param id Document id.
     * @param field When given, only this field is requested in `_source`.
     * @returns The document source as returned by the client.
     */
    async get(id: string, field?: string) {
        const query: RequestParams.Get = {
            index: this.index,
            id
        };
        if (field) {
            query._source = [field];
        }
        const result = await elastic.get(query);
        return result.body._source;
    }

    /**
     * Indexes `body` under `id` via the client's index operation.
     */
    async create(id: string, body: any) {
        await elastic.index({
            index: this.index,
            id,
            body
        });
    }

    /**
     * @returns The body of the client's exists response for `id`.
     */
    async exists(id: string) {
        const result = await elastic.exists({
            index: this.index,
            id
        });
        return result.body;
    }

    /**
     * Sends `body` as a partial document (`doc`) update for `id`.
     */
    async update(id: string, body: any) {
        await elastic.update({
            index: this.index,
            id,
            body: {
                doc: body
            }
        });
    }

    /**
     * Merges `body` into the document `id`, creating the document from
     * `body` when it does not exist yet.
     *
     * Uses a single update request with `doc_as_upsert`, rather than an
     * exists check followed by create or update. This saves a round trip
     * and removes the window in which a concurrent writer could create
     * the document between the check and the write.
     */
    async append(id: string, body: any) {
        await elastic.update({
            index: this.index,
            id,
            body: {
                doc: body,
                doc_as_upsert: true
            }
        });
    }

    /**
     * Deletes the document `id` from the index.
     */
    async delete(id: string) {
        await elastic.delete({
            index: this.index,
            id
        });
    }
}
1 change: 1 addition & 0 deletions jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@
- {"schedule": "25 1,5,9,13,17,21 * * *", "name": "db-tabulator", mem: "512Mi", "command": "~/SDZeroBot/job db-tabulator/main.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "8 16 * * *", "name": "gans-list", mem: "256Mi", "command": "~/SDZeroBot/job most-gans/gans-lister.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "0 4 * * *", "name": "shells", mem: "128Mi", "command": "~/SDZeroBot/job terminate-shell-pods.js", "image": "node18", "emails": "onfailure"}
- {"schedule": "15 1 * * *", "name": "cat-count", mem: "256Mi", "command": "~/SDZeroBot/job category-counts/main.js", "image": "node18", "emails": "onfailure"}
87 changes: 87 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"url": "github.com/siddharthvp/SDZeroBot"
},
"dependencies": {
"@elastic/elasticsearch": "^7.17.14",
"@kubernetes/client-node": "^0.18.1",
"@types/async-redis": "^1.1.1",
"@types/cookie-parser": "^1.4.2",
Expand Down
2 changes: 2 additions & 0 deletions webservice/route-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import gitsync from "./routes/gitsync";
import botMonitorRouter from '../bot-monitor/web-endpoint'
import gitlabRouter from './routes/gitlab';
import autoSqlRouter from "../db-tabulator/autosql/web-endpoint";
import categoryCountRouter from "../category-counts/web-endpoint";

export function registerRoutes(app: express.Router) {
app.use('/', indexRouter);
Expand All @@ -25,4 +26,5 @@ export function registerRoutes(app: express.Router) {
app.use('/gitsync', gitsync);
app.use('/bot-monitor', botMonitorRouter);
app.use('/gitlab', gitlabRouter);
app.use('/category-counts', categoryCountRouter);
}

0 comments on commit d8b3114

Please sign in to comment.