From d8b3114bf68ab27050af52f80831326d281c2038 Mon Sep 17 00:00:00 2001
From: Siddharth VP
Date: Mon, 4 Nov 2024 23:14:20 +0530
Subject: [PATCH] add web endpoint and cronjob to track category sizes

---
Review note: the contents of the four new files were revised after this patch
was generated. Hunk sizes and the diffstat below have been updated; the blob
ids on the "index" lines of those files still refer to the pre-review contents.

 category-counts/main.ts         | 61 +++++++++++++++++++++
 category-counts/util.ts         | 31 +++++++++++
 category-counts/web-endpoint.ts | 38 +++++++++++++
 elasticsearch.ts                | 96 +++++++++++++++++++++++++++++++++
 jobs.yml                        |  1 +
 package-lock.json               | 87 ++++++++++++++++++++++++++++++
 package.json                    |  1 +
 webservice/route-registry.ts    |  2 +
 8 files changed, 317 insertions(+)
 create mode 100644 category-counts/main.ts
 create mode 100644 category-counts/util.ts
 create mode 100644 category-counts/web-endpoint.ts
 create mode 100644 elasticsearch.ts

diff --git a/category-counts/main.ts b/category-counts/main.ts
new file mode 100644
index 0000000..2317a78
--- /dev/null
+++ b/category-counts/main.ts
@@ -0,0 +1,61 @@
+import {bot, log} from "../botbase";
+import {ApiQueryCategoryInfoParams} from "types-mediawiki/api_params";
+import {ElasticDataStore} from "../elasticsearch";
+import {getKey, normalizeCategory} from "./util";
+
+/**
+ * Cron job entry point. Reads the categories listed on
+ * [[User:SDZeroBot/Category counter]], queries their current sizes and stores
+ * each size in the `category-counts-enwiki` store under the current UTC date.
+ */
+(async function () {
+	const countStore = new ElasticDataStore('category-counts-enwiki');
+	await bot.getTokensAndSiteInfo();
+
+	const listPage = await bot.read('User:SDZeroBot/Category counter');
+	const text = listPage.revisions[0].content;
+
+	const templates = new bot.Wikitext(text).parseTemplates({
+		namePredicate: name => name === 'User:SDZeroBot/Category counter/cat',
+	});
+
+	// A template without a first parameter must not abort the whole run:
+	// map it to '' so that normalizeCategory() filters it out.
+	const names = templates.map(t => {
+		const param = t.getParam(1);
+		return param ? param.value : '';
+	});
+	const namesNorm = names.map(name => normalizeCategory(name)).filter(Boolean);
+
+	// Computed once so every category in a run is recorded under the same date.
+	const date = new bot.Date().format('YYYY-MM-DD', 'utc');
+
+	for await (const json of bot.massQueryGen({
+		action: 'query',
+		titles: namesNorm,
+		prop: 'categoryinfo'
+	} as ApiQueryCategoryInfoParams)) {
+
+		for (const pg of json.query.pages) {
+			if (pg.missing) continue;
+			if (!pg.categoryinfo) {
+				// Reading .size below would throw and kill the job mid-run.
+				log(`[W] No categoryinfo returned for ${pg.title}, skipping`);
+				continue;
+			}
+
+			const count = pg.categoryinfo.size;
+			const key = getKey(pg.title);
+			try {
+				// NOTE(review): every new date becomes a new field of the document.
+				// Confirm the index mapping can take an ever-growing set of fields.
+				await countStore.append(key, {
+					[date]: count
+				});
+			} catch (e) {
+				log(`[E] Failed to insert count of ${count} for ${key}`);
+				log(e);
+			}
+		}
+	}
+})();
diff --git a/category-counts/util.ts b/category-counts/util.ts
new file mode 100644
index 0000000..9b13f3e
--- /dev/null
+++ b/category-counts/util.ts
@@ -0,0 +1,31 @@
+import {bot} from "../botbase";
+import {NS_CATEGORY} from "../namespaces";
+
+/**
+ * Normalizes a category name, given with or without the "Category:" prefix.
+ *
+ * @param name category name as entered by a user
+ * @returns the full title text, or null if the name is empty, is not a valid
+ * title, or explicitly names a page outside the category namespace
+ */
+export function normalizeCategory(name: string) {
+	if (!name) {
+		return null;
+	}
+	const title = bot.Title.newFromText(name, NS_CATEGORY);
+	// NS_CATEGORY is only the default namespace: a name such as "User:Foo" parses
+	// to a non-category title, which getKey() would then confuse with "Category:Foo".
+	if (!title || title.getNamespaceId() !== NS_CATEGORY) {
+		return null;
+	}
+	return title.toText();
+}
+
+/**
+ * Returns the data store key of a category, obtained through Title#getMain().
+ * Pass in validated category names only: the result of Title.newFromText() is
+ * not null-checked here.
+ */
+export function getKey(category: string) {
+	return bot.Title.newFromText(category, NS_CATEGORY).getMain();
+}
diff --git a/category-counts/web-endpoint.ts b/category-counts/web-endpoint.ts
new file mode 100644
index 0000000..fb8f87c
--- /dev/null
+++ b/category-counts/web-endpoint.ts
@@ -0,0 +1,38 @@
+import * as express from "express";
+import 'express-async-errors';
+import {ElasticDataStore} from "../elasticsearch";
+import {getKey, normalizeCategory} from "./util";
+
+const router = express.Router();
+
+const countStore = new ElasticDataStore('category-counts-enwiki');
+
+/**
+ * GET /raw?category=<name>
+ * Responds with the stored document of the category as JSON; 400 if the
+ * parameter is missing or unusable, 404 if nothing is stored for the category.
+ */
+router.get('/raw', async (req, res) => {
+	// A repeated or bracketed query parameter is parsed into an array or object,
+	// so check the type instead of asserting that it is a string.
+	const param = req.query.category;
+	const category = typeof param === 'string' ? normalizeCategory(param) : null;
+	if (!category) {
+		return res.status(400).render('webservice/views/oneline', {
+			text: 'Missing or invalid URL parameter "category"'
+		});
+	}
+	const key = getKey(category);
+
+	if (!await countStore.exists(key)) { // TODO: optimize away this query
+		return res.status(404).render('webservice/views/oneline', {
+			text: 'No data found for [[' + category + ']]'
+		});
+	}
+
+	const result = await countStore.get(key);
+
+	return res.status(200).type('json').send(result);
+});
+
+export default router;
diff --git a/elasticsearch.ts b/elasticsearch.ts
new file mode 100644
index 0000000..405f2e7
--- /dev/null
+++ b/elasticsearch.ts
@@ -0,0 +1,96 @@
+import {Client} from "@elastic/elasticsearch";
+import {onToolforge} from "./utils";
+import {AuthManager} from "./botbase";
+import * as RequestParams from "@elastic/elasticsearch/api/requestParams";
+
+/** Client used by ElasticDataStore. */
+export const elastic = new Client({
+	node: onToolforge() ? 'http://elasticsearch.svc.tools.eqiad1.wikimedia.cloud:80' : 'http://localhost:9200/',
+	auth: onToolforge() ? AuthManager.get('elasticsearch') : {},
+});
+
+/** Client for the cloudelastic endpoint; no credentials are configured. */
+export const cirrus = new Client({
+	node: onToolforge() ? 'https://cloudelastic.wikimedia.org:8243/': 'http://localhost:4719',
+});
+
+/**
+ * Key-value style document store on top of a single Elasticsearch index.
+ * All requests go through the `elastic` client.
+ */
+export class ElasticDataStore {
+	private readonly index: string;
+	constructor(index: string) {
+		this.index = index;
+	}
+
+	/**
+	 * Fetches the source of a document.
+	 * @param field if given, only this field of the source is requested
+	 */
+	async get(id: string, field?: string) {
+		const query: RequestParams.Get = {
+			index: this.index,
+			id: id
+		};
+		if (field) {
+			query._source = [field];
+		}
+		const result = await elastic.get(query);
+		return result.body._source;
+	}
+
+	/** Stores `body` as document `id` through the index API. */
+	async create(id: string, body: any) {
+		await elastic.index({
+			index: this.index,
+			id: id,
+			body: body
+		});
+	}
+
+	/** Tells whether a document with the given id exists. */
+	async exists(id: string) {
+		const result = await elastic.exists({
+			index: this.index,
+			id: id,
+		});
+		return result.body;
+	}
+
+	/** Merges the fields of `body` into the existing document `id`. */
+	async update(id: string, body: any) {
+		await elastic.update({
+			index: this.index,
+			id: id,
+			body: {
+				doc: body
+			}
+		});
+	}
+
+	/**
+	 * Merges the fields of `body` into document `id`, creating it if needed.
+	 * Uses a single upsert request rather than exists() followed by
+	 * create()/update(), which needed two requests and could lose a write when
+	 * two writers raced to create the same document.
+	 */
+	async append(id: string, body: any) {
+		await elastic.update({
+			index: this.index,
+			id: id,
+			body: {
+				doc: body,
+				doc_as_upsert: true
+			}
+		});
+	}
+
+	/** Deletes document `id`. */
+	async delete(id: string) {
+		await elastic.delete({
+			index: this.index,
+			id: id
+		});
+	}
+}
diff --git a/jobs.yml b/jobs.yml
index 3c69f99..9be49b3 100644
--- a/jobs.yml
+++ b/jobs.yml
@@ -25,3 +25,4 @@
 - {"schedule": "25 1,5,9,13,17,21 * * *", "name": "db-tabulator", mem: "512Mi", "command": "~/SDZeroBot/job db-tabulator/main.js", "image": "node18", "emails": "onfailure"}
 - {"schedule": "8 16 * * *", "name": "gans-list", mem: "256Mi", "command": "~/SDZeroBot/job most-gans/gans-lister.js", "image": "node18", "emails": "onfailure"}
 - {"schedule": "0 4 * * *", "name":
"shells", mem: "128Mi", "command": "~/SDZeroBot/job terminate-shell-pods.js", "image": "node18", "emails": "onfailure"} +- {"schedule": "15 1 * * *", "name": "cat-count", mem: "256Mi", "command": "~/SDZeroBot/job category-counts/main.js", "image": "node18", "emails": "onfailure"} diff --git a/package-lock.json b/package-lock.json index f996ee6..32848c0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "license": "MIT", "dependencies": { + "@elastic/elasticsearch": "^7.17.14", "@kubernetes/client-node": "^0.18.1", "@types/async-redis": "^1.1.1", "@types/cookie-parser": "^1.4.2", @@ -92,6 +93,44 @@ "node": ">=6.0.0" } }, + "node_modules/@elastic/elasticsearch": { + "version": "7.17.14", + "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-7.17.14.tgz", + "integrity": "sha512-6uQ1pVXutwz1Krwooo67W+3K8BwH1ASMh1WoHTpomUzw8EXecXN5lHIJ9EPqTHuv1WqR2LKkSJyagcq0HYUJpg==", + "license": "Apache-2.0", + "dependencies": { + "debug": "^4.3.1", + "hpagent": "^0.1.1", + "ms": "^2.1.3", + "secure-json-parse": "^2.4.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@elastic/elasticsearch/node_modules/debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/@elastic/elasticsearch/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, "node_modules/@eslint/eslintrc": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.0.tgz", @@
-3034,6 +3073,12 @@ "he": "bin/he" } }, + "node_modules/hpagent": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-0.1.2.tgz", + "integrity": "sha512-ePqFXHtSQWAFXYmj+JtOTHr84iNrII4/QRlAAPPE+zqnKy4xJo7Ie1Y4kC7AdB+LxLxSTTzBMASsEcy0q8YyvQ==", + "license": "MIT" + }, "node_modules/html-encoding-sniffer": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz", @@ -5401,6 +5446,12 @@ "node": ">=10" } }, + "node_modules/secure-json-parse": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==", + "license": "BSD-3-Clause" + }, "node_modules/semver": { "version": "5.7.2", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", @@ -6780,6 +6831,32 @@ "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.5.tgz", "integrity": "sha512-EOv5IK8arwh3LI47dz1b0tKUb/1uhHAnHJOrjgtQMIpu1uXd9mlFrJg9IUgGUgZ41Ch0K8REPTYpO7B76b4vJg==" }, + "@elastic/elasticsearch": { + "version": "7.17.14", + "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-7.17.14.tgz", + "integrity": "sha512-6uQ1pVXutwz1Krwooo67W+3K8BwH1ASMh1WoHTpomUzw8EXecXN5lHIJ9EPqTHuv1WqR2LKkSJyagcq0HYUJpg==", + "requires": { + "debug": "^4.3.1", + "hpagent": "^0.1.1", + "ms": "^2.1.3", + "secure-json-parse": "^2.4.0" + }, + "dependencies": { + "debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "requires": { + "ms": "^2.1.3" + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + } + } + 
}, "@eslint/eslintrc": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.0.tgz", @@ -9095,6 +9172,11 @@ "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", "dev": true }, + "hpagent": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-0.1.2.tgz", + "integrity": "sha512-ePqFXHtSQWAFXYmj+JtOTHr84iNrII4/QRlAAPPE+zqnKy4xJo7Ie1Y4kC7AdB+LxLxSTTzBMASsEcy0q8YyvQ==" + }, "html-encoding-sniffer": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz", @@ -10908,6 +10990,11 @@ "xmlchars": "^2.2.0" } }, + "secure-json-parse": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==" + }, "semver": { "version": "5.7.2", "resolved": "https://registry.npmjs.org/semver/-/semver-5.7.2.tgz", diff --git a/package.json b/package.json index ef3c701..6d21ab9 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "url": "github.com/siddharthvp/SDZeroBot" }, "dependencies": { + "@elastic/elasticsearch": "^7.17.14", "@kubernetes/client-node": "^0.18.1", "@types/async-redis": "^1.1.1", "@types/cookie-parser": "^1.4.2", diff --git a/webservice/route-registry.ts b/webservice/route-registry.ts index 7672530..a260ca1 100644 --- a/webservice/route-registry.ts +++ b/webservice/route-registry.ts @@ -11,6 +11,7 @@ import gitsync from "./routes/gitsync"; import botMonitorRouter from '../bot-monitor/web-endpoint' import gitlabRouter from './routes/gitlab'; import autoSqlRouter from "../db-tabulator/autosql/web-endpoint"; +import categoryCountRouter from "../category-counts/web-endpoint"; export function registerRoutes(app: express.Router) { app.use('/', indexRouter); @@ -25,4 +26,5 @@ export function registerRoutes(app: 
express.Router) { app.use('/gitsync', gitsync); app.use('/bot-monitor', botMonitorRouter); app.use('/gitlab', gitlabRouter); + app.use('/category-counts', categoryCountRouter); }