-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
eventstream-router: add first task: g13-watch
- Loading branch information
1 parent
59de68f
commit 829ed76
Showing
9 changed files
with
286 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import {fs, bot, mysql, argv} from '../botbase'; | ||
import {streamLog} from './utils'; | ||
const {preprocessDraftForExtract} = require('../tasks/commons'); | ||
const TextExtractor = require('../TextExtractor')(bot); | ||
const auth = require('../.auth'); | ||
|
||
// Logger bound to the ./g13-watch.out write stream; assigned in init()
let log;
// MySQL connection pool returned by initDb(); assigned in init()
let pool;
|
||
/**
 * One-time setup for the g13-watch task.
 *
 * Opens ./g13-watch.out in append mode and binds it to the module-level
 * logger, fetches site info through the bot, and creates the database pool.
 */
export async function init() {
	const outStream = fs.createWriteStream('./g13-watch.out', {flags: 'a', encoding: 'utf8'});
	log = streamLog.bind(outStream);

	log(`[S] Started`);
	await bot.getSiteInfo();
	pool = await initDb();
}
|
||
/**
 * Creates the MySQL pool for the g13-watch database and ensures that the
 * `g13` table exists.
 *
 * @returns the newly created pool
 */
async function initDb() {
	// Create a pool, but almost all the time only one connection will be used
	// Each pool connection is released immediately after use
	const connectionOptions = {
		host: 'tools.db.svc.eqiad.wmflabs',
		user: auth.db_user,
		password: auth.db_password,
		port: 3306,
		database: 's54328__g13watch_p',
		waitForConnections: true,
		connectionLimit: 5
	};
	const dbPool = mysql.createPool(connectionOptions);

	// use utf8_unicode_ci so that MariaDb allows a varchar(255) field to have unique constraint
	// max index column size is 767 bytes. 255*3 = 765 bytes with utf8, 255*4 = 1020 bytes with utf8mb4
	const createTableSql = `
		CREATE TABLE IF NOT EXISTS g13(
			name VARCHAR(255) UNIQUE,
			description VARCHAR(255),
			excerpt BLOB,
			size INT,
			ts TIMESTAMP NOT NULL
		) COLLATE 'utf8_unicode_ci'
	`;
	await dbPool.execute(createTableSql);

	return dbPool;
}
|
||
/**
 * Decides whether an event should be handed to `worker`.
 *
 * @param data - event object; its `wiki`, `type` and `title` fields are inspected
 * @returns true only for enwiki `categorize` events on the
 *   abandoned-drafts speedy deletion category
 */
export function filter(data) {
	if (data.wiki !== 'enwiki') {
		return false;
	}
	if (data.type !== 'categorize') {
		return false;
	}
	return data.title === 'Category:Candidates for speedy deletion as abandoned drafts or AfC submissions';
}
|
||
/**
 * Records one page that was just added to the watched category.
 *
 * The page title is parsed out of the event comment (`[[:Title]] added ...`);
 * events whose comment does not have that shape are ignored. The page's
 * latest revision and short description are then read through the bot and a
 * row (title, description, extract, size, timestamp) is inserted into `g13`.
 *
 * Database errors are logged rather than thrown. A duplicate-key error is
 * logged as a warning, since it means the page entered the category again.
 *
 * @param data - event object; `comment` and `timestamp` are read
 */
export async function worker(data) {
	const match = /^\[\[:(.*?)\]\] added/.exec(data.comment);
	if (!match) {
		return;
	}

	const title = match[1];
	// data.timestamp is *seconds* since epoch
	// This date object will be passed to db
	const ts = data.timestamp ? new bot.date(data.timestamp * 1000) : null;
	log(`[+] Page ${title} at ${ts}`);
	const pagedata = await bot.read(title, {
		prop: 'revisions|description',
		rvprop: 'content|size'
	});

	// `revisions` may be absent or empty, so guard the element itself before
	// reading fields from it.
	const revision = pagedata?.revisions?.[0];
	const text = revision?.content ?? null;
	const size = revision?.size ?? null;

	let desc = pagedata?.description ?? null;
	// The description column is VARCHAR(255): shorten anything longer.
	// Strings expose `.length` (there is no `.size`).
	if (desc && desc.length > 255) {
		desc = desc.slice(0, 250) + ' ...';
	}
	// NOTE(review): `text` can be null here; confirm TextExtractor.getExtract tolerates that.
	const extract = TextExtractor.getExtract(text, 300, 550, preprocessDraftForExtract);

	let conn;
	try {
		conn = await pool.getConnection();
		await conn.execute(`INSERT INTO g13 VALUES(?, ?, ?, ?, ?)`, [title, desc, extract, size, ts]);
	} catch (err) {
		if (err.code === 'ER_DUP_ENTRY') {
			log(`[W] ${title} entered category more than once`);
			return;
		}
		log(err);
	} finally {
		// `conn` is still undefined when getConnection() itself failed; releasing
		// unconditionally would throw from `finally` after the error was logged.
		conn?.release();
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// Type definitions for eventsource 1.1 | ||
// Project: http://github.com/EventSource/eventsource | ||
// Definitions by: Scott Lee Davis <https://github.com/scottleedavis> | ||
// Ali Afroozeh <https://github.com/afroozeh> | ||
// Pedro Gámez <https://github.com/snakedrak> | ||
// Akuukis <https://github.com/Akuukis> | ||
// Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped | ||
|
||
// eventsource uses DOM dependencies which are absent in a browserless environment like node.js. | ||
// to avoid compiler errors this monkey patch is used. See more details in: | ||
// - sinon: https://github.com/DefinitelyTyped/DefinitelyTyped/issues/11351 | ||
// - rxjs: https://github.com/ReactiveX/rxjs/issues/1986 | ||
/// <reference path="./dom-monkeypatch.d.ts" /> | ||
|
||
/**
 * Ambient declaration of the `EventSource` client class exported by this module.
 */
declare class EventSource {
	// Ready-state constants, available on the constructor ...
	static readonly CLOSED: number;
	static readonly CONNECTING: number;
	static readonly OPEN: number;

	/**
	 * @param url - address of the event stream
	 * @param eventSourceInitDict - optional settings, see `EventSource.EventSourceInitDict`
	 */
	constructor(url: string, eventSourceInitDict?: EventSource.EventSourceInitDict);

	// ... and mirrored on every instance
	readonly CLOSED: number;
	readonly CONNECTING: number;
	readonly OPEN: number;

	// Read-only connection properties
	readonly url: string;
	readonly readyState: number;
	readonly withCredentials: boolean;

	// Assignable handler properties
	onopen: (evt: MessageEvent) => any;
	onmessage: (evt: MessageEvent) => any;
	onerror: (evt: MessageEvent) => any;

	// Listener registration and event dispatch
	addEventListener(type: string, listener: EventListener): void;
	dispatchEvent(evt: Event): boolean;
	removeEventListener(type: string, listener?: EventListener): void;

	close(): void;
}
|
||
/**
 * Types that accompany the `EventSource` class.
 */
declare namespace EventSource {
	/** Named ready states with their numeric values. */
	enum ReadyState {
		CONNECTING = 0,
		OPEN = 1,
		CLOSED = 2,
	}

	/** Shape of the optional second argument of the `EventSource` constructor. */
	interface EventSourceInitDict {
		withCredentials?: boolean;
		headers?: object;
		proxy?: string;
		https?: object;
		rejectUnauthorized?: boolean;
	}
}
|
||
export = EventSource; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"scripts": { | ||
"start": "jstart -N stream -mem 2g ~/bin/node ~/SDZeroBot/eventstream-router/main.js", | ||
"stop": "jstop stream", | ||
"restart": "npm run stop && sleep 10 && npm run start" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"compilerOptions": { | ||
"target": "ES2020", | ||
"lib": [ | ||
"ES2020" | ||
], | ||
"module": "CommonJS", | ||
"types": ["node", "mocha"], | ||
"sourceMap": true | ||
}, | ||
"ts-node": { | ||
"logError": true | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters