forked from spencermountain/dumpster-dive
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scratch.js
30 lines (27 loc) · 869 Bytes
/
scratch.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
const dumpster = require('./src');
const drop = require('./src/lib/drop-db');
//144mb → 2.5 minutes = 57mb per worker per minute
const path = '/Users/spencer/data/wikipedia/enwiki-latest-pages-articles.xml';
// const path = '/Users/spencer/data/wikipedia/simplewiki-latest-pages-articles.xml'
// const path = './tests/smallwiki-latest-pages-articles.xml'; //3s
// const path = './tests/tinywiki-latest-pages-articles.xml'; //2s

/**
 * Derive the database name from a dump file path.
 *
 * The name is the run of lowercase letters/hyphens that sits between a `/`
 * and the `-latest-pages` suffix, e.g. `.../enwiki-latest-pages-articles.xml`
 * yields `enwiki`.
 *
 * @param {string} dumpPath - path to a `*-latest-pages-*` dump file
 * @returns {string} the captured name
 * @throws {Error} when the path does not contain `/<name>-latest-pages`
 */
function dbNameFromPath(dumpPath) {
  const found = dumpPath.match(/\/([a-z-]+)-latest-pages/);
  // String#match returns null on no match; fail with a readable message
  // instead of a TypeError from indexing into null.
  if (found === null) {
    throw new Error(
      `cannot derive a database name from '${dumpPath}': expected a path like '/dir/enwiki-latest-pages-articles.xml'`
    );
  }
  return found[1];
}

const dbName = dbNameFromPath(path);
// Settings object handed to both drop() and dumpster() below.
// The commented-out entries are optional settings kept here as quick toggles
// (their exact effect is defined in ./src, not in this file).
const options = {
  file: path,
  db: dbName
  // skip_redirects: false,
  // skip_disambig: false,
  // missing_templates: true,
  // custom: function(doc) {
  //   console.log('+++' + doc.title() + '+++');
  //   return {
  //     title: doc.title()
  //   };
  // }
  // batch_size: 1
  // workers: 2
};

//delete all pages, then start the import
drop(options)
  // Returning the call's value keeps it inside the chain, so a synchronous
  // throw (or a rejected promise, if dumpster returns one) reaches .catch.
  .then(() => dumpster(options))
  .catch((err) => {
    // Without this handler a failed drop/import is an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
  });