-
Notifications
You must be signed in to change notification settings - Fork 1
/
index.js
91 lines (75 loc) · 2.38 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
const fs = require('fs');
const path = require('path');
const lunr = require('lunr');
const csvx = require('csv-parse');
const esql = require('sql-extra');
// Column name -> SQL type for the table generated by sql(); passed as the
// column spec to esql.createTable / esql.setupTable below.
const COLUMNS = {code: 'TEXT', name: 'TEXT', scie: 'TEXT', desc: 'TEXT'};
// Default sql-extra options; callers' options are merged on top of these.
// NOTE(review): `tsvector` presumably assigns full-text weights per column
// (code highest) - confirm against the sql-extra documentation.
const OPTIONS = {pk: 'code', index: true, tsvector: {
code: 'A', name: 'B', scie: 'B', desc: 'B'
}};
// Rows parsed from index.csv, keyed by each row's `code` (filled by loadCorpus).
var corpus = new Map();
// Search index built by createIndex(); stays null until load() has finished.
var index = null;
// Promise for the corpus load started by load(); null before the first load.
var ready = null;
/**
 * Locate the CSV data file that ships with this module.
 *
 * @returns {string} path of `index.csv` inside this module's directory
 */
function csv() {
  const fileName = 'index.csv';
  return path.join(__dirname, fileName);
}
/**
 * Generate the table setup from the rows already held in memory.
 *
 * @param {string} tab - table name
 * @param {object} [opt] - options layered over the module defaults (OPTIONS)
 * @returns {*} whatever esql.setupTable returns (presumably SQL text - confirm
 *   against sql-extra)
 */
function sqlCorpus(tab, opt) {
  // Merge into a fresh object so the shared defaults are never mutated.
  const options = Object.assign({}, OPTIONS, opt);
  const rows = corpus.values();
  return esql.setupTable(tab, COLUMNS, rows, options);
}
/**
 * Generate the table setup by streaming rows straight from the CSV file,
 * without needing the in-memory corpus.
 *
 * Three sql-extra calls are chained, each receiving the previous result as
 * its last argument: createTable -> insertInto.stream -> setupTable.index.
 *
 * @param {string} tab - table name
 * @param {object} [opt] - options layered over the module defaults (OPTIONS)
 * @returns {Promise<*>} result of the final esql.setupTable.index call
 *   (presumably the accumulated SQL text - confirm against sql-extra)
 */
async function sqlCsv(tab, opt) {
  const options = Object.assign({}, OPTIONS, opt);
  // Header row supplies the keys; lines starting with '#' are comments.
  const source = fs.createReadStream(csv());
  const rows = source.pipe(csvx.parse({columns: true, comment: '#'}));
  const created = esql.createTable(tab, COLUMNS, options);
  const inserted = await esql.insertInto.stream(tab, rows, options, created);
  return esql.setupTable.index(tab, COLUMNS, options, inserted);
}
/**
 * Produce the table setup for the descriptions data.
 *
 * Uses the in-memory corpus when the search index has already been built,
 * otherwise streams the rows from the CSV file.
 *
 * @param {string} [tab='descriptions'] - table name
 * @param {object} [opt={}] - options layered over the module defaults
 * @returns {Promise<*>} result of sqlCorpus or sqlCsv
 */
async function sql(tab='descriptions', opt={}) {
  if (!index) {
    const fromFile = await sqlCsv(tab, opt);
    return fromFile;
  }
  return sqlCorpus(tab, opt);
}
/**
 * Read index.csv and store every row in `corpus`, keyed by the row's `code`.
 *
 * @returns {Promise<undefined>} resolves once the parser has emitted 'end';
 *   rejects with the underlying error if the file cannot be read or the CSV
 *   cannot be parsed
 */
function loadCorpus() {
  return new Promise((resolve, reject) => {
    const source = fs.createReadStream(csv());
    const rows = source.pipe(csvx.parse({columns: true, comment: '#'}));
    // pipe() does not forward errors from the source to the destination, so
    // both streams need a listener; without one an 'error' event is thrown
    // as an uncaught exception instead of reaching the caller.
    source.on('error', reject);
    rows.on('error', reject);
    rows.on('data', (row) => corpus.set(row.code, row));
    rows.on('end', () => resolve());
  });
}
/**
 * Build the lunr search index over every row currently in `corpus`.
 *
 * Rows are referenced by `code`; `code`, `name`, `scie` and `desc` are the
 * searchable fields. `name` and `desc` are rewritten before being indexed.
 *
 * @returns {*} the value returned by lunr() for this configuration
 */
function createIndex() {
  return lunr(function() {
    const builder = this;
    builder.ref('code');
    ['code', 'name', 'scie', 'desc'].forEach((field) => builder.field(field));
    for (const row of corpus.values()) {
      // Repeat the word before the first comma so it occurs four times in
      // the indexed text.
      const name = row.name.replace(/^(\w+),/g, '$1 $1 $1 $1,');
      // Drop bracketed spans, reduce each "word. text" segment to just its
      // text, then collapse the remaining separators into single spaces.
      const desc = row.desc
        .replace(/\[.*?\]/g, '')
        .replace(/\w+\.\s([\w\',\/\(\)\- ]+)[;\.]?/g, '$1')
        .replace(/[,\/\(\)\- ]+/g, ' ')
        .trim();
      builder.add({code: row.code, name, scie: row.scie, desc});
    }
  });
}
/**
 * Load the corpus from disk and build the search index, at most once.
 *
 * Concurrent callers share a single in-flight load through `ready`. If the
 * load or the index build fails, `ready` is cleared so that a later call
 * starts a fresh attempt instead of re-throwing the stale failure forever.
 *
 * @returns {Promise<Map>} the corpus map (row `code` -> row)
 * @throws whatever loadCorpus() rejects with or createIndex() throws
 */
async function load() {
  if (index) return corpus;
  if (!ready) {
    ready = loadCorpus()
      .then(() => {
        index = createIndex();
      })
      .catch((err) => {
        // Forget the failed attempt; the error still reaches every caller
        // that was waiting on this promise.
        ready = null;
        throw err;
      });
  }
  await ready;
  return corpus;
}
/**
 * Find the corpus rows that best match a piece of text.
 *
 * If the index has not been built yet, loading is started in the background
 * (the promise is not awaited) and an empty array is returned; call
 * `descriptions.load()` first to get results from the very first query.
 *
 * Only the matches whose match metadata has the most keys are returned, in
 * the order the index reported them.
 *
 * @param {string} txt - free text to search for
 * @returns {Array<object>} best-matching corpus rows, possibly empty
 */
function descriptions(txt) {
  if (!index) {
    load();
    return [];
  }
  // Non-word characters become spaces before the text is handed to the index.
  const query = txt.replace(/\W/g, ' ');
  const matches = index.search(query);
  const keyCount = (match) => Object.keys(match.matchData.metadata).length;
  const best = matches.reduce((top, match) => Math.max(top, keyCount(match)), 0);
  return matches
    .filter((match) => keyCount(match) === best)
    .map((match) => corpus.get(match.ref));
}
// The search function is the module's single export; the helpers are attached
// to it as properties so callers reach them as descriptions.load(),
// descriptions.csv() and descriptions.sql().
descriptions.load = load;
descriptions.csv = csv;
descriptions.sql = sql;
module.exports = descriptions;