From b796bc06554ee86c1ef583bcc51c9241ff9ea3bd Mon Sep 17 00:00:00 2001 From: Allan Chain <36528777+AllanChain@users.noreply.github.com> Date: Thu, 7 Nov 2024 16:29:12 +0800 Subject: [PATCH] feat(cite): enable reference autocomplete Adapted from https://github.com/BYRIO/overleaf/ --- develop/dev.env | 1 + develop/docker-compose.dev.yml | 11 + develop/docker-compose.yml | 8 + server-ce/runit/references-overleaf/run | 9 + services/references/Dockerfile | 24 ++ services/references/app.js | 40 ++++ services/references/app/js/BibParser.js | 209 ++++++++++++++++++ .../app/js/ReferencesAPIController.js | 36 +++ .../references/config/settings.defaults.cjs | 9 + services/references/package.json | 35 +++ services/web/config/settings.defaults.js | 3 + 11 files changed, 385 insertions(+) create mode 100755 server-ce/runit/references-overleaf/run create mode 100644 services/references/Dockerfile create mode 100644 services/references/app.js create mode 100644 services/references/app/js/BibParser.js create mode 100644 services/references/app/js/ReferencesAPIController.js create mode 100644 services/references/config/settings.defaults.cjs create mode 100644 services/references/package.json diff --git a/develop/dev.env b/develop/dev.env index f817987f8e..9541863177 100644 --- a/develop/dev.env +++ b/develop/dev.env @@ -18,3 +18,4 @@ WEBPACK_HOST=webpack WEB_API_PASSWORD=overleaf WEB_API_USER=overleaf WEB_HOST=web +REFERENCES_HOST=references diff --git a/develop/docker-compose.dev.yml b/develop/docker-compose.dev.yml index 4432a24162..022d3394e0 100644 --- a/develop/docker-compose.dev.yml +++ b/develop/docker-compose.dev.yml @@ -79,6 +79,17 @@ services: - ../services/history-v1/knexfile.js:/overleaf/services/history-v1/knexfile.js - ../services/history-v1/migrations:/overleaf/services/history-v1/migrations + references: + command: ["node", "--watch", "app.js"] + environment: + - NODE_OPTIONS=--inspect=0.0.0.0:9229 + ports: + - "127.0.0.1:9236:9229" + volumes: + - 
../services/references/app:/overleaf/services/references/app + - ../services/references/config:/overleaf/services/references/config + - ../services/references/app.js:/overleaf/services/references/app.js + notifications: command: ["node", "--watch", "app.js"] environment: diff --git a/develop/docker-compose.yml b/develop/docker-compose.yml index f9053c21ce..dfd49b697f 100644 --- a/develop/docker-compose.yml +++ b/develop/docker-compose.yml @@ -96,6 +96,13 @@ services: volumes: - mongo-data:/data/db + references: + build: + context: .. + dockerfile: services/references/Dockerfile + env_file: + - dev.env + notifications: build: context: .. @@ -161,6 +168,7 @@ services: - filestore - history-v1 - notifications + - references - project-history - real-time - spelling diff --git a/server-ce/runit/references-overleaf/run b/server-ce/runit/references-overleaf/run new file mode 100755 index 0000000000..151ac5d323 --- /dev/null +++ b/server-ce/runit/references-overleaf/run @@ -0,0 +1,9 @@ +#!/bin/bash + +NODE_PARAMS="" +if [ "$DEBUG_NODE" == "true" ]; then + echo "running debug - references" + NODE_PARAMS="--inspect=0.0.0.0:30060" +fi + +NODE_CONFIG_DIR=/overleaf/services/references/config exec /sbin/setuser www-data /usr/bin/node $NODE_PARAMS /overleaf/services/references/app.js >> /var/log/overleaf/references.log 2>&1 diff --git a/services/references/Dockerfile b/services/references/Dockerfile new file mode 100644 index 0000000000..8b1087e5fd --- /dev/null +++ b/services/references/Dockerfile @@ -0,0 +1,24 @@ +FROM node:18.20.2 AS base + +WORKDIR /overleaf/services/references + +# Google Cloud Storage needs a writable $HOME/.config for resumable uploads +# (see https://googleapis.dev/nodejs/storage/latest/File.html#createWriteStream) +RUN mkdir /home/node/.config && chown node:node /home/node/.config + +FROM base AS app + +COPY package.json package-lock.json /overleaf/ +COPY services/references/package.json /overleaf/services/references/ +COPY libraries/ 
// Entry point of the references service: a small Express app that exposes
// the bibliography indexing endpoint plus a health check.
import '@overleaf/metrics/initialize.js'

import express from 'express'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import ReferencesAPIController from './app/js/ReferencesAPIController.js'
import bodyParser from 'body-parser'

const app = express()
metrics.injectMetricsRoute(app)

app.use(bodyParser.json({ limit: '2mb' }))
app.use(metrics.http.monitor(logger))

// POST body carries the URLs of the project's .bib documents to index.
app.post('/project/:project_id/index', ReferencesAPIController.index)
app.get('/status', (req, res) => res.send({ status: 'references api is up' }))

// Fall back to localhost:3006 when no `internal.references` settings exist.
const settings = Settings.internal?.references
const host = settings?.host || 'localhost'
const port = settings?.port || 3006

// Fields first, message second, matching the other logger calls below.
logger.debug({ host, port }, 'Listening at')

// The listen callback is registered as a 'listening' handler and receives
// no arguments; failures to bind are reported through the 'error' event.
const server = app.listen(port, host, () => {
  logger.info({ host, port }, 'references HTTP server starting up')
})

server.on('error', error => {
  logger.error(
    { err: error, host, port },
    'references HTTP server failed to start'
  )
  // Rethrow so the process still terminates, as it did when the event was
  // left unhandled.
  throw error
})

// Stop accepting connections on SIGTERM, then shut the metrics client down.
process.on('SIGTERM', () => {
  server.close(() => {
    logger.info({ host, port }, 'references HTTP server closed')
    metrics.close()
  })
})
// Grammar implemented here:
//  bibtex -> (string | preamble | comment | entry)*;
//  string -> '@STRING' kv_left key_equals_value kv_right;
//  preamble -> '@PREAMBLE' kv_left .*? kv_right;  // body is skipped
//  comment -> '@COMMENT' kv_left .*? kv_right;    // body is skipped
//  entry -> '@' key kv_left key ',' key_value_list kv_right;
//  key_value_list -> key_equals_value (',' key_equals_value)* ','?;
//  key_equals_value -> key '=' value;
//  value -> value_part ('#' value_part)*;
//  value_part -> value_quotes | value_braces | key;
//  value_quotes -> '"' .*? '"'; // not quite
//  value_braces -> '{' .*? '}'; // not quite
//  kv_left -> '(' | '{'
//  kv_right -> ')' | '}'

/** Closing delimiter for each opening delimiter used by the grammar. */
const CLOSING_DELIMITER = Object.freeze({
  '{': '}',
  '(': ')',
  '"': '"',
});

/** Anchored pattern for a closing delimiter (and trailing whitespace), keyed by its opener. */
const CLOSING_PATTERN = Object.freeze({
  '{': /^(\})\s*/,
  '(': /^(\))\s*/,
  '"': /^(")\s*/,
});

/**
 * Token patterns. All are anchored at the current position except `atKey`,
 * which deliberately searches forward so free text between entries is skipped.
 */
const TOKEN = Object.freeze({
  atKey: /@([a-zA-Z0-9_:\\./-]+)\s*/,
  enLeft: /^([{(])\s*/,
  entryId: /^\s*([^@={}",\s]+)\s*,\s*/,
  key: /^([a-zA-Z0-9_:\\./-]+)\s*=\s*/,
  vLeft: /^([{"])\s*/,
  inVLeft: /^(\{)\s*/,
  word: /^([^{}"\s]+)\s*/,
  bare: /^([^\s,{}()"#=]+)\s*/,
  concat: /^(#)\s*/,
  comma: /^(,)\s*/,
  quota: /^(")\s*/,
});

/**
 * Minimal BibTeX parser. Feed it text with `setInput`, call `parse`, then read
 * the parsed entries from `_entries` (entry id -> { field: text, $type }).
 */
class BibtexParser {
  constructor() {
    this._entries = {};
    this._comments = [];
    this._strings = {};
    this.input = '';
    this.config = {
      upperKeys: false,
    };
    this._pos = 0;
  }

  /**
   * @param {string} t - BibTeX source to parse.
   */
  setInput(t) {
    this.input = t;
  }

  /**
   * Matches `reg` against the unread input.
   *
   * @param {RegExp} reg - pattern whose first capture group is the token text.
   * @param {boolean} [notMove=false] - when true, only peek; do not consume.
   * @returns {{success: boolean, text?: string, index?: number, step?: number}}
   */
  matchFirst(reg, notMove = false) {
    const result = this.input.slice(this._pos).match(reg);
    if (!result) {
      return { success: false };
    }
    if (!notMove) {
      this._pos += result.index + result[0].length;
    }
    return {
      success: true,
      text: result[1],
      index: result.index,
      step: result[0].length,
    };
  }

  /**
   * Throws when any property of `obj` is undefined; the property name is used
   * in the error message.
   *
   * @param {Object<string, *>} obj
   * @throws {Error} naming the first missing token and the current position.
   */
  assert(obj) {
    for (const [name, value] of Object.entries(obj)) {
      if (value === undefined) {
        throw new Error(
          `[BibParser:ERROR] ${name} not found at ${this._pos}`
        );
      }
    }
  }

  /**
   * Reads one value part: a `{...}` or `"..."` delimited value (braces may
   * nest), or a bare word such as a number or a @string macro name.
   *
   * @returns {string[]} the whitespace-separated tokens of the part, without
   *   its outer delimiters.
   * @throws {Error} when no value starts here or the value is never closed.
   */
  getValuePart() {
    const { text: opener } = this.matchFirst(TOKEN.vLeft);
    if (opener === undefined) {
      const { text: bare } = this.matchFirst(TOKEN.bare);
      this.assert({ value: bare });
      return [bare];
    }

    const stack = [opener];
    const tokens = [];
    while (stack.length > 0) {
      const nested = this.matchFirst(TOKEN.inVLeft);
      if (nested.success) {
        stack.push(nested.text);
        tokens.push(nested.text);
        continue;
      }
      const closer = this.matchFirst(CLOSING_PATTERN[stack[stack.length - 1]]);
      if (closer.success) {
        tokens.push(closer.text);
        stack.pop();
        continue;
      }
      const word = this.matchFirst(TOKEN.word);
      if (word.success) {
        tokens.push(word.text);
        continue;
      }
      // A double quote that does not close the current delimiter is content.
      const quote = this.matchFirst(TOKEN.quota);
      if (quote.success) {
        tokens.push(quote.text);
        continue;
      }
      throw new Error(
        `[BibParser:ERROR] unterminated value at ${this._pos}`
      );
    }

    // The last token is the delimiter that closed the value itself.
    tokens.pop();
    return tokens;
  }

  /**
   * Reads a full field value: one or more parts joined by `#`, followed by an
   * optional comma.
   *
   * @returns {string[]} tokens of all parts, in order.
   */
  getValue() {
    const tokens = [];
    do {
      tokens.push(...this.getValuePart());
    } while (this.matchFirst(TOKEN.concat).success);
    this.matchFirst(TOKEN.comma);
    return tokens;
  }

  /**
   * Advances to the delimiter that closes a block opened with `left`,
   * honouring nested braces. If no closing delimiter exists the position is
   * left unchanged, so the caller's closing-delimiter check reports the error.
   *
   * @param {string} left - the opening delimiter, '{' or '('.
   */
  skipBody(left) {
    const right = CLOSING_DELIMITER[left];
    let depth = 0;
    for (let i = this._pos; i < this.input.length; i++) {
      const ch = this.input[i];
      if (ch === right && depth === 0) {
        this._pos = i;
        return;
      }
      if (ch === '{') {
        depth++;
      } else if (ch === '}' && depth > 0) {
        depth--;
      }
    }
  }

  /** Parses the `key = value` body of an @string block into `_strings`. */
  string() {
    const { text: key } = this.matchFirst(TOKEN.key);
    this.assert({ key });
    this._strings[key] = this.getValue().join(' ');
  }

  /** Skips the body of an @preamble block. */
  preamble(left) {
    this.skipBody(left);
  }

  /** Skips the body of an @comment block. */
  comment(left) {
    this.skipBody(left);
  }

  /**
   * Parses the id and fields of a regular entry and stores it in `_entries`.
   *
   * @param {string} head - the entry type as written after '@'.
   */
  entry(head) {
    const { text: entryId } = this.matchFirst(TOKEN.entryId);
    this.assert({ entryId });

    const entry = {};
    for (
      let field = this.matchFirst(TOKEN.key);
      field.success;
      field = this.matchFirst(TOKEN.key)
    ) {
      entry[field.text] = this.getValue().join(' ');
    }

    entry.$type = head;
    this._entries[entryId] = entry;
  }

  /**
   * Parses every '@' block in the input.
   *
   * @throws {Error} on the first malformed block.
   */
  parse() {
    for (;;) {
      const { success, text: head } = this.matchFirst(TOKEN.atKey);
      if (!success) {
        return;
      }

      const { text: enLeft } = this.matchFirst(TOKEN.enLeft);
      this.assert({ enLeft });

      switch (head.toUpperCase()) {
        case 'STRING':
          this.string();
          break;
        case 'PREAMBLE':
          this.preamble(enLeft);
          break;
        case 'COMMENT':
          this.comment(enLeft);
          break;
        default:
          this.entry(head);
      }

      const { text: enRight } = this.matchFirst(CLOSING_PATTERN[enLeft]);
      this.assert({ enRight });
    }
  }
}

/**
 * Runs the parser over a BibTeX document.
 *
 * @param {string} input - BibTeX source text.
 * @returns {Object<string, Object<string, string>>} entries keyed by citation
 *   id; each entry maps field names to text and carries its type in `$type`.
 * @throws {Error} when the input is malformed.
 */
export function bibParse(input) {
  const parser = new BibtexParser();
  parser.setInput(input);
  parser.parse();
  return parser._entries;
}
req.body; + Promise.all(docUrls.map(async (docUrl) => { + try { + const response = await fetch(docUrl); + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + return response.text(); + } catch (error) { + logger.error({ error }, "Failed to fetch document from URL: " + docUrl); + return null; + } + })).then((responses) => { + const keys = []; + for (const body of responses) { + if (!body) continue + try { + const result = bibParse(body); + const resultKeys = Object.keys(result); + keys.push(...resultKeys); + } catch(error) { + logger.error({error}, "skip the file.") + } + } + logger.info({ keys }, "all keys"); + res.send({ keys }) + }) + } +} diff --git a/services/references/config/settings.defaults.cjs b/services/references/config/settings.defaults.cjs new file mode 100644 index 0000000000..e099730574 --- /dev/null +++ b/services/references/config/settings.defaults.cjs @@ -0,0 +1,9 @@ +module.exports = { + internal: { + references: { + port: 3006, + host: process.env.REFERENCES_HOST || '127.0.0.1', + }, + }, +} + diff --git a/services/references/package.json b/services/references/package.json new file mode 100644 index 0000000000..d822fd54b5 --- /dev/null +++ b/services/references/package.json @@ -0,0 +1,35 @@ +{ + "name": "@overleaf/references", + "author": "Tackoil", + "description": "references", + "private": true, + "main": "app.js", + "type": "module", + "scripts": { + "start": "node $NODE_APP_OPTIONS app.js" + }, + "version": "0.1.0", + "dependencies": { + "@overleaf/fetch-utils": "*", + "@overleaf/logger": "*", + "@overleaf/metrics": "*", + "@overleaf/o-error": "*", + "@overleaf/redis-wrapper": "*", + "@overleaf/settings": "*", + "async": "^3.2.5", + "body-parser": "^1.20.3", + "bunyan": "^1.8.15", + "express": "^4.21.0", + "ioredis": "^4.16.1", + "lodash": "^4.17.19" + }, + "devDependencies": { + "chai": "^4.3.6", + "chai-as-promised": "^7.1.1", + "mocha": "^10.2.0", + "mongodb": "^6.1.0", + "sandboxed-module": "^2.0.4", + 
"sinon": "^9.2.4", + "typescript": "^5.0.4" + } +} diff --git a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js index 61fbd03bb1..7b8fbf3366 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -259,6 +259,9 @@ module.exports = { notifications: { url: `http://${process.env.NOTIFICATIONS_HOST || '127.0.0.1'}:3042`, }, + references: { + url: `http://${process.env.REFERENCES_HOST || 'localhost'}:3006`, + }, webpack: { url: `http://${process.env.WEBPACK_HOST || '127.0.0.1'}:3808`, },