diff --git a/app.arc b/app.arc index 828684a3a..1608ccbb8 100644 --- a/app.arc +++ b/app.arc @@ -156,6 +156,8 @@ availabilityZoneCount 3 volumeSize 10 dedicatedMasterCount 3 dedicatedMasterType t3.small.search +# Use OpenSearch in sandbox mode; default is Elasticsearch. +sandboxEngine opensearch @plugins plugin-remix diff --git a/app/routes/_gcn.circulars._archive._index/route.tsx b/app/routes/_gcn.circulars._archive._index/route.tsx index 88e1a7e11..eeb1793d0 100644 --- a/app/routes/_gcn.circulars._archive._index/route.tsx +++ b/app/routes/_gcn.circulars._archive._index/route.tsx @@ -38,12 +38,14 @@ import CircularsHeader from './CircularsHeader' import CircularsIndex from './CircularsIndex' import { DateSelector } from './DateSelectorMenu' import Hint from '~/components/Hint' +import { feature } from '~/lib/env.server' import { getFormDataString } from '~/lib/utils' import { useFeature } from '~/root' import searchImg from 'nasawds/src/img/usa-icons-bg/search--white.svg' export async function loader({ request: { url } }: LoaderFunctionArgs) { + const useNLP = feature('CIRCULARS_USE_NLP') const { searchParams } = new URL(url) const query = searchParams.get('query') || undefined if (query) { @@ -59,6 +61,7 @@ export async function loader({ request: { url } }: LoaderFunctionArgs) { limit, startDate, endDate, + useNLP, }) return { page, ...results } diff --git a/app/routes/_gcn.circulars/circulars.server.ts b/app/routes/_gcn.circulars/circulars.server.ts index 7812d8fdb..cafdd40fa 100644 --- a/app/routes/_gcn.circulars/circulars.server.ts +++ b/app/routes/_gcn.circulars/circulars.server.ts @@ -127,41 +127,57 @@ export async function search({ limit, startDate, endDate, + useNLP, }: { query?: string page?: number limit?: number startDate?: string endDate?: string + useNLP?: boolean }): Promise<{ items: CircularMetadata[] totalPages: number totalItems: number }> { const client = await getSearch() - const [startTime, endTime] = getValidDates(startDate, endDate) - const { 
- body: { - hits: { - total: { value: totalItems }, - hits, - }, - }, - } = await client.search({ - index: 'circulars', - body: { - query: { + const get_model_id_request = { + path: '/_ingest/pipeline', + } + + let model_id = '' + try { + const resp = await client.http.get(get_model_id_request) + + if (resp && resp.statusCode == 200) { + model_id = + resp.body['nlp-ingest-pipeline'].processors[0].text_embedding.model_id + } else { + console.log( + 'Error. Could not deploy model. Returned with response: ', + resp + ) + } + } catch (e) { + console.log('Error: ', e) + } + + const nlpSearchQuery = query + ? { bool: { - must: query - ? { - multi_match: { - query, - fields: ['submitter', 'subject', 'body'], + must: [ + { + neural: { + circular_embedding: { + query_text: query, + model_id, + k: 100, }, - } - : undefined, + }, + }, + ], filter: { range: { createdOn: { @@ -171,7 +187,42 @@ export async function search({ }, }, }, + } + : { match_all: {} } + + const searchQuery = { + bool: { + must: query + ? { + multi_match: { + query, + fields: ['submitter', 'subject', 'body'], + }, + } + : undefined, + filter: { + range: { + createdOn: { + gte: startTime, + lte: endTime, + }, + }, + }, + }, + } + + const chosenQuery = useNLP ? 
nlpSearchQuery : searchQuery + const { + body: { + hits: { + total: { value: totalItems }, + hits, }, + }, + } = await client.search({ + index: 'circulars', + body: { + query: chosenQuery, fields: ['subject'], _source: false, sort: { diff --git a/package-lock.json b/package-lock.json index 6a3064e8a..48aa5a38a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -68,7 +68,7 @@ "@aws-sdk/util-dynamodb": "^3.319.0", "@babel/preset-env": "^7.23.6", "@babel/preset-typescript": "^7.23.3", - "@nasa-gcn/architect-plugin-search": "^1.1.0", + "@nasa-gcn/architect-plugin-search": "github:nasa-gcn/architect-plugin-search", "@nasa-gcn/eslint-config-gitignore": "^0.0.1", "@remix-run/dev": "^2.5.1", "@remix-run/eslint-config": "^2.5.1", @@ -5612,6 +5612,12 @@ "node": ">=6.9.0" } }, + "node_modules/@balena/dockerignore": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@balena/dockerignore/-/dockerignore-1.0.2.tgz", + "integrity": "sha512-wMue2Sy4GAVTk6Ic4tJVcnfdau+gx2EnG7S+uAEe+TWJFqE4YoWN4/H8MSLj4eYJKxGg26lZwboEniNiNwZQ6Q==", + "dev": true + }, "node_modules/@bcoe/v8-coverage": { "version": "0.2.3", "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", @@ -6865,12 +6871,15 @@ }, "node_modules/@nasa-gcn/architect-plugin-search": { "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@nasa-gcn/architect-plugin-search/-/architect-plugin-search-1.1.0.tgz", - "integrity": "sha512-vPAOUCTtKBwOKnU86k8XL7ZA9+o7bDtAwu4z9PvSSMwBToG2n4rwO9amNsoBbjC6OxzIxKzBkwNZPU7eRB0XlQ==", + "resolved": "git+ssh://git@github.com/nasa-gcn/architect-plugin-search.git#00d2b77de37dcc64407018228ac0bd6f4728e8e6", "dev": true, + "license": "Apache-2.0", "dependencies": { + "@nasa-gcn/architect-functions-search": "^1.0.0", "@opensearch-project/opensearch": "^2.2.0", + "dockerode": "^4.0.0", "env-paths": "^3.0.0", + "lodash": "^4.17.21", "make-fetch-happen": "^11.0.3", "rimraf": "^4.1.2", "tar": "^6.1.13", @@ -10342,6 +10351,15 @@ "url": 
"https://github.com/sponsors/ljharb" } }, + "node_modules/asn1": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz", + "integrity": "sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==", + "dev": true, + "dependencies": { + "safer-buffer": "~2.1.0" + } + }, "node_modules/ast-types-flow": { "version": "0.0.7", "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.7.tgz", @@ -10675,6 +10693,15 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==", + "dev": true, + "dependencies": { + "tweetnacl": "^0.14.3" + } + }, "node_modules/before-after-hook": { "version": "2.2.3", "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", @@ -10983,6 +11010,16 @@ "node": ">=0.2.0" } }, + "node_modules/buildcheck": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/buildcheck/-/buildcheck-0.0.6.tgz", + "integrity": "sha512-8f9ZJCUXyT1M35Jx7MkBgmBMo3oHTTBIPLiY9xyL0pl3T5RwcPEY8cUHr5LBNfu/fk6c2T4DJZuVM/8ZZT2D2A==", + "dev": true, + "optional": true, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/builtins": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/builtins/-/builtins-5.0.1.tgz", @@ -11728,6 +11765,21 @@ "rimraf": "bin.js" } }, + "node_modules/cpu-features": { + "version": "0.0.9", + "resolved": "https://registry.npmjs.org/cpu-features/-/cpu-features-0.0.9.tgz", + "integrity": "sha512-AKjgn2rP2yJyfbepsmLfiYcmtNn/2eUvocUyM/09yB0YDiz39HteK/5/T4Onf0pmdYDMgkBoGvRLvEguzyL7wQ==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "dependencies": { + "buildcheck": "~0.0.6", + "nan": "^2.17.0" + }, + "engines": { + "node": ">=10.0.0" + } + }, 
"node_modules/create-jest": { "version": "29.7.0", "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", @@ -12314,6 +12366,107 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/docker-modem": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/docker-modem/-/docker-modem-5.0.1.tgz", + "integrity": "sha512-vqrE/nrweCyzmCpVpdFRC41qS+tfTF+IoUKlTZr52O82urbUqdfyJBGWMvT01pYUprWepLr8IkyVTEWJKRTQSg==", + "dev": true, + "dependencies": { + "debug": "^4.1.1", + "readable-stream": "^3.5.0", + "split-ca": "^1.0.1", + "ssh2": "^1.11.0" + }, + "engines": { + "node": ">= 8.0" + } + }, + "node_modules/docker-modem/node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dev": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/dockerode": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/dockerode/-/dockerode-4.0.0.tgz", + "integrity": "sha512-3LF7/3MPz5+9RsUo91rD0MCcx0yxjC9bnbtgtVjOLKyKxlZSJ7/Kk3OPAgARlwlWHqXwAGYhmkAHYx7IwD0tJQ==", + "dev": true, + "dependencies": { + "@balena/dockerignore": "^1.0.2", + "docker-modem": "^5.0.0", + "tar-fs": "~2.0.1" + }, + "engines": { + "node": ">= 8.0" + } + }, + "node_modules/dockerode/node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "dev": true + }, + "node_modules/dockerode/node_modules/pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": 
"sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dev": true, + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, + "node_modules/dockerode/node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "dev": true, + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/dockerode/node_modules/tar-fs": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.0.1.tgz", + "integrity": "sha512-6tzWDMeroL87uF/+lin46k+Q+46rAJ0SyPGz7OW7wTgblI273hsBqk2C1j0/xNadNLKDTUL9BukSjB7cwgmlPA==", + "dev": true, + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.0.0" + } + }, + "node_modules/dockerode/node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "dev": true, + "dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -20836,6 +20989,13 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/nan": { + "version": "2.18.0", + "resolved": "https://registry.npmjs.org/nan/-/nan-2.18.0.tgz", + "integrity": 
"sha512-W7tfG7vMOGtD30sHoZSSc/JVYiyDPEyQVso/Zz+/uQd0B0L46gtC+pHha5FFMRpil6fm/AoEcRWyOVi4+E/f8w==", + "dev": true, + "optional": true + }, "node_modules/nanoid": { "version": "3.3.7", "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", @@ -24310,12 +24470,36 @@ "react": ">=17.0.1" } }, + "node_modules/split-ca": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/split-ca/-/split-ca-1.0.1.tgz", + "integrity": "sha512-Q5thBSxp5t8WPTTJQS59LrGqOZqOsrhDGDVm8azCqIBjSBd7nd9o2PM+mDulQQkh8h//4U6hFZnc/mul8t5pWQ==", + "dev": true + }, "node_modules/sprintf-js": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", "dev": true }, + "node_modules/ssh2": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/ssh2/-/ssh2-1.14.0.tgz", + "integrity": "sha512-AqzD1UCqit8tbOKoj6ztDDi1ffJZ2rV2SwlgrVVrHPkV5vWqGJOVp5pmtj18PunkPJAuKQsnInyKV+/Nb2bUnA==", + "dev": true, + "hasInstallScript": true, + "dependencies": { + "asn1": "^0.2.6", + "bcrypt-pbkdf": "^1.0.2" + }, + "engines": { + "node": ">=10.16.0" + }, + "optionalDependencies": { + "cpu-features": "~0.0.8", + "nan": "^2.17.0" + } + }, "node_modules/ssri": { "version": "10.0.5", "resolved": "https://registry.npmjs.org/ssri/-/ssri-10.0.5.tgz", @@ -25269,6 +25453,12 @@ "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", "dev": true }, + "node_modules/tweetnacl": { + "version": "0.14.5", + "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==", + "dev": true + }, "node_modules/type": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/type/-/type-1.2.0.tgz", diff --git a/package.json b/package.json index 6972e5193..613b17e13 100644 --- 
/*
 * Post-deploy hook for the OpenSearch cluster.
 *
 * Registers and deploys an ML text-embedding model on the cluster, creates a
 * neural ingest pipeline that uses that model, and finally creates the
 * `circulars` k-NN index with the pipeline as its default pipeline.
 */

/** Delay between two status polls of an ML task, in milliseconds. */
const TASK_POLL_INTERVAL_MS = 2000

/**
 * ML task states from which a task can no longer reach `COMPLETED`.
 * Polling must stop on these, otherwise a failed registration or deployment
 * would keep this hook waiting forever.
 */
const FAILED_TASK_STATES = new Set([
  'FAILED',
  'CANCELLED',
  'COMPLETED_WITH_ERROR',
])

/**
 * Send one request through `client.http` and require an HTTP 200 answer.
 *
 * Failures are logged (never thrown) so that the caller can simply abort the
 * remaining steps.
 *
 * @param {object} client OpenSearch client exposing `http.get/put/post`.
 * @param {'get' | 'put' | 'post'} method Name of the `client.http` method.
 * @param {object} request Request parameters (`path` and optional `body`).
 * @param {string} action What is being attempted; used in the error message.
 * @returns {Promise<object | undefined>} The response on HTTP 200, otherwise
 *   `undefined`.
 */
async function sendRequest(client, method, request, action) {
  try {
    const resp = await client.http[method](request)
    if (resp && resp.statusCode === 200) {
      return resp
    }
    console.log(`Error. Could not ${action}. Returned with response: `, resp)
  } catch (e) {
    console.log('Error: ', e)
  }
  return undefined
}

/**
 * Poll an ML task until it reaches a terminal state.
 *
 * @param {object} client OpenSearch client exposing `http.get`.
 * @param {string} taskId ID of the ML task to watch.
 * @param {string} phase Word used in log messages ("registration" or
 *   "deployment").
 * @returns {Promise<object | undefined>} The task body once its state is
 *   `COMPLETED`; `undefined` if the task failed, the status request was
 *   rejected, or an error was thrown.
 */
async function waitForTask(client, taskId, phase) {
  const request = { path: `/_plugins/_ml/tasks/${taskId}` }

  try {
    for (;;) {
      const resp = await client.http.get(request)

      if (!resp || resp.statusCode !== 200) {
        console.log(
          `Error. Could not check model ${phase} status. Returned with response: `,
          resp
        )
        return undefined
      }

      console.log(`Checking model ${phase} status...`)
      const task = resp.body

      if (task.state === 'COMPLETED') {
        console.log(`Model ${phase} completed. Model ID: `, task.model_id)
        return task
      }

      if (FAILED_TASK_STATES.has(task.state)) {
        // The task is finished but did not succeed: waiting longer cannot help.
        console.log(
          `Error. Model ${phase} task ${taskId} ended in state ${task.state}. Returned with response: `,
          resp
        )
        return undefined
      }

      // Still created/running: wait before asking again.
      await new Promise((resolve) =>
        setTimeout(resolve, TASK_POLL_INTERVAL_MS)
      )
    }
  } catch (e) {
    console.log('Error: ', e)
    return undefined
  }
}

/**
 * Configure the cluster for neural search.
 *
 * Steps, in order: update ML cluster settings, register a model group,
 * register the model, wait for registration, deploy the model, wait for
 * deployment, create the ingest pipeline, create the `circulars` k-NN index.
 * If any step before the index creation fails, the failure is logged and the
 * remaining steps are skipped. Errors thrown by the index creation itself are
 * not caught here and propagate to the caller.
 *
 * @param {object} client OpenSearch client (uses `client.http` and
 *   `client.indices`).
 * @returns {Promise<void>}
 */
export default async function postdeploySearch(client) {
  // Set cluster settings
  const settingsResp = await sendRequest(
    client,
    'put',
    {
      path: '/_cluster/settings',
      body: {
        persistent: {
          plugins: {
            ml_commons: {
              only_run_on_ml_node: 'false',
              model_access_control_enabled: 'true',
              native_memory_threshold: '99',
            },
          },
        },
      },
    },
    'update cluster settings'
  )
  if (!settingsResp) return
  console.log('Updated ML-related cluster settings.')

  // Register model group
  const groupResp = await sendRequest(
    client,
    'post',
    {
      path: '/_plugins/_ml/model_groups/_register',
      body: {
        name: 'NLP_model_group',
        description: 'A model group for NLP models',
      },
    },
    'register model group'
  )
  if (!groupResp) return
  const modelGroupId = groupResp.body.model_group_id
  console.log(`Registered model group with id: ${modelGroupId}`)

  // Register model to model group
  const registerResp = await sendRequest(
    client,
    'post',
    {
      path: '/_plugins/_ml/models/_register',
      body: {
        name: 'huggingface/sentence-transformers/all-MiniLM-L6-v2',
        version: '1.0.1',
        model_group_id: modelGroupId,
        model_format: 'TORCH_SCRIPT',
      },
    },
    'register model to model group'
  )
  if (!registerResp) return
  const registerTaskId = registerResp.body.task_id
  console.log('Registering model to model group. Task ID is: ', registerTaskId)

  // Check status of model registration
  const registerTask = await waitForTask(client, registerTaskId, 'registration')
  if (!registerTask) return
  let modelId = registerTask.model_id

  // Deploy model
  const deployResp = await sendRequest(
    client,
    'post',
    { path: `/_plugins/_ml/models/${modelId}/_deploy` },
    'deploy model'
  )
  if (!deployResp) return
  const deployTaskId = deployResp.body.task_id
  console.log('Deploying model. Task ID is: ', deployTaskId)

  // Check status of model deployment
  const deployTask = await waitForTask(client, deployTaskId, 'deployment')
  if (!deployTask) return
  // The deployment task reports the model it deployed; use that ID from here on.
  modelId = deployTask.model_id

  // Create neural ingest pipeline
  const pipelineName = 'nlp-ingest-pipeline'
  const pipelineResp = await sendRequest(
    client,
    'put',
    {
      path: `/_ingest/pipeline/${pipelineName}`,
      body: {
        description: 'An NLP ingest pipeline',
        processors: [
          {
            text_embedding: {
              model_id: modelId,
              field_map: {
                body: 'circular_embedding',
              },
            },
          },
        ],
      },
    },
    'create neural ingest pipeline'
  )
  if (!pipelineResp) return
  console.log('Successfully created neural ingest pipeline.')

  // Create knn index
  await client.indices.create({
    index: 'circulars',
    body: {
      settings: {
        'index.knn': true,
        // Every document indexed here goes through the embedding pipeline.
        default_pipeline: pipelineName,
      },
      mappings: {
        properties: {
          subject: {
            type: 'text',
          },
          submittedHow: {
            type: 'text',
          },
          bibcode: {
            type: 'text',
          },
          createdOn: {
            type: 'long',
          },
          circularId: {
            type: 'integer',
          },
          submitter: {
            type: 'text',
          },
          // Target field of the pipeline's field_map (body -> circular_embedding).
          circular_embedding: {
            type: 'knn_vector',
            dimension: 384,
            method: {
              engine: 'lucene',
              space_type: 'l2',
              name: 'hnsw',
              parameters: {},
            },
          },
          body: {
            type: 'text',
          },
        },
      },
    },
  })
}