diff --git a/support-services/docker-compose.yml b/support-services/docker-compose.yml
index 6debc3c808..4ec22fde91 100644
--- a/support-services/docker-compose.yml
+++ b/support-services/docker-compose.yml
@@ -43,6 +43,8 @@ services:
       retries: 10
     volumes:
       - elasticsearch_data:/usr/share/elasticsearch/data
+    ports:
+      - "9200:9200"
 
   kibana:
     image: kibana:7.11.1
diff --git a/tools/package.json b/tools/package.json
new file mode 100644
index 0000000000..3dbc1ca591
--- /dev/null
+++ b/tools/package.json
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
diff --git a/tools/register-es-pipelines.js b/tools/register-es-pipelines.js
new file mode 100644
index 0000000000..d559aa988b
--- /dev/null
+++ b/tools/register-es-pipelines.js
@@ -0,0 +1,267 @@
+import { program } from 'commander'
+
+const DEFAULT_ES_HOST = 'http://localhost:9200/'
+const PIPELINE_NAME = 'geonetwork-ui'
+const JSON_HEADERS = { 'Content-Type': 'application/json' }
+
+const VERSION = 101 // increment on changes
+
+// Ingest pipeline registered in ElasticSearch. Each processor is a Painless
+// script which edits the document being indexed through `ctx`.
+const GEONETWORK_UI_PIPELINE = {
+  description: 'GeoNetwork-UI pipeline',
+  version: VERSION,
+  processors: [
+    {
+      // compute the metadata quality score, i.e. the percentage of the 8
+      // checks below that the record passes
+      script: {
+        lang: 'painless',
+        source: `
+int total=8;
+int ok=0;
+if(ctx.resourceTitleObject != null && ctx.resourceTitleObject.default != null && ctx.resourceTitleObject.default != '') {
+  ok++
+}
+if(ctx.resourceAbstractObject != null && ctx.resourceAbstractObject.default != null && ctx.resourceAbstractObject.default != '') {
+  ok++
+}
+if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].organisation != null && ctx.contact[0].organisation != '') {
+  ok++
+}
+if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].email != null && ctx.contact[0].email != '') {
+  ok++
+}
+if(ctx.cl_topic != null && ctx.cl_topic.length > 0) {
+  ok++
+}
+if(ctx.tag != null && ctx.tag.length > 0) {
+  ok++
+}
+if(ctx.cl_maintenanceAndUpdateFrequency != null && ctx.cl_maintenanceAndUpdateFrequency.length > 0) {
+  ok++
+}
+if(ctx.MD_LegalConstraintsUseLimitationObject != null && ctx.MD_LegalConstraintsUseLimitationObject.length > 0) {
+  ok++
+}
+ctx.qualityScore = ok * 100 / total;`,
+      },
+    },
+    {
+      // generate human-readable data formats
+      script: {
+        lang: 'painless',
+        source: `
+if (!ctx.containsKey('format')) return;
+if (ctx.format == null) return;
+// copy the list, otherwise originalFormat would be rewritten along with format
+ctx.originalFormat = new ArrayList(ctx.format);
+for(int i = ctx.format.length - 1; i >= 0; i--) {
+  String format = ctx.format[i].toLowerCase();
+  if (format.contains('shp') || format.contains('shapefile')) {
+    ctx.format[i] = 'ESRI Shapefile'
+  } else if (format.contains('ogc:w') || format.contains('esri:rest') || format.contains('tms')) {
+    ctx.format[i] = 'Service'
+  } else if (format.contains('pdf')) {
+    ctx.format[i] = 'PDF'
+  } else if (format.contains('png') || format.contains('jpg') || format.contains('jpeg') || format.contains('bmp')) {
+    ctx.format[i] = 'Image'
+  } else if (format.contains('excel') || format.contains('xls') || format.contains('vnd.oasis.opendocument.spreadsheet') || format.contains('vnd.openxmlformats-officedocument.spreadsheetml.sheet')) {
+    ctx.format[i] = 'Excel'
+  } else if (format.contains('geopackage') || format.contains('gpkg')) {
+    ctx.format[i] = 'Geopackage'
+  } else if (format.contains('postgis')) {
+    ctx.format[i] = 'PostGIS'
+  } else if (format.contains('gml')) {
+    ctx.format[i] = 'GML'
+  } else if (format.contains('kml')) {
+    ctx.format[i] = 'KML'
+  } else if (format.contains('xml')) {
+    ctx.format[i] = 'XML'
+  } else if (format.contains('html')) {
+    ctx.format[i] = 'HTML'
+  } else if (format.contains('geo+json') || format.contains('geojson')) {
+    ctx.format[i] = 'GeoJSON'
+  } else if (format.contains('json')) {
+    ctx.format[i] = 'JSON'
+  } else if (format.contains('csv')) {
+    ctx.format[i] = 'CSV'
+  } else if (format.contains('rtf')) {
+    ctx.format[i] = 'RTF'
+  } else if (format.contains('text')) {
+    ctx.format[i] = 'Text'
+  } else if (format.contains('zip')) {
+    ctx.format[i] = 'ZIP'
+  } else if (format != null) {
+    ctx.format.remove(i);
+    // uncomment this to show unrecognized formats
+    // ctx.format[i] = 'unknown: ' + ctx.format[i];
+  }
+}`,
+      },
+    },
+  ],
+}
+
+/**
+ * Makes sure the ElasticSearch base url ends with a slash, because endpoint
+ * paths are appended directly to it.
+ * @param {string} esHost ElasticSearch base url
+ * @returns {string} the base url with a trailing slash
+ */
+function normalizeHost(esHost) {
+  return esHost.endsWith('/') ? esHost : `${esHost}/`
+}
+
+/**
+ * Sends a request to ElasticSearch and parses the response body as JSON.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} path endpoint path, relative to the base url
+ * @param {object} [init] options passed to fetch (method, body, headers)
+ * @returns {Promise<object>} the parsed response body
+ */
+async function queryEs(esHost, path, init) {
+  const resp = await fetch(`${normalizeHost(esHost)}${path}`, init)
+  return resp.json()
+}
+
+/**
+ * Sends a request to ElasticSearch and fails unless it was acknowledged.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} path endpoint path, relative to the base url
+ * @param {object} init options passed to fetch (method, body, headers)
+ * @throws {Error} if the response does not contain `acknowledged: true`
+ */
+async function sendAcknowledged(esHost, path, init) {
+  const result = await queryEs(esHost, path, init)
+  if (result.acknowledged !== true) {
+    console.error(result)
+    throw new Error('something went wrong')
+  }
+}
+
+/**
+ * Registers an ingest pipeline under the given name.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} name pipeline name
+ * @param {object} payload pipeline definition, sent as JSON
+ */
+async function registerPipeline(esHost, name, payload) {
+  console.log(`adding ${name} pipeline...`)
+
+  await sendAcknowledged(esHost, `_ingest/pipeline/${name}`, {
+    method: 'PUT',
+    body: JSON.stringify(payload),
+    headers: JSON_HEADERS,
+  })
+
+  console.log(`${name} pipeline was successfully registered!`)
+}
+
+/**
+ * Deletes the ingest pipeline with the given name.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} name pipeline name
+ */
+async function clearPipeline(esHost, name) {
+  console.log(`clearing ${name} pipeline...`)
+
+  await sendAcknowledged(esHost, `_ingest/pipeline/${name}`, {
+    method: 'DELETE',
+  })
+
+  console.log(`${name} pipeline was successfully cleaned!`)
+}
+
+/**
+ * Sets `index.default_pipeline` on the records index.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} recordsIndex name of the index holding the records
+ * @param {string} name pipeline name
+ */
+async function setDefaultPipeline(esHost, recordsIndex, name) {
+  console.log(`setting ${name} as default pipeline...`)
+
+  await sendAcknowledged(esHost, `${recordsIndex}/_settings`, {
+    method: 'PUT',
+    body: JSON.stringify({ 'index.default_pipeline': name }),
+    headers: JSON_HEADERS,
+  })
+
+  console.log(`${name} pipeline was successfully set as default!`)
+}
+
+/**
+ * Lists the pipelines currently registered, then registers the GeoNetwork-UI
+ * pipeline and sets it as the default pipeline of the records index.
+ * @param {string} esHost ElasticSearch base url
+ * @param {string} recordsIndex name of the index holding the records
+ */
+async function registerPipelines(esHost, recordsIndex) {
+  console.log('querying currently registered pipelines...')
+
+  const pipelines = await queryEs(esHost, '_ingest/pipeline')
+
+  for (const [name, pipeline] of Object.entries(pipelines)) {
+    console.log(`
+  > ${name}`)
+    console.log(`    ${pipeline.description}`)
+  }
+
+  console.log('')
+  await registerPipeline(esHost, PIPELINE_NAME, GEONETWORK_UI_PIPELINE)
+
+  console.log('')
+  await setDefaultPipeline(esHost, recordsIndex, PIPELINE_NAME)
+}
+
+/**
+ * Deletes the GeoNetwork-UI pipeline if it is registered.
+ * @param {string} esHost ElasticSearch base url
+ */
+async function clearPipelines(esHost) {
+  const pipelines = await queryEs(esHost, '_ingest/pipeline')
+
+  if (!(PIPELINE_NAME in pipelines)) {
+    console.log(`No ${PIPELINE_NAME} pipelines found, exiting`)
+    return
+  }
+
+  await clearPipeline(esHost, PIPELINE_NAME)
+}
+
+/**
+ * Reports a failed command and makes the process exit with an error code.
+ * @param {Error} error
+ */
+function handleFailure(error) {
+  console.error(error)
+  process.exitCode = 1
+}
+
+program
+  .name('register-es-pipelines')
+  .description(
+    'Lets you register ElasticSearch pipelines to improve the search experience in GeoNetwork-UI. Trigger a reindexation of the catalog after using.'
+  )
+program
+  .command('register')
+  .description('Register pipelines')
+  // `<host>` makes the option take a value; without it the flag is a boolean
+  .option('--host <host>', 'ElasticSearch host', DEFAULT_ES_HOST)
+  .option(
+    '--records-index <recordsIndex>',
+    'Name of the index used by GeoNetwork for records',
+    'gn-records'
+  )
+  .action((options) =>
+    registerPipelines(options.host, options.recordsIndex).catch(handleFailure)
+  )
+program
+  .command('clear')
+  .description('Clear all registered pipelines')
+  .option('--host <host>', 'ElasticSearch host', DEFAULT_ES_HOST)
+  .action((options) => clearPipelines(options.host).catch(handleFailure))
+
+// parsed last so that every constant used by the commands is initialized
+program.parse(process.argv)