From 995035fdb69eb0bcdaaadebaced950350ec72812 Mon Sep 17 00:00:00 2001
From: Olivia Guyot
Date: Fri, 20 Oct 2023 17:16:08 +0200
Subject: [PATCH] feat(es): add a script for registering pipelines

Includes the pipeline for the metadata quality score, as well as the
human readable format
---
 pipelines.js                        | 179 ++++++++++++++++++++++++++++
 support-services/docker-compose.yml |   2 +
 2 files changed, 181 insertions(+)
 create mode 100644 pipelines.js

diff --git a/pipelines.js b/pipelines.js
new file mode 100644
index 0000000000..ebff0c3746
--- /dev/null
+++ b/pipelines.js
@@ -0,0 +1,179 @@
+const ES_URL = 'http://localhost:9200/'
+const GN_RECORDS_INDEX = 'gn-records'
+const VERSION = 100 // increment on changes
+
+// Definition of the ingest pipeline registered by this script: two Painless
+// script processors, listed in the order Elasticsearch runs them.
+const GEONETWORK_UI_PIPELINE = {
+  description: 'GeoNetwork-UI pipeline',
+  version: VERSION,
+  processors: [
+    {
+      // compute the metadata quality score: 8 criteria are checked and the
+      // score is the percentage of fulfilled ones (integer division, so the
+      // result is truncated to a whole number)
+      script: {
+        lang: 'painless',
+        source: `
+int total=8;
+int ok=0;
+if(ctx.resourceTitleObject != null && ctx.resourceTitleObject.default != null && ctx.resourceTitleObject.default != '') {
+  ok++
+}
+if(ctx.resourceAbstractObject != null && ctx.resourceAbstractObject.default != null && ctx.resourceAbstractObject.default != '') {
+  ok++
+}
+if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].organisation != null && ctx.contact[0].organisation != '') {
+  ok++
+}
+if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].email != null && ctx.contact[0].email != '') {
+  ok++
+}
+if(ctx.cl_topic != null && ctx.cl_topic.length > 0) {
+  ok++
+}
+if(ctx.tag != null && ctx.tag.length > 0) {
+  ok++
+}
+if(ctx.cl_maintenanceAndUpdateFrequency != null && ctx.cl_maintenanceAndUpdateFrequency.length > 0) {
+  ok++
+}
+if(ctx.MD_LegalConstraintsUseLimitationObject != null && ctx.MD_LegalConstraintsUseLimitationObject.length > 0) {
+  ok++
+}
+ctx.qualityScore = ok * 100 / total;`,
+      },
+    },
+    {
+      // generate human-readable data formats; the values received are kept
+      // in originalFormat
+      script: {
+        lang: 'painless',
+        source: `
+// nothing to convert when the record has no format
+if (ctx.format != null) {
+  // copy the list: a plain assignment would share it with ctx.format and the
+  // loop below would then overwrite the original values too
+  ctx.originalFormat = new ArrayList(ctx.format);
+  for(int i = 0; i < ctx.format.length; i++) {
+    String format = ctx.format[i].toLowerCase();
+    if (format.contains('shp') || format.contains('shapefile')) {
+      ctx.format[i] = 'ESRI Shapefile'
+    } else if (format.contains('ogc:w') || format.contains('esri:rest') || format.contains('tms')) {
+      ctx.format[i] = 'Service'
+    } else if (format.contains('pdf')) {
+      ctx.format[i] = 'PDF'
+    } else if (format.contains('png') || format.contains('jpg') || format.contains('jpeg') || format.contains('bmp')) {
+      ctx.format[i] = 'Image'
+    } else if (format.contains('excel') || format.contains('xls') || format.contains('vnd.oasis.opendocument.spreadsheet') || format.contains('vnd.openxmlformats-officedocument.spreadsheetml.sheet')) {
+      ctx.format[i] = 'Excel'
+    } else if (format.contains('zip')) {
+      ctx.format[i] = 'ZIP'
+    } else if (format.contains('geopackage') || format.contains('gpkg')) {
+      ctx.format[i] = 'Geopackage'
+    } else if (format.contains('postgis')) {
+      ctx.format[i] = 'PostGIS'
+    } else if (format.contains('gml')) {
+      ctx.format[i] = 'GML'
+    } else if (format.contains('kml')) {
+      ctx.format[i] = 'KML'
+    } else if (format.contains('xml')) {
+      ctx.format[i] = 'XML'
+    } else if (format.contains('html')) {
+      ctx.format[i] = 'HTML'
+    } else if (format.contains('geo+json') || format.contains('geojson')) {
+      ctx.format[i] = 'GeoJSON'
+    } else if (format.contains('json')) {
+      ctx.format[i] = 'JSON'
+    } else if (format.contains('csv')) {
+      ctx.format[i] = 'CSV'
+    } else if (format.contains('rtf')) {
+      ctx.format[i] = 'RTF'
+    } else if (format.contains('text')) {
+      ctx.format[i] = 'Text'
+    } else {
+      // comment this to hide unrecognized formats
+      ctx.format[i] = 'unknown: ' + ctx.format[i];
+    }
+  }
+}`,
+      },
+    },
+  ],
+}
+
+/**
+ * Sends a JSON payload to Elasticsearch with a PUT request and checks that
+ * the response acknowledges it.
+ * @param {string} url full URL of the endpoint
+ * @param {object} payload body of the request, serialized as JSON
+ * @throws {Error} if the request failed or was not acknowledged
+ */
+async function putJson(url, payload) {
+  const resp = await fetch(url, {
+    method: 'PUT',
+    body: JSON.stringify(payload),
+    headers: {
+      'Content-Type': 'application/json',
+    },
+  })
+  const result = await resp.json()
+  // an HTTP error status is a failure even if the body looks fine
+  if (!resp.ok || result.acknowledged !== true) {
+    console.error(result)
+    throw new Error('something went wrong')
+  }
+}
+
+/**
+ * Creates or updates an ingest pipeline in Elasticsearch.
+ * @param {string} name id of the pipeline
+ * @param {object} payload definition of the pipeline
+ */
+async function registerPipeline(name, payload) {
+  console.log(`adding ${name} pipeline...`)
+  await putJson(`${ES_URL}_ingest/pipeline/${name}`, payload)
+  console.log(`${name} pipeline was successfully registered!`)
+}
+
+/**
+ * Sets an ingest pipeline as the default pipeline of the records index.
+ * @param {string} name id of the pipeline
+ */
+async function setDefaultPipeline(name) {
+  console.log(`setting ${name} as default pipeline...`)
+  await putJson(`${ES_URL}${GN_RECORDS_INDEX}/_settings`, {
+    'index.default_pipeline': name,
+  })
+  console.log(`${name} pipeline was successfully set as default!`)
+}
+
+/**
+ * Prints the pipelines currently registered, then registers the
+ * geonetwork-ui pipeline and sets it as default on the records index.
+ */
+async function registerPipelines() {
+  console.log('querying currently registered pipelines...')
+
+  // built from ES_URL like every other request of this script
+  const resp = await fetch(`${ES_URL}_ingest/pipeline`)
+  const pipelines = await resp.json()
+
+  for (const [name, pipeline] of Object.entries(pipelines)) {
+    console.log(`\n  > ${name}`)
+    console.log(`    ${pipeline.description}`)
+  }
+
+  console.log('')
+  await registerPipeline('geonetwork-ui', GEONETWORK_UI_PIPELINE)
+
+  console.log('')
+  await setDefaultPipeline('geonetwork-ui')
+}
+
+// report any failure and exit with a non-zero code instead of leaving the
+// returned promise unhandled
+registerPipelines().catch((error) => {
+  console.error(error)
+  process.exitCode = 1
+})
diff --git a/support-services/docker-compose.yml b/support-services/docker-compose.yml
index 6debc3c808..4ec22fde91 100644
--- a/support-services/docker-compose.yml
+++ b/support-services/docker-compose.yml
@@ -43,6 +43,8 @@ services:
       retries: 10
     volumes:
       - elasticsearch_data:/usr/share/elasticsearch/data
+    ports:
+      - "9200:9200"
 
   kibana:
     image: kibana:7.11.1