Skip to content

Commit

Permalink
feat(es): add a script and CLI for registering pipelines
Browse files Browse the repository at this point in the history
Includes the pipeline for the metadata quality score, as well
as the human readable format
  • Loading branch information
jahow committed Nov 20, 2023
1 parent effe02a commit 253cfa1
Show file tree
Hide file tree
Showing 3 changed files with 228 additions and 0 deletions.
2 changes: 2 additions & 0 deletions support-services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ services:
retries: 10
volumes:
- elasticsearch_data:/usr/share/elasticsearch/data
ports:
- "9200:9200"

kibana:
image: kibana:7.11.1
Expand Down
3 changes: 3 additions & 0 deletions tools/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"type": "module"
}
223 changes: 223 additions & 0 deletions tools/register-es-pipelines.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
import { program } from 'commander'

/**
 * Returns the given ElasticSearch base URL with a trailing slash appended when
 * it is missing. The request helpers in this file build their endpoints by
 * plain concatenation (e.g. `${esHost}_ingest/pipeline`), so a host typed as
 * `http://localhost:9200` would otherwise produce an invalid URL.
 * @param {string} host base URL as given on the command line
 * @returns {string} the same URL, guaranteed to end with `/`
 */
function ensureTrailingSlash(host) {
  return host.endsWith('/') ? host : `${host}/`
}

// Command line definition: a `register` and a `clear` sub-command, both
// targeting the ElasticSearch instance given with --host.
program
  .name('register-es-pipelines')
  .description(
    'Lets you register ElasticSearch pipelines to improve the search experience in GeoNetwork-UI. Trigger a reindexation of the catalog after using.'
  )
program
  .command('register')
  .description('Register pipelines')
  .option('--host <value>', 'ElasticSearch host', 'http://localhost:9200/')
  .option(
    '--records-index <value>',
    'Name of the index used by GeoNetwork for records',
    'gn-records'
  )
  .action((options) => {
    const esUrl = ensureTrailingSlash(options.host)
    const recordsIndex = options.recordsIndex
    registerPipelines(esUrl, recordsIndex)
  })
program
  .command('clear')
  .description('Clear all registered pipelines')
  // Same wording as the `register` command: the default value is passed as the
  // third argument, so it is not repeated (previously with a wrong port) in
  // the description text.
  .option('--host <value>', 'ElasticSearch host', 'http://localhost:9200/')
  .action((options) => {
    // No manual fallback needed: the option always carries its default.
    const esUrl = ensureTrailingSlash(options.host)
    clearPipelines(esUrl)
  })

// The action handlers only start the asynchronous work. The constants declared
// further down in this file are read after the first awaited request, i.e.
// once the module has been fully evaluated, so parsing here is safe.
program.parse(process.argv)

// Version stamped on the pipeline definition sent to ElasticSearch.
const VERSION = 101 // increment on changes

/**
 * Definition of the `geonetwork-ui` ingest pipeline, sent as-is (JSON encoded)
 * to ElasticSearch. It runs two Painless scripts on each document:
 *  1. a metadata quality score, stored in `qualityScore`
 *  2. a mapping of raw `format` values to human-readable labels
 */
const GEONETWORK_UI_PIPELINE = {
  description: 'GeoNetwork-UI pipeline',
  version: VERSION,
  processors: [
    {
      // compute the metadata quality score: 8 checks (title, abstract, contact
      // organisation, contact email, topic, tags, update frequency, legal
      // constraints); the score is the share of passed checks out of 100,
      // computed with int arithmetic
      script: {
        lang: 'painless',
        source: `
int total=8;
int ok=0;
if(ctx.resourceTitleObject != null && ctx.resourceTitleObject.default != null && ctx.resourceTitleObject.default != '') {
ok++
}
if(ctx.resourceAbstractObject != null && ctx.resourceAbstractObject.default != null && ctx.resourceAbstractObject.default != '') {
ok++
}
if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].organisation != null && ctx.contact[0].organisation != '') {
ok++
}
if(ctx.contact != null && ctx.contact.length > 0 && ctx.contact[0].email != null && ctx.contact[0].email != '') {
ok++
}
if(ctx.cl_topic != null && ctx.cl_topic.length > 0) {
ok++
}
if(ctx.tag != null && ctx.tag.length > 0) {
ok++
}
if(ctx.cl_maintenanceAndUpdateFrequency != null && ctx.cl_maintenanceAndUpdateFrequency.length > 0) {
ok++
}
if(ctx.MD_LegalConstraintsUseLimitationObject != null && ctx.MD_LegalConstraintsUseLimitationObject.length > 0) {
ok++
}
ctx.qualityScore = ok * 100 / total;`,
      },
    },
    {
      // generate human-readable data formats
      // The raw values are first copied into `originalFormat`: a copy is
      // required because the loop below rewrites and removes entries of
      // `ctx.format` in place, and a plain assignment would make both fields
      // point to the same (modified) list.
      // The list is walked backwards so that removing an unrecognized entry
      // does not shift the indices still to be visited.
      script: {
        lang: 'painless',
        source: `
if (!ctx.containsKey('format')) return;
if (ctx.format == null) return;
ctx.originalFormat = new ArrayList(ctx.format);
for(int i = ctx.format.length - 1; i >= 0; i--) {
String format = ctx.format[i].toLowerCase();
if (format.contains('shp') || format.contains('shapefile')) {
ctx.format[i] = 'ESRI Shapefile'
} else if (format.contains('ogc:w') || format.contains('esri:rest') || format.contains('tms')) {
ctx.format[i] = 'Service'
} else if (format.contains('pdf')) {
ctx.format[i] = 'PDF'
} else if (format.contains('png') || format.contains('jpg') || format.contains('jpeg') || format.contains('bmp')) {
ctx.format[i] = 'Image'
} else if (format.contains('excel') || format.contains('xls') || format.contains('vnd.oasis.opendocument.spreadsheet') || format.contains('vnd.openxmlformats-officedocument.spreadsheetml.sheet')) {
ctx.format[i] = 'Excel'
} else if (format.contains('geopackage') || format.contains('gpkg')) {
ctx.format[i] = 'Geopackage'
} else if (format.contains('postgis')) {
ctx.format[i] = 'PostGIS'
} else if (format.contains('gml')) {
ctx.format[i] = 'GML'
} else if (format.contains('kml')) {
ctx.format[i] = 'KML'
} else if (format.contains('xml')) {
ctx.format[i] = 'XML'
} else if (format.contains('html')) {
ctx.format[i] = 'HTML'
} else if (format.contains('geo+json') || format.contains('geojson')) {
ctx.format[i] = 'GeoJSON'
} else if (format.contains('json')) {
ctx.format[i] = 'JSON'
} else if (format.contains('csv')) {
ctx.format[i] = 'CSV'
} else if (format.contains('rtf')) {
ctx.format[i] = 'RTF'
} else if (format.contains('text')) {
ctx.format[i] = 'Text'
} else if (format.contains('zip')) {
ctx.format[i] = 'ZIP'
} else if (format != null) {
ctx.format.remove(i);
// uncomment this to show unrecognized formats
// ctx.format[i] = 'unknown: ' + ctx.format[i];
}
}`,
      },
    },
  ],
}

/**
 * Sends a pipeline definition to ElasticSearch with a PUT request on
 * `_ingest/pipeline/<name>`.
 * @param {string} esHost ElasticSearch base URL; the endpoint path is appended
 *   to it directly, so it is expected to end with `/`
 * @param {string} name pipeline identifier
 * @param {object} payload pipeline definition, JSON encoded into the body
 * @returns {Promise<void>}
 * @throws {Error} when the response does not carry `acknowledged: true`
 */
async function registerPipeline(esHost, name, payload) {
  console.log(`adding ${name} pipeline...`)

  const endpoint = `${esHost}_ingest/pipeline/${name}`
  const response = await fetch(endpoint, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  })
  const answer = await response.json()

  // anything but an explicit acknowledgement is treated as a failure
  if (answer.acknowledged !== true) {
    console.error(answer)
    throw new Error('something went wrong')
  }

  console.log(`${name} pipeline was successfully registered!`)
}

/**
 * Removes a pipeline from ElasticSearch with a DELETE request on
 * `_ingest/pipeline/<name>`.
 * @param {string} esHost ElasticSearch base URL; the endpoint path is appended
 *   to it directly, so it is expected to end with `/`
 * @param {string} name pipeline identifier
 * @returns {Promise<void>}
 * @throws {Error} when the response does not carry `acknowledged: true`
 */
async function clearPipeline(esHost, name) {
  console.log(`clearing ${name} pipeline...`)

  const endpoint = `${esHost}_ingest/pipeline/${name}`
  const response = await fetch(endpoint, { method: 'DELETE' })
  const answer = await response.json()

  // anything but an explicit acknowledgement is treated as a failure
  if (answer.acknowledged !== true) {
    console.error(answer)
    throw new Error('something went wrong')
  }

  console.log(`${name} pipeline was successfully cleaned!`)
}

/**
 * Sets `index.default_pipeline` on the given index with a PUT request on
 * `<recordsIndex>/_settings`.
 * @param {string} esHost ElasticSearch base URL; the endpoint path is appended
 *   to it directly, so it is expected to end with `/`
 * @param {string} recordsIndex name of the index whose settings are updated
 * @param {string} name pipeline identifier to use as default
 * @returns {Promise<void>}
 * @throws {Error} when the response does not carry `acknowledged: true`
 */
async function setDefaultPipeline(esHost, recordsIndex, name) {
  console.log(`setting ${name} as default pipeline...`)

  const endpoint = `${esHost}${recordsIndex}/_settings`
  const settings = { 'index.default_pipeline': name }
  const response = await fetch(endpoint, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(settings),
  })
  const answer = await response.json()

  // anything but an explicit acknowledgement is treated as a failure
  if (answer.acknowledged !== true) {
    console.error(answer)
    throw new Error('something went wrong')
  }

  console.log(`${name} pipeline was successfully set as default!`)
}

/**
 * Entry point of the `register` command: prints the pipelines currently known
 * to ElasticSearch, registers the `geonetwork-ui` pipeline, then sets it as
 * the default pipeline of the records index.
 * @param {string} esHost ElasticSearch base URL; the endpoint path is appended
 *   to it directly, so it is expected to end with `/`
 * @param {string} recordsIndex name of the index receiving the default pipeline
 * @returns {Promise<void>}
 */
async function registerPipelines(esHost, recordsIndex) {
  const pipelineName = 'geonetwork-ui'

  console.log('querying currently registered pipelines...')

  const response = await fetch(`${esHost}_ingest/pipeline`)
  const pipelines = await response.json()

  // list what is already there, one name + description per pipeline
  for (const [name, definition] of Object.entries(pipelines)) {
    console.log(`\n> ${name}`)
    console.log(` ${definition.description}`)
  }

  console.log('')
  await registerPipeline(esHost, pipelineName, GEONETWORK_UI_PIPELINE)

  console.log('')
  await setDefaultPipeline(esHost, recordsIndex, pipelineName)
}

/**
 * Entry point of the `clear` command: removes the `geonetwork-ui` pipeline
 * when ElasticSearch reports it, otherwise only prints a notice.
 * @param {string} esHost ElasticSearch base URL; the endpoint path is appended
 *   to it directly, so it is expected to end with `/`
 * @returns {Promise<void>}
 */
async function clearPipelines(esHost) {
  const pipelineName = 'geonetwork-ui'

  const response = await fetch(`${esHost}_ingest/pipeline`)
  const pipelines = await response.json()

  if (pipelineName in pipelines) {
    await clearPipeline(esHost, pipelineName)
    return
  }

  console.log('No geonetwork-ui pipelines found, exiting')
}

0 comments on commit 253cfa1

Please sign in to comment.