Skip to content

Commit

Permalink
Elasticsearch provider: only highlight text fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Alejandro Hernandez committed Jan 29, 2024
1 parent d26122b commit 2f4e503
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 195 deletions.
8 changes: 8 additions & 0 deletions .changeset/wet-maps-shout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
'contexture-elasticsearch': minor
---

Remove the `subFields` configuration from the schema. Instead, only fields of
type `text` are sent for highlighting. This simplifies the API, reduces the
payload sent to Elasticsearch, and fixes an issue where non-text top-level
fields (such as fields of type `keyword`) were highlighted when they should not
have been.
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,24 @@ We assume that users want to highlight all the fields present in the query. The

#### 1. Sub-fields

Whitelisted sub-fields are sent for highlighting, since they could be present in the query:
All sub-fields of type `text` are sent for highlighting, since they could be present in the query:

<details>

<summary>schema.json</summary>

```jsonc
{
"elasticsearch": {
"subFields": {
// `{field}.keyword` will *not* be sent for highlighting.
"keyword": { "highlight": false },
// `{field}.subfield` will be sent for highlighting.
"subfield": { "highlight": true }
}
},
"fields": {
// `state` will be sent for highlighting.
"state": {
"elasticsearch": {
"mapping": {
"fields": {
"keyword": {},
// `state.keyword` will not be sent for highlighting.
"keyword": { "type": "keyword" },
// `state.subfield` will be sent for highlighting.
"subfield": {}
"subfield": { "type": "text" }
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import _ from 'lodash/fp.js'
import F from 'futil'
import { minimatch } from 'minimatch'
import { CartesianProduct } from 'js-combinatorics'
import { isLeafField, isBlobField, isArrayOfObjectsField } from './util.js'
import { getFieldType, isBlobField, isArrayOfObjectsField } from './util.js'

/*
* Expand schema paths with wildcards into a list of paths without wildcards.
Expand All @@ -14,7 +13,7 @@ let expandGlobs = (schema, globs) => {
let fieldsNames = _.keys(schema.fields)

let expandGlob = (glob) =>
isLeafField(schema.fields[glob])
getFieldType(schema.fields[glob])
? [glob]
: minimatch.match(fieldsNames, `${glob}*`)

Expand All @@ -29,7 +28,7 @@ let expandGlobs = (schema, globs) => {
* Add given paths to source with includes/excludes lists. Paths get added to
* source.includes and removed from source.excludes as necessary.
*
* Returns added paths.
* Returns object with source includes, excludes, and added paths.
*/
export let addPathsToRequestSource = (schema, source = {}, pathsToAdd = []) => {
// There's nothing to add.
Expand Down Expand Up @@ -69,64 +68,80 @@ export let addPathsToRequestSource = (schema, source = {}, pathsToAdd = []) => {
return F.omitBlank({ ...result, addedPaths })
}

/*
* Names of all subfields that can be highlighted.
/**
* Map a field's subfields to a structure that looks like a top-level field.
*/
/**
 * List the names of the subfields configured for highlighting.
 *
 * Reads `schema.elasticsearch.subFields` and returns the keys of the entries
 * whose `highlight` property is truthy. Returns an empty array when the
 * schema has no `subFields` configuration.
 */
let getHighlightSubFieldsNames = (schema) => {
  let subFieldsConfig = schema.elasticsearch?.subFields
  // String shorthand: keep only entries with a truthy `highlight` property.
  let highlightable = _.pickBy('highlight', subFieldsConfig)
  return _.keys(highlightable)
}
/**
 * Convert every entry of `field.elasticsearch.mapping.fields` into an object
 * shaped like a top-level schema field, keyed by subfield name.
 *
 * Each resulting object carries:
 * - `subType`: copied from the parent field.
 * - `elasticsearch.mapping`: the subfield's own mapping plus a `copy_to` list
 *   built from the parent's `copy_to` paths, each suffixed with
 *   `.<subFieldName>`.
 *
 * Blank values are dropped at every level with `F.omitBlank`.
 */
let createTopLevelSubFields = (field) => {
  // `copy_to` targets of the parent field, shared by all of its subfields.
  let parentCopyTo = field.elasticsearch?.mapping?.copy_to

  let toTopLevelField = (subFieldMapping, subFieldName) => {
    let copy_to = _.map(
      (groupPath) => `${groupPath}.${subFieldName}`,
      parentCopyTo
    )
    let mapping = F.omitBlank({ ...subFieldMapping, copy_to })
    let elasticsearch = F.omitBlank({ mapping })
    // Reuse the parent multi-field `subType` so that the correct highlighting
    // configuration can be generated for the subfield.
    return F.omitBlank({ subType: field.subType, elasticsearch })
  }

  return F.mapValuesIndexed(
    toTopLevelField,
    field.elasticsearch?.mapping?.fields
  )
}

/*
* Paths of all group fields and their subfields that can be highlighted.
/**
* Returns object of all subfields in a schema.
*/
/**
 * Collect the paths of all group fields and of their highlightable subfields.
 *
 * For every schema field with a non-empty `elasticsearch.mapping.copy_to`,
 * the result contains each `copy_to` path followed by every
 * `<copy_to path>.<subfield name>` combination, where the subfield names come
 * from `getHighlightSubFieldsNames(schema)`.
 *
 * Memoized using `schema.elasticsearch.index` as the cache key.
 */
export let getHighlightGroupFieldsPaths = _.memoize((schema) => {
  let highlightSubFields = getHighlightSubFieldsNames(schema)

  let groupPathsForField = (field) => {
    let groupPaths = field.elasticsearch?.mapping?.copy_to
    // Fields that are not copied anywhere contribute no group paths.
    if (_.isEmpty(groupPaths)) return []
    // Pair every group path with every highlightable subfield name and join
    // each pair with a dot.
    let pairs = [...new CartesianProduct(groupPaths, highlightSubFields)]
    let subFieldPaths = _.map(_.join('.'), pairs)
    return [...groupPaths, ...subFieldPaths]
  }

  return _.flatMap(groupPathsForField, schema.fields)
}, _.get('elasticsearch.index'))

/**
 * Curried lookup of `path` among the schema's highlightable group field
 * paths. Returns the matching path when found and `undefined` otherwise.
 */
let isGroupFieldPath = _.curry((schema, path) => {
  let groupPaths = getHighlightGroupFieldsPaths(schema)
  return _.find((groupPath) => _.eq(path, groupPath), groupPaths)
})
/**
 * Build a single object holding the subfields of every field in
 * `schema.fields`, keyed by `<field path>.<subfield name>`.
 *
 * The subfields of each field are produced by `createTopLevelSubFields` and
 * merged into one accumulator object with `F.mergeOn`.
 */
let getSchemaSubFields = (schema) => {
  let addFieldSubFields = (subFieldsByPath, field, fieldPath) => {
    let qualify = (subFieldName) => `${fieldPath}.${subFieldName}`
    let qualifiedSubFields = _.mapKeys(qualify, createTopLevelSubFields(field))
    return F.mergeOn(subFieldsByPath, qualifiedSubFields)
  }

  return F.reduceIndexed(addFieldSubFields, {}, schema.fields)
}

/*
* Object of all fields and their subfields that can be highlighted.
/**
* Returns object of all group fields and their subfields in a schema.
*/
export let getAllHighlightFields = _.memoize((schema) => {
let subFieldsNames = getHighlightSubFieldsNames(schema)
return F.reduceIndexed(
(acc, field, path) => {
if (!isLeafField(field) || isGroupFieldPath(schema, path)) {
return acc
}
acc[path] = field
let subFields = _.pick(
subFieldsNames,
field.elasticsearch?.mapping?.fields
let getSchemaGroupFields = _.memoize((schema) => {
let groupFields = _.pick(
_.uniq(
_.flatMap(
(field) => field.elasticsearch?.mapping?.copy_to ?? [],
schema.fields
)
for (let name in subFields) {
acc[`${path}.${name}`] = F.omitBlank({
subType: field.subType,
elasticsearch: F.omitBlank({
mapping: F.omitBlank({
...subFields[name],
copy_to: _.map(
(path) => `${path}.${name}`,
field.elasticsearch.mapping?.copy_to
),
}),
}),
})
}
return acc
},
{},
),
schema.fields
)
return {
...groupFields,
...getSchemaSubFields({ fields: groupFields }),
}
}, _.get('elasticsearch.index'))

/**
 * Return an object, keyed by path, of all fields and subfields that can be
 * highlighted.
 *
 * Candidates are the schema's top-level fields plus every subfield returned
 * by `getSchemaSubFields`. A candidate is kept only when its field type is
 * `text` and its path is not one of the schema's group fields.
 *
 * Memoized using `schema.elasticsearch.index` as the cache key.
 */
export let getAllHighlightFields = _.memoize((schema) => {
  let groupFields = getSchemaGroupFields(schema)
  let candidates = { ...schema.fields, ...getSchemaSubFields(schema) }

  // Only text fields are highlighted.
  let isTextField = (field) => getFieldType(field) === 'text'

  // Group fields are left out of highlighting. We assume users want to
  // highlight the fields that were copied over instead of the group fields
  // themselves.
  let isGroupField = (path) => _.has(path, groupFields)

  return F.pickByIndexed(
    (field, path) => isTextField(field) && !isGroupField(path),
    candidates
  )
}, _.get('elasticsearch.index'))

let collectKeysAndValues = (f, coll) =>
Expand All @@ -146,8 +161,10 @@ let blobConfiguration = {
* Get configuration for highlight fields to send in the elastic request.
*/
export let getRequestHighlightFields = (schema, node) => {
let groupFields = getSchemaGroupFields(schema)

let groupFieldsInQuery = collectKeysAndValues(
isGroupFieldPath(schema),
F.getIn(groupFields),
node._meta?.relevantFilters
)

Expand Down
Loading

0 comments on commit 2f4e503

Please sign in to comment.