From f52cc44fc7bc468df752091ca60da5c76d9cea9e Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Wed, 1 Nov 2023 18:38:16 -0400 Subject: [PATCH 01/30] Revamp highlighting --- .../example-types/results/highlighting.d.ts | 193 -------- .../src/example-types/results/highlighting.js | 346 +++----------- .../results/highlighting.test.js | 452 +++--------------- .../src/example-types/results/index.js | 123 +++-- packages/server/src/utils.js | 1 + 5 files changed, 188 insertions(+), 927 deletions(-) delete mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting.d.ts diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.d.ts b/packages/provider-elasticsearch/src/example-types/results/highlighting.d.ts deleted file mode 100644 index 67e463759..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.d.ts +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Unfortunately we do not support Typescript in this repo yet, but types are - * still very useful for documentation purposes. - * - * # Links of interest - * - * - Elastic's [canonical documentation on highlighting](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html) - */ - -/** Can end in a wildcard (`*`) */ -type FieldName = string - -/** Should not end in a wildcard */ -type LiteralFieldName = string - -/** - * Highlighting configuration set on schemas - */ -type SchemaHighlightConfig = { - /** - * Names of fields that should be replaced by their highlighted results. For - * example, given `inline=["name"]`, `_source.name` will be replaced by - * `highlight.name` in the following elasticsearch hit - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { name: "Johny" }, - * } - * ``` - * - * If a field name ends in a wildcard, all fields with the same prefix as - * the former will be replaced by their highlighted result. 
- * - * If a field is listed here, is highlighted (present in `hit.highlight`), and - * not present in the result node's `include` array, it will be added to - * `additionalFields` in the hit. For example, given `include=["name", "hobby"]` - * the following hit - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { address: "123 Avenue" }, - * } - * ``` - * - * will be mapped to - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { address: "123 Avenue" }, - * additionalFields: [{ label: "address", value: "123 Avenue" }] - * } - * ``` - */ - inline: Array - - /** - * Used to replace inline highlighted results in unrelated `_source` fields. - * - * Keys are `_source` fields and values are `highlight` results. For example, - * given `inlineAliases={"name":"address"}` the following hit - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { address: "123 Avenue" }, - * } - * ``` - * - * will be mapped to - * - * ``` - * { - * _source: { name:"123 Avenue", hobby: "Fishing" }, - * highlight: { address: "123 Avenue" }, - * } - * ``` - * - * `inline` has priority, so `inlineAliases` only applies if the field is not - * in `inline` OR if there's no highlight result for the `inline` field. - */ - inlineAliases: Record - - /** - * Fields in this list will be copied from a hit's `highlight` to - * `additionalFields`. For example, given `additional=["name"]`, the following - * elasticsearch hit - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { name: "Johny" }, - * } - * ``` - * - * will be mapped to - * - * ``` - * { - * _source: { name: "Johny", hobby: "Fishing" }, - * highlight: { name: "Johny" }, - * additionalFields: [{ label: "name", value: "123 Johny" }] - * } - * ``` - * - * Fields in `inline`, `nested`, or `additionalExclusions` will not be copied - * over to `additionalFields` even if included here. 
- */ - additional: Array - - /** - * Fields in this list will be omitted from `additionalFields` even if they - * are included in `additional`. - */ - additionalExclusions: Array - - /** - * @deprecated - * This is not currently used for anything - */ - additionalFields: any - - /** - * In the following context, a nested field is a field contained inside of an - * array field. - * - * Highlighting results inside of an array field is tricky because elastic - * only returns highlighted items instead of all items of the array field and - * it provides no way of knowing the highlighted items indexes in the array. - * This is the behavior even when - * [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) - * is set to 0. There are two known solutions: - * - * 1. Change the type of the array field to `nested` and send a - * [nested query](https://www.elastic.co/guide/en/elasticsearch/reference/current/inner-hits.html#nested-inner-hits) - * and elastic will include the array index in the results. Also see - * [this comment](https://github.com/elastic/elasticsearch/issues/7416#issuecomment-1650617883). - * Keep in mind that changing a field type to `nested` will index each item - * in the array, so it is costly to make big arrays `nested`. - * 2. For every item in the array, replace field value with highlighted - * value if their text content matches. Notice that `number_of_fragments` - * has to 0 to be able to compare items in full. - * - * See https://github.com/elastic/elasticsearch/issues/7416 for more info. - * - * Fields listed here will be assumed to be inside of an array field and - * approach 2. will be used to inline the highlighted results in the original - * array value. - * - * NOTE: Currently, all fields specified here should belong to the same array. - * In the future we may make the API more flexible to handle for multiple - * array fields. 
- */ - nested: Array - - /** - * Name for the array field containing all the `nested` fields. Required when - * `nested` is provided. - */ - nestedPath: string - - /** - * Whether to remove non-highlighted values from the `_source` array field - * after inline substitution of highlighted results is done. - */ - filterNested: boolean -} - -type Schema = { - elasticsearch: { - highlight: SchemaHighlightConfig - } -} - -// https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#override-global-settings -type ElasticHighlightConfig = any - -type Node = { - // TODO - showOtherMatches: boolean - /** - * Per-node highlighting configuration. If `false`, highlighting is disabled. - */ - highlight: - | false - | { - fields: Record - /** Override schema's `elasticsearch.highlight` with this */ - override: SchemaHighlightConfig - } -} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index 4dc8f16fd..a2d78dbb8 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -1,289 +1,79 @@ -import F from 'futil' -import _ from 'lodash/fp.js' - -export let anyRegexesMatch = (regexes, criteria) => - !!_.find((pattern) => new RegExp(pattern).test(criteria), regexes) - -export let replaceHighlightTagRegex = (nodeHighlight) => { - let { pre_tags, post_tags } = nodeHighlight - return new RegExp(_.join('|', _.concat(pre_tags, post_tags)), 'g') -} - -export let containsHighlightTagRegex = (nodeHighlight) => { - let { pre_tags, post_tags } = nodeHighlight - let tagRegexes = _.map( - ([pre, post]) => `${pre}.+?${post}`, - _.zip(pre_tags, post_tags) - ) - return new RegExp(_.join('|', tagRegexes)) -} - -// Convert the fields array to object map where we only pick the first key from the objects -// Highlight fields can be either strings or objects 
with a single key which value is the ES highlights object config -// If the highlight field is specific as a string only then it uses the default highlights config -export let arrayToHighlightsFieldMap = _.flow( - _.map(F.when(_.isString, (x) => ({ [x]: {} }))), - F.ifElse(_.isEmpty, _.always({}), _.mergeAll) -) - -// Replace _source value with highlighted result for `fieldName` -let inlineHighlightInSource = (hit, fieldName) => { - if (fieldName.endsWith('.*')) { - // Get the root key e.g. "documents" from "documents.*" - let root = fieldName.split('.*')[0] - // Get all the highlights that start with the root key - let matchedKeys = _.filter( - (key) => _.startsWith(`${root}.`, key), - _.keys(hit.highlight) - ) - _.each((key) => F.setOn(key, hit.highlight[key], hit._source), matchedKeys) - } else { - let highlights = hit.highlight[fieldName] - if (highlights) { - F.setOn( - fieldName, - highlights.length > 1 ? highlights : highlights[0], - hit._source - ) - } - } -} - -let getAdditionalFields = ({ schemaHighlight, hit, include, inlineKeys }) => { - let additionalFields = [] - let { additional, additionalExclusions, inline, nested, nestedPath } = - schemaHighlight - - F.eachIndexed((highlightedValue, fieldName) => { - // Whether `fieldName` is matched by any field name in `additional` - let additionalMatches = anyRegexesMatch(additional, fieldName) - - // Exclude explicit exclusions, inline, and nested highlight fields - let additionalExclusionMatches = - anyRegexesMatch(additionalExclusions, fieldName) || - anyRegexesMatch(inline, fieldName) || - anyRegexesMatch(nested, fieldName) - - // Whether there is an include array and `fieldName` is contained in - // `inline` but is not in `include` - let inlineButNotIncluded = - include && _.includes(fieldName, _.difference(inlineKeys, include)) - - if ( - inlineButNotIncluded || - (additionalMatches && !additionalExclusionMatches) - ) { - additionalFields.push({ - label: fieldName, - value: highlightedValue[0], - }) - } - 
- if ( - _.includes(fieldName, nested) && - _.isArray(highlightedValue) && - !_.includes(nestedPath, fieldName) - ) { - additionalFields.push({ - label: fieldName, - value: highlightedValue, - }) - } - }, hit.highlight) - - return additionalFields -} - -let handleNested = ({ - schemaHighlight, - nodeHighlight, - hit, - additionalFields, -}) => { - let { nested, nestedPath, filterNested } = schemaHighlight - let replaceTagRegex = replaceHighlightTagRegex(nodeHighlight) - let containsTagRegex = containsHighlightTagRegex(nodeHighlight) - - F.eachIndexed((highlightedValue, fieldName) => { - if ( - _.includes(fieldName, nested) && - !_.find({ label: fieldName }, additionalFields) - ) { - // Clarify [{a}, {b}] case and not [a,b] case. See - // https://github.com/elastic/elasticsearch/issues/7416 - // TODO: We can support arrays of scalars as long as we make sure that - // `number_of_fragments` is 0 for the highlighted field so that we can - // compare the array items in full. - if (fieldName === nestedPath) { - throw new Error('Arrays of scalars not supported') - } +// https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio +// https://github.com/elastic/elasticsearch/issues/5172 - let field = fieldName.replace(`${nestedPath}.`, '') - - // For arrays, strip the highlighting wrapping and compare to the array - // contents to match up - for (let val of highlightedValue) { - let originalValue = val.replace(replaceTagRegex, '') - let childItem = _.find( - // TODO: Remove this asap - (item) => _.trim(_.get(field, item)) === _.trim(originalValue), - _.get(nestedPath, hit._source) - ) - if (childItem) F.setOn(field, val, childItem) - } +import _ from 'lodash/fp.js' +import F from 'futil' - if (filterNested) { - let filtered = _.filter( - (arrayField) => containsTagRegex.test(_.get(field, arrayField)), - _.get(nestedPath, hit._source) - ) - F.setOn(nestedPath, filtered, hit._source) +// 
https://www.elastic.co/guide/en/elasticsearch/reference/8.10/query-dsl.html +const foo = new Set([ + // Full-text queries + 'intervals', + 'match', + 'match_bool_prefix', + 'match_phrase', + 'match_phrase_prefix', + // Term-level queries + 'fuzzy', + 'prefix', + 'regexp', + 'term', + 'terms', + 'terms_set', + 'wildcard', +]) + +const bar = new Set([ + 'query_string', + 'simple_query_string', + 'combined_fields', + 'multi_match', +]) + +/** + * Extract fields relevant for highlighting from an Elastic query DSL. + * + * This function walks the query, looking for whitelisted keys that correspond + * to elastic query names such as "fuzzy" and "match". As such, it may return + * extra fields that do not exist in the index mappings. For example, given the + * following query + * + * ``` + * { "match": { "match": { "query": "city" } } }` + * ``` + * + * this function will return `["match", "query"]` which is incorrect as "query" + * is not a field. This is a reasonable tradeoff to avoid a more comprehensive + * parser and keep the implementation simple. + */ +export const getHighlightFieldsFromQuery = F.reduceTree()( + (fields, query, key) => { + if (_.isPlainObject(query)) { + if (foo.has(key)) { + fields.push(..._.keys(query)) } - } - }, hit.highlight) -} - -// TODO: Support multiple nestedPaths... 
-// TODO: Support Regex and Function basis for all options -// TODO: Make this function pure, do not mutate `hit._source` -export let highlightResults = ({ - schemaHighlight, // The schema highlight configuration - nodeHighlight, // The result node's highlight configuration - hit, // The ES result - include, // The columns to return -}) => { - let { inline, inlineAliases, nestedPath, filterNested } = schemaHighlight - let inlineKeys = _.keys(arrayToHighlightsFieldMap(inline)) - - let additionalFields = getAdditionalFields({ - schemaHighlight, - hit, - include, - inlineKeys, - }) - - // TODO: Make this function pure, do not mutate `hit._source` - handleNested({ - schemaHighlight, - nodeHighlight, - hit, - additionalFields, - }) - - // TODO: Do not mutate `hit._source` - if (filterNested && _.isEmpty(hit.highlight)) { - F.setOn(nestedPath, [], hit._source) - } - - // Copy over all inline highlighted fields - if (hit.highlight) { - for (let field of inlineKeys) { - // TODO: Make this function pure, do not mutate `hit._source` - inlineHighlightInSource(hit, field) - } - - // Do the field replacement for the inlineAliases fields - for (let [field, mapToField] of _.toPairs(inlineAliases)) { - // if we have a highlight result matching the inlineAliases TO field - if (hit.highlight[mapToField]) { - // if the field is only in inlineAliases OR it is in both but not inlined/highlighted already by the inline section - if ( - !_.includes(field, inlineKeys) || - (_.includes(field, inlineKeys) && !hit.highlight[field]) - ) { - // TODO: Do not mutate `hit._source` - F.setOn(field, hit.highlight[mapToField][0], hit._source) + // Use https://github.com/bripkens/lucene if we decide to parse lucene query + // strings. + if (bar.has(key)) { + fields.push(...(query.fields ?? 
[])) + if (query.default_field) { + fields.push(query.default_field) } } } - } - - return { additionalFields } -} - -const mergeReplacingArrays = _.mergeWith((target, src) => { - if (_.isArray(src)) return src -}) - -export let getHighlightSettings = (schema, node) => { - // Users can opt-out of highlighting by setting `node.highlight` to `false` - // explicitly. - // TODO: Reconsider if it makes more sense to opt-in instead of opt-out since - // highlighting decreases performance. - let shouldHighlight = - node.highlight !== false && _.isPlainObject(schema.elasticsearch?.highlight) - - // Highlighting starts with defaults in the schema first - if (shouldHighlight) { - // Result nodes can override schema highlighting configuration - let schemaHighlight = mergeReplacingArrays( - schema.elasticsearch.highlight, - node.highlight - ) - - let showOtherMatches = _.getOr(false, 'showOtherMatches', node) - let schemaInline = _.getOr([], 'inline', schemaHighlight) - - // Get field names from `inlineAliases` that are also in `node.include` - let schemaInlineAliases = _.flow( - _.getOr({}, 'inlineAliases'), - _.entries, - _.filter(([k]) => _.includes(k, node.include)), - _.flatten - )(schemaHighlight) - - // Add field names from `node.highlight.fields` to - // `schema.elasticsearch.highlight.inline` so we have them as targets for - // highlight replacement - schemaHighlight = _.set( - 'inline', - _.concat(schemaInline, _.keys(node.highlight?.fields)), - schemaHighlight - ) + return fields + }, + [] +) - // Convert the highlight fields from array to an object map - let fields = _.flow( - _.pick(['inline', 'additionalFields', 'nested']), // Get the highlight fields we will be working with - _.values, - _.flatten, - _.concat(schemaInlineAliases), // Include the provided field aliases if any - _.uniq, - arrayToHighlightsFieldMap, // Convert the array to object map so we can simply _.pick again - (filtered) => - showOtherMatches - ? 
// Highlight on all fields specified in the initial _.pick above. - filtered - : // Only highlight on the fields listed in the node include section and their aliases (if any) - _.pick(_.concat(node.include, schemaInlineAliases), filtered) - )(schemaHighlight) +export const expandFieldWildcards = _.curry(() => []) - // Properties we support as part of the highlighting configuration that - // elastic does not have knowledge of. - let nonElasticProperties = [ - 'inline', - 'inlineAliases', - 'additional', - 'additionalExclusions', - 'additionalFields', - 'nested', - 'nestedPath', - 'filterNested', - ] +// Also expand `FieldGroup.All.exact` into `description.exact`, `title.exact`, etc... +export const expandFieldGroups = _.curry(() => []) - let nodeHighlight = _.merge( - { - // The default schema highlighting settings w/o the fields - pre_tags: [''], - post_tags: [''], - require_field_match: false, - number_of_fragments: 0, - fields, - }, - _.omit(nonElasticProperties, node.highlight) - ) +// For each field, produce some default configuration based on the mappings (ex. 
limiting number of fragments for big text blobs) +// - Syncing code should set `{ index_options: 'offsets', meta: { subtype: 'bigtext' } }` +export const makeHighlightConfig = _.curry(() => []) - return { schemaHighlight, nodeHighlight } - } +export const mergeHighlightResults = _.curry(() => []) - return {} -} +export const inlineHighlightResults = _.curry(() => []) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index c324eedf7..2952b001a 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -1,396 +1,60 @@ -import { - highlightResults, - arrayToHighlightsFieldMap, - anyRegexesMatch, - replaceHighlightTagRegex, - containsHighlightTagRegex, -} from './highlighting.js' - -let nodeHighlight = { - pre_tags: [''], - post_tags: [''], -} - -describe('highlighting', () => { - describe('highlightResults', () => { - it('should work with includes', () => { - let schemaHighlight = { inline: ['title', 'description', 'summary'] } - let hit = { - _source: { - summary: 'Chromebooks', - dueDate: '2018-10-11', - agencyID: 77985, - agencyStateCode: 'TX', - title: 'Chromebooks', - bidNumber: '8934908', - agencyName: 'Mission Consolidated Independent School Dist. 
908', - }, - highlight: { summary: ['a'], description: ['b'], title: ['c'] }, - } - let include = ['title'] - let result = highlightResults({ - schemaHighlight, - nodeHighlight, - hit, - include, - }) - expect(result).toEqual({ - additionalFields: [ - { - label: 'summary', - value: 'a', - }, - { - label: 'description', - value: 'b', - }, - ], - }) - }) - it('should work without includes', () => { - let schemaHighlight = { inline: ['title', 'description', 'summary'] } - let hit = { - _source: { - summary: 'Chromebooks', - dueDate: '2018-10-11', - agencyID: 77985, - agencyStateCode: 'TX', - title: 'Chromebooks', - bidNumber: '8934908', - agencyName: 'Mission Consolidated Independent School Dist. 908', - }, - highlight: { summary: ['a'], description: ['b'], title: ['c'] }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('should work with inline', () => { - let schemaHighlight = { - inline: ['title', 'description'], - } - let hit = { - _source: { - title: '...', - description: '...', - }, - highlight: { title: ['foo'], description: ['bar'] }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(hit._source).toEqual({ - title: 'foo', - description: 'bar', - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('should work with inline and .* object', () => { - let schemaHighlight = { - inline: [ - 'title', - 'description', - { - 'documents.*': { - file0: { - number_of_fragments: 1, - }, - }, - }, - ], - } - let hit = { - _source: { - title: '...', - description: '...', - }, - highlight: { - title: ['foo'], - description: ['bar'], - 'documents.file0.parseBoxText': ['fooBar'], - }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(hit._source).toEqual({ - title: 'foo', - description: 'bar', - documents: { - file0: { - parseBoxText: ['fooBar'], - }, - }, - }) - expect(result).toEqual({ - 
additionalFields: [], - }) - }) - it('should work with inline and object', () => { - let schemaHighlight = { - inline: [ - 'title', - 'description', - { - documents: { - number_of_fragments: 1, - }, - }, - ], - } - let hit = { - _source: { - title: '...', - description: '...', - }, - highlight: { - title: ['foo'], - description: ['bar'], - documents: ['fooBar'], - }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(hit._source).toEqual({ - title: 'foo', - description: 'bar', - documents: 'fooBar', - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('should work with inlineAliases', () => { - let schemaHighlight = { - inline: ['title', 'description'], - inlineAliases: { - description: 'description.exact', - }, - } - let hit = { - _source: { - title: '...', - description: '...', - }, - highlight: { - title: ['foo'], - 'description.exact': ['bar'], - }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(hit._source).toEqual({ - title: 'foo', - description: 'bar', - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('inline should precede inlineAliases', () => { - let schemaHighlight = { - inline: ['description'], - inlineAliases: { - description: 'description.exact', - }, - } - let hit = { - _source: { - description: '...', - }, - highlight: { - description: ['foo'], - 'description.exact': ['bar'], - }, - } - let result = highlightResults({ schemaHighlight, nodeHighlight, hit }) - expect(hit._source).toEqual({ - description: 'foo', - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('arrayToHighlightsFieldMap should work', () => { - let inline = [ - 'title', - 'description', - { - documents: { - number_of_fragments: 1, - }, - }, - ] - let result = arrayToHighlightsFieldMap(inline) - expect(result).toEqual({ - title: {}, - description: {}, - documents: { - number_of_fragments: 1, - }, - }) - }) - it('should work with nested', () => { - let 
schemaHighlight = { - nested: ['comments.text'], - nestedPath: 'comments', - } - let hit = { - _source: { - title: '...', - description: '...', - comments: [{ text: 'foo' }, { text: 'bar' }, { text: 'baz' }], - }, - highlight: { - 'comments.text': [ - 'foo', - 'bar', - ], - }, - } - let result = highlightResults({ - schemaHighlight, - nodeHighlight, - hit, - include: ['title', 'description', 'comments.text'], - }) - expect(hit._source).toEqual({ - title: '...', - description: '...', - comments: [ - { text: 'foo' }, - { text: 'bar' }, - { text: 'baz' }, - ], - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - it('should work with nested and filterNested', () => { - let schemaHighlight = { - nested: ['comments.text'], - nestedPath: 'comments', - filterNested: true, - } - let hit = { - _source: { - title: '...', - description: '...', - comments: [{ text: 'foo' }, { text: 'bar' }, { text: 'baz' }], - }, - highlight: { - 'comments.text': [ - 'foo', - 'bar', - ], - }, - } - let result = highlightResults({ - schemaHighlight, - nodeHighlight, - hit, - include: ['title', 'description', 'comments.text'], - }) - expect(hit._source).toEqual({ - title: '...', - description: '...', - comments: [ - { text: 'foo' }, - { text: 'bar' }, - ], - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - - it('should clear nested when filterNested and highlight is empty', () => { - let schemaHighlight = { - nested: ['comments.text'], - nestedPath: 'comments', - filterNested: true, - } - let hit = { - _source: { - title: '...', - description: '...', - comments: [{ text: 'foo' }, { text: 'bar' }, { text: 'baz' }], - }, - highlight: {}, - } - let result = highlightResults({ - schemaHighlight, - nodeHighlight, - hit, - include: ['title', 'description', 'comments.text'], - }) - expect(hit._source).toEqual({ - title: '...', - description: '...', - comments: [], - }) - expect(result).toEqual({ - additionalFields: [], - }) - }) - }) -}) - -describe('anyRegexesMatch()', 
() => { - it('should match', () => { - let actual = anyRegexesMatch(['non-matching', 'nested.*'], 'nested.field') - expect(actual).toEqual(true) - }) - - it('should not match', () => { - let actual = anyRegexesMatch( - ['non-matching', 'non-matching.*'], - 'nested.field' - ) - expect(actual).toEqual(false) - }) -}) - -describe('replaceHighlightTagRegex()', () => { - it('should remove all tags from highlighted text', () => { - let regex = replaceHighlightTagRegex({ - pre_tags: ['', ''], - post_tags: ['', ''], - }) - let text = - 'Lorem Ipsum has been the industry standard dummy text ever since the 1500s.' - expect(text.replace(regex, '')).toEqual( - 'Lorem Ipsum has been the industry standard dummy text ever since the 1500s.' - ) - }) -}) - -describe('containsHighlightTagRegex()', () => { - it('should match highlighted text', () => { - let regex = containsHighlightTagRegex({ - pre_tags: ['', ''], - post_tags: ['', ''], - }) - let text = - 'Lorem Ipsum has been the industry standard dummy text ever since the 1500s.' - expect(regex.test(text)).toEqual(true) - }) - - it('should not match non-highlighted text', () => { - let regex = containsHighlightTagRegex({ - pre_tags: ['', ''], - post_tags: ['', ''], - }) - let text = - 'Lorem Ipsum has been the industry standard dummy text ever since the 1500s.' - expect(regex.test(text)).toEqual(false) - }) - - it('should not match non-balanced tags', () => { - let regex = containsHighlightTagRegex({ - pre_tags: ['', ''], - post_tags: ['', ''], - }) - let text = - 'Lorem Ipsum has been the industry standard dummy text ever since the 1500s.' 
- expect(regex.test(text)).toEqual(false) +import { getHighlightFieldsFromQuery } from './highlighting.js' + +describe('getHighlightFieldsFromQuery()', () => { + it('should extract all fields relevant for highlighting', () => { + const query = { + // Full-text queries + intervals: { 'field:intervals': {} }, + match: { 'field:match': {} }, + match_bool_prefix: { 'field:match_bool_prefix': {} }, + match_phrase: { 'field:match_phrase': {} }, + match_phrase_prefix: { 'field:match_phrase_prefix': {} }, + combined_fields: { + fields: ['field:combined_fields:0', 'field:combined_fields:1'], + }, + multi_match: { + fields: ['field:multi_match:0', 'field:multi_match:1'], + }, + query_string: { + fields: ['field:query_string:0', 'field:query_string:1'], + default_field: 'field:query_string:default', + }, + simple_query_string: { + fields: ['field:simple_query_string:0', 'field:simple_query_string:1'], + default_field: 'field:simple_query_string:default', + }, + // Term-level queries + fuzzy: { 'field:fuzzy': {} }, + prefix: { 'field:prefix': {} }, + regexp: { 'field:regexp': {} }, + term: { 'field:term': {} }, + terms: { 'field:terms': {} }, + terms_set: { 'field:terms_set': {} }, + wildcard: { 'field:wildcard': {} }, + } + expect(getHighlightFieldsFromQuery(query)).toEqual([ + 'field:intervals', + 'field:match', + 'field:match_bool_prefix', + 'field:match_phrase', + 'field:match_phrase_prefix', + 'field:combined_fields:0', + 'field:combined_fields:1', + 'field:multi_match:0', + 'field:multi_match:1', + 'field:query_string:0', + 'field:query_string:1', + 'field:query_string:default', + 'field:simple_query_string:0', + 'field:simple_query_string:1', + 'field:simple_query_string:default', + 'field:fuzzy', + 'field:prefix', + 'field:regexp', + 'field:term', + 'field:terms', + 'field:terms_set', + 'field:wildcard', + ]) }) }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 
3217b6bd1..1f056d277 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,78 +1,77 @@ import F from 'futil' import _ from 'lodash/fp.js' -import { highlightResults, getHighlightSettings } from './highlighting.js' +import { + getHighlightFieldsFromQuery, + expandFieldWildcards, + expandFieldGroups, + makeHighlightConfig, + mergeHighlightResults, + inlineHighlightResults, +} from './highlighting.js' import { getField } from '../../utils/fields.js' export default { validContext: () => true, - result(node, search, schema) { - let page = (node.page || 1) - 1 - let pageSize = node.pageSize || 10 - let startRecord = page * pageSize - let sortField = node.sortField ? getField(schema, node.sortField) : '_score' + async result(node, search, schema) { + const mappings = _.flow( + _.mapValues('elasticsearch'), + F.compactObject + )(schema.fields) - let searchObj = { - from: startRecord, - size: pageSize, - sort: { [sortField]: node.sortDir || 'desc' }, - explain: node.explain, - // Without this, ES7+ stops counting at 10k instead of returning the actual count - track_total_hits: true, - } - - if (node.include || node.exclude) - searchObj._source = F.compactObject({ - includes: node.include, - excludes: node.exclude, - }) - - let resultColumns = node.include + const highlightFieldsNames = _.flow( + getHighlightFieldsFromQuery, + expandFieldWildcards(mappings), + expandFieldGroups(mappings) + )(node._meta.relevantFilters) - let { schemaHighlight, nodeHighlight } = getHighlightSettings(schema, node) + const highlightFields = _.flow( + _.pick(highlightFieldsNames), + _.mapValues(makeHighlightConfig) + )(mappings) - if (nodeHighlight) { - // Setup the DEFAULT highlight config object with the calculated fields above - // and merge with the search specific config - searchObj.highlight = nodeHighlight + const page = (node.page || 1) - 1 + const pageSize = node.pageSize || 10 + 
const startRecord = page * pageSize + const sortField = node.sortField + ? getField(schema, node.sortField) + : '_score' - // Make sure the search specific overrides are part of the node include. - // This way they do not have to be added manually. All that is needed is the highlight config - resultColumns = _.flow( - _.concat(_.keys((node.highlight ?? {}).fields)), - _.uniq, - _.compact - )(node.include) + const response = await search( + F.omitBlank({ + from: startRecord, + size: pageSize, + sort: { [sortField]: node.sortDir || 'desc' }, + explain: node.explain, + // Without this, ES7+ stops counting at 10k instead of returning the actual count + track_total_hits: true, + _source: F.omitBlank({ + includes: node.include, + excludes: node.exclude, + }), + highlight: node.enableHighlighting && { + pre_tags: [''], + post_tags: [''], + number_of_fragments: 0, + fields: highlightFields, + }, + }) + ) - // Make sure search returns the resultColumns we want by setting the _.source.includes - F.setOn('_source.includes', resultColumns, searchObj) - } + // Handling highlighting results + // - Merge subfields highlights into normal field + // - Replace values in `_source` with their highlights + const results = _.map( + _.flow(mergeHighlightResults(mappings), inlineHighlightResults(mappings)), + response.hits.hits + ) - return search(searchObj).then((results) => ({ - scrollId: results._scroll_id, + return { + scrollId: response._scroll_id, // ES 7+ is total.value, ES 6- is hits.total - totalRecords: F.getOrReturn('value', results.hits.total), + totalRecords: F.getOrReturn('value', response.hits.total), startRecord: startRecord + 1, - endRecord: startRecord + results.hits.hits.length, - results: _.map((hit) => { - let additionalFields - if (schemaHighlight) { - let highlightObject = highlightResults({ - schemaHighlight, // The schema highlight configuration - nodeHighlight, // The result node's highlight configuration - hit, // The ES result - include: resultColumns, // The 
columns to return - }) - additionalFields = highlightObject.additionalFields - } - - // TODO - If nested path, iterate properties on nested path, filtering - // out nested path results unless mainHighlighted or relevant nested - // fields have "" tags in them - return { - additionalFields: schemaHighlight ? additionalFields : [], - ...hit, - } - }, results.hits.hits), - })) + endRecord: startRecord + response.hits.hits.length, + results, + } }, } diff --git a/packages/server/src/utils.js b/packages/server/src/utils.js index 201c0f0b3..5a4199c9e 100644 --- a/packages/server/src/utils.js +++ b/packages/server/src/utils.js @@ -25,6 +25,7 @@ export let getRelevantFilters = _.curry((groupCombinator, Path, group) => { relevantChildren ) + console.log(relevantChildren) let relevantFilters = _.compact( _.map(getRelevantFilters(groupCombinator, path), relevantChildren) ) From 2891e13979e8ee43d0ccd5cfba66dc047ad80560 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 2 Nov 2023 13:59:17 -0400 Subject: [PATCH 02/30] More work --- .../src/example-types/results/highlighting.js | 110 ++++----- .../results/highlighting.test.js | 233 +++++++++++++----- .../src/example-types/results/index.js | 88 ++++--- packages/provider-elasticsearch/src/schema.js | 15 +- packages/server/src/utils.js | 1 - 5 files changed, 277 insertions(+), 170 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index a2d78dbb8..171fd0f17 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -4,75 +4,55 @@ import _ from 'lodash/fp.js' import F from 'futil' -// https://www.elastic.co/guide/en/elasticsearch/reference/8.10/query-dsl.html -const foo = new Set([ - // Full-text queries - 'intervals', - 'match', - 'match_bool_prefix', - 'match_phrase', - 
'match_phrase_prefix', - // Term-level queries - 'fuzzy', - 'prefix', - 'regexp', - 'term', - 'terms', - 'terms_set', - 'wildcard', -]) - -const bar = new Set([ - 'query_string', - 'simple_query_string', - 'combined_fields', - 'multi_match', -]) - -/** - * Extract fields relevant for highlighting from an Elastic query DSL. - * - * This function walks the query, looking for whitelisted keys that correspond - * to elastic query names such as "fuzzy" and "match". As such, it may return - * extra fields that do not exist in the index mappings. For example, given the - * following query - * - * ``` - * { "match": { "match": { "query": "city" } } }` - * ``` - * - * this function will return `["match", "query"]` which is incorrect as "query" - * is not a field. This is a reasonable tradeoff to avoid a more comprehensive - * parser and keep the implementation simple. - */ -export const getHighlightFieldsFromQuery = F.reduceTree()( - (fields, query, key) => { - if (_.isPlainObject(query)) { - if (foo.has(key)) { - fields.push(..._.keys(query)) - } - // Use https://github.com/bripkens/lucene if we decide to parse lucene query - // strings. - if (bar.has(key)) { - fields.push(...(query.fields ?? [])) - if (query.default_field) { - fields.push(query.default_field) +export const inlineSubFieldsMappings = _.curry((subFields, mappings) => + F.reduceIndexed( + (mappings, fieldMapping, fieldName) => { + for (const k in fieldMapping.fields) { + if (subFields[k]?.shouldHighlight) { + mappings[`${fieldName}.${k}`] = { + ...fieldMapping.fields[k], + meta: { ...fieldMapping.meta, isSubField: true }, + copy_to: _.map((f) => `${f}.${k}`, fieldMapping.copy_to), + } } } - } - return fields - }, - [] + return mappings + }, + mappings, + mappings + ) ) -export const expandFieldWildcards = _.curry(() => []) - -// Also expand `FieldGroup.All.exact` into `description.exact`, `title.exact`, etc... 
-export const expandFieldGroups = _.curry(() => []) - -// For each field, produce some default configuration based on the mappings (ex. limiting number of fragments for big text blobs) -// - Syncing code should set `{ index_options: 'offsets', meta: { subtype: 'bigtext' } }` -export const makeHighlightConfig = _.curry(() => []) +export const makeHighlightConfig = _.curry((query, fieldMapping, fieldName) => { + const config = {} + if (fieldMapping.meta?.subType === 'blob') { + config.order = 'score' + config.fragment_size = 250 + config.number_of_fragments = 3 + } + if (!_.isEmpty(fieldMapping.copy_to)) { + // An improvement would be to only set highlight_query when a field group + // field is present in the query. + const queryHasFieldGroup = F.findNode()( + (val) => _.includes(val, fieldMapping.copy_to), + query + ) + if (queryHasFieldGroup) { + config.highlight_query = F.mapTree()((val) => { + if (_.includes(val, fieldMapping.copy_to)) { + val = fieldName + } + if (_.isPlainObject(val)) { + for (const copy_to of fieldMapping.copy_to) { + F.renamePropertyOn(copy_to, fieldName, val) + } + } + return val + }, query) + } + } + return config +}) export const mergeHighlightResults = _.curry(() => []) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index 2952b001a..83f4de01a 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -1,60 +1,183 @@ -import { getHighlightFieldsFromQuery } from './highlighting.js' +import _ from 'lodash/fp' +import { inlineSubFieldsMappings, makeHighlightConfig } from './highlighting.js' -describe('getHighlightFieldsFromQuery()', () => { - it('should extract all fields relevant for highlighting', () => { +describe('inlineSubFieldsMappings()', () => { + it('should inline sub-fields into the 
top-level mappings', () => { + const subFields = { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + } + const mappings = { + all: { + type: 'text', + }, + fieldgroup: { + type: 'text', + }, + age: { + type: 'long', + copy_to: [], + }, + name: { + type: 'text', + copy_to: ['all', 'fieldgroup'], + }, + job: { + type: 'text', + copy_to: [], + fields: { + keyword: { type: 'keyword' }, + exact: { type: 'text', analyzer: 'exact' }, + }, + }, + street: { + type: 'text', + copy_to: ['all', 'fieldgroup'], + fields: { + keyword: { type: 'keyword' }, + exact: { type: 'text', analyzer: 'exact' }, + }, + meta: { subType: 'blob' }, + }, + } + expect(inlineSubFieldsMappings(subFields, mappings)).toEqual({ + ..._.omit(['all', 'fieldgroup'], mappings), + 'job.exact': { + type: 'text', + analyzer: 'exact', + copy_to: [], + meta: { isSubField: true }, + }, + 'street.exact': { + type: 'text', + analyzer: 'exact', + copy_to: ['all.exact', 'fieldgroup.exact'], + meta: { subType: 'blob', isSubField: true }, + }, + }) + }) +}) + +describe('makeHighlightConfig()', () => { + it('should generate default config for blob subtype', () => { + const query = {} + const fieldMapping = { + meta: { subType: 'blob' }, + } + const fieldName = 'street' + const result = makeHighlightConfig(query, fieldMapping, fieldName) + expect(result).toEqual({ + order: 'score', + fragment_size: 250, + number_of_fragments: 3, + }) + }) + + it('should generate highlight_query with field group name replaced by field name', () => { const query = { - // Full-text queries - intervals: { 'field:intervals': {} }, - match: { 'field:match': {} }, - match_bool_prefix: { 'field:match_bool_prefix': {} }, - match_phrase: { 'field:match_phrase': {} }, - match_phrase_prefix: { 'field:match_phrase_prefix': {} }, - combined_fields: { - fields: ['field:combined_fields:0', 'field:combined_fields:1'], - }, - multi_match: { - fields: ['field:multi_match:0', 'field:multi_match:1'], - }, - query_string: { - 
fields: ['field:query_string:0', 'field:query_string:1'], - default_field: 'field:query_string:default', - }, - simple_query_string: { - fields: ['field:simple_query_string:0', 'field:simple_query_string:1'], - default_field: 'field:simple_query_string:default', - }, - // Term-level queries - fuzzy: { 'field:fuzzy': {} }, - prefix: { 'field:prefix': {} }, - regexp: { 'field:regexp': {} }, - term: { 'field:term': {} }, - terms: { 'field:terms': {} }, - terms_set: { 'field:terms_set': {} }, - wildcard: { 'field:wildcard': {} }, + bool: { + must: [ + { terms: { all: 'city' } }, + { terms: { fieldgroup: 'city' } }, + { query_string: { query: 'city', default_field: 'all' } }, + { query_string: { query: 'city', default_field: 'fieldgroup' } }, + ], + }, } - expect(getHighlightFieldsFromQuery(query)).toEqual([ - 'field:intervals', - 'field:match', - 'field:match_bool_prefix', - 'field:match_phrase', - 'field:match_phrase_prefix', - 'field:combined_fields:0', - 'field:combined_fields:1', - 'field:multi_match:0', - 'field:multi_match:1', - 'field:query_string:0', - 'field:query_string:1', - 'field:query_string:default', - 'field:simple_query_string:0', - 'field:simple_query_string:1', - 'field:simple_query_string:default', - 'field:fuzzy', - 'field:prefix', - 'field:regexp', - 'field:term', - 'field:terms', - 'field:terms_set', - 'field:wildcard', - ]) + const fieldMapping = { + copy_to: ['all', 'fieldgroup'], + } + const fieldName = 'street' + const result = makeHighlightConfig(query, fieldMapping, fieldName) + expect(result).toEqual({ + highlight_query: { + bool: { + must: [ + { terms: { [fieldName]: 'city' } }, + { terms: { [fieldName]: 'city' } }, + { query_string: { query: 'city', default_field: fieldName } }, + { query_string: { query: 'city', default_field: fieldName } }, + ], + }, + }, + }) + }) + + it('should not generate highlight_query when field group name is not in query', () => { + const query = { + bool: { + must: [ + { terms: { age: 'city' } }, + { terms: { 
name: 'city' } }, + { query_string: { query: 'city', default_field: 'age' } }, + { query_string: { query: 'city', default_field: 'name' } }, + ], + }, + } + const fieldMapping = { + copy_to: ['all', 'fieldgroup'], + } + const fieldName = 'street' + const result = makeHighlightConfig(query, fieldMapping, fieldName) + expect(result).toEqual({}) }) }) + +// describe('getHighlightFieldsFromQuery()', () => { +// it('should extract all fields relevant for highlighting', () => { +// const query = { +// // Full-text queries +// intervals: { 'field:intervals': {} }, +// match: { 'field:match': {} }, +// match_bool_prefix: { 'field:match_bool_prefix': {} }, +// match_phrase: { 'field:match_phrase': {} }, +// match_phrase_prefix: { 'field:match_phrase_prefix': {} }, +// combined_fields: { +// fields: ['field:combined_fields:0', 'field:combined_fields:1'], +// }, +// multi_match: { +// fields: ['field:multi_match:0', 'field:multi_match:1'], +// }, +// query_string: { +// fields: ['field:query_string:0', 'field:query_string:1'], +// default_field: 'field:query_string:default', +// }, +// simple_query_string: { +// fields: ['field:simple_query_string:0', 'field:simple_query_string:1'], +// default_field: 'field:simple_query_string:default', +// }, +// // Term-level queries +// fuzzy: { 'field:fuzzy': {} }, +// prefix: { 'field:prefix': {} }, +// regexp: { 'field:regexp': {} }, +// term: { 'field:term': {} }, +// terms: { 'field:terms': {} }, +// terms_set: { 'field:terms_set': {} }, +// wildcard: { 'field:wildcard': {} }, +// } +// expect(getHighlightFieldsFromQuery(query)).toEqual([ +// 'field:intervals', +// 'field:match', +// 'field:match_bool_prefix', +// 'field:match_phrase', +// 'field:match_phrase_prefix', +// 'field:combined_fields:0', +// 'field:combined_fields:1', +// 'field:multi_match:0', +// 'field:multi_match:1', +// 'field:query_string:0', +// 'field:query_string:1', +// 'field:query_string:default', +// 'field:simple_query_string:0', +// 
'field:simple_query_string:1', +// 'field:simple_query_string:default', +// 'field:fuzzy', +// 'field:prefix', +// 'field:regexp', +// 'field:term', +// 'field:terms', +// 'field:terms_set', +// 'field:wildcard', +// ]) +// }) +// }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 1f056d277..d94d4fc8c 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,34 +1,27 @@ import F from 'futil' import _ from 'lodash/fp.js' import { - getHighlightFieldsFromQuery, - expandFieldWildcards, - expandFieldGroups, + inlineSubFieldsMappings, makeHighlightConfig, mergeHighlightResults, inlineHighlightResults, } from './highlighting.js' import { getField } from '../../utils/fields.js' +const processResults = (schema, results) => { + const mappings = _.flow( + _.mapValues('elasticsearch'), + F.compactObject + )(schema.fields) + return _.map( + _.flow(mergeHighlightResults(mappings), inlineHighlightResults(mappings)), + results + ) +} + export default { validContext: () => true, async result(node, search, schema) { - const mappings = _.flow( - _.mapValues('elasticsearch'), - F.compactObject - )(schema.fields) - - const highlightFieldsNames = _.flow( - getHighlightFieldsFromQuery, - expandFieldWildcards(mappings), - expandFieldGroups(mappings) - )(node._meta.relevantFilters) - - const highlightFields = _.flow( - _.pick(highlightFieldsNames), - _.mapValues(makeHighlightConfig) - )(mappings) - const page = (node.page || 1) - 1 const pageSize = node.pageSize || 10 const startRecord = page * pageSize @@ -36,34 +29,37 @@ export default { ? 
getField(schema, node.sortField) : '_score' - const response = await search( - F.omitBlank({ - from: startRecord, - size: pageSize, - sort: { [sortField]: node.sortDir || 'desc' }, - explain: node.explain, - // Without this, ES7+ stops counting at 10k instead of returning the actual count - track_total_hits: true, - _source: F.omitBlank({ - includes: node.include, - excludes: node.exclude, - }), - highlight: node.enableHighlighting && { - pre_tags: [''], - post_tags: [''], - number_of_fragments: 0, - fields: highlightFields, - }, - }) + const getHighlightFieldsMappings = _.memoize(() => + _.flow( + _.mapValues('elasticsearch'), + F.compactObject, + inlineSubFieldsMappings(schema.elasticsearch.subFields) + )(schema.fields) ) - // Handling highlighting results - // - Merge subfields highlights into normal field - // - Replace values in `_source` with their highlights - const results = _.map( - _.flow(mergeHighlightResults(mappings), inlineHighlightResults(mappings)), - response.hits.hits - ) + const body = F.omitBlank({ + from: startRecord, + size: pageSize, + sort: { [sortField]: node.sortDir || 'desc' }, + explain: node.explain, + // Without this, ES7+ stops counting at 10k instead of returning the actual count + track_total_hits: true, + _source: F.omitBlank({ + includes: node.include, + excludes: node.exclude, + }), + highlight: node.enableHighlighting && { + pre_tags: [''], + post_tags: [''], + number_of_fragments: 0, + fields: F.mapValuesIndexed( + makeHighlightConfig(node._meta.relevantFilters), + getHighlightFieldsMappings() + ), + }, + }) + + const response = await search(body) return { scrollId: response._scroll_id, @@ -71,7 +67,7 @@ export default { totalRecords: F.getOrReturn('value', response.hits.total), startRecord: startRecord + 1, endRecord: startRecord + response.hits.hits.length, - results, + results: response.hits.hits, } }, } diff --git a/packages/provider-elasticsearch/src/schema.js b/packages/provider-elasticsearch/src/schema.js index 
0279c5876..1c1b47cae 100644 --- a/packages/provider-elasticsearch/src/schema.js +++ b/packages/provider-elasticsearch/src/schema.js @@ -29,17 +29,26 @@ let fromEsIndexMapping = (mapping) => { // filters out 'dynamic_templates' (an array), 'dynamic: true', etc. _.pickBy(_.isPlainObject), extractFieldsAndEsType, + // TODO: think about how to let users pass this multi-field config information + _.set('elasticsearch.subFields', { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }), _.update( 'fields', _.flow( flatten, - F.mapValuesIndexed(({ type, fields }, field) => ({ + F.mapValuesIndexed((mapping, field) => ({ field, label: _.startCase(field), elasticsearch: F.compactObject({ - dataType: type, + ...mapping, + dataType: mapping.type, // Find the child notAnalyzedField to set up facet autocomplete vs word - notAnalyzedField: _.findKey({ type: 'keyword' }, fields), + notAnalyzedField: _.findKey( + { type: 'keyword' }, + mapping.fields + ), }), })) ) diff --git a/packages/server/src/utils.js b/packages/server/src/utils.js index 5a4199c9e..201c0f0b3 100644 --- a/packages/server/src/utils.js +++ b/packages/server/src/utils.js @@ -25,7 +25,6 @@ export let getRelevantFilters = _.curry((groupCombinator, Path, group) => { relevantChildren ) - console.log(relevantChildren) let relevantFilters = _.compact( _.map(getRelevantFilters(groupCombinator, path), relevantChildren) ) From 0d64cd187e22baea1796ba4a6dbbc4c23874aee7 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 3 Nov 2023 13:39:28 -0400 Subject: [PATCH 03/30] More changes --- .../src/example-types/results/highlighting.js | 232 ++++++++++--- .../results/highlighting.test.js | 304 +++++++++--------- .../src/example-types/results/index.js | 40 +-- 3 files changed, 341 insertions(+), 235 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index 
171fd0f17..2e315812d 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -1,59 +1,193 @@ -// https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio -// https://github.com/elastic/elasticsearch/issues/5172 - import _ from 'lodash/fp.js' import F from 'futil' +import { CartesianProduct } from 'js-combinatorics' -export const inlineSubFieldsMappings = _.curry((subFields, mappings) => - F.reduceIndexed( - (mappings, fieldMapping, fieldName) => { - for (const k in fieldMapping.fields) { - if (subFields[k]?.shouldHighlight) { - mappings[`${fieldName}.${k}`] = { - ...fieldMapping.fields[k], - meta: { ...fieldMapping.meta, isSubField: true }, - copy_to: _.map((f) => `${f}.${k}`, fieldMapping.copy_to), - } - } +export const getHighlightFields = (query, schema) => { + const querystr = JSON.stringify(query) + + const highlightSubFields = _.keys( + _.pickBy('shouldHighlight', schema.elasticsearch?.subFields) + ) + + // Set of all fields groups in the mappings, including their cartesian product with + // sub fields. 
For example, given the schema + // + // { + // elasticsearch: { + // subFields: { + // exact: { shouldHighlight: true } + // } + // }, + // fields: { + // address: {}, + // state: { + // elasticsearch: { + // copy_to: ['address'], + // fields: { exact: {} } + // } + // } + // } + // } + // + // this function will return `["address", "address.exact"]` + // + // See https://www.elastic.co/guide/en/elasticsearch/reference/current/copy-to.html + const allFieldsGroups = new Set( + _.flatMap((field) => { + const copy_to = field.elasticsearch?.copy_to + if (!_.isEmpty(copy_to)) { + const product = new CartesianProduct(copy_to, highlightSubFields) + return [...copy_to, ...Array.from(product).map(_.join('.'))] } - return mappings - }, - mappings, - mappings + return copy_to + }, schema.fields) ) -) - -export const makeHighlightConfig = _.curry((query, fieldMapping, fieldName) => { - const config = {} - if (fieldMapping.meta?.subType === 'blob') { - config.order = 'score' - config.fragment_size = 250 - config.number_of_fragments = 3 + + // Pre-computed list of fields groups present in the query + const queryFieldsGroups = [] + F.walk()((val, key) => { + if (allFieldsGroups.has(val)) queryFieldsGroups.push(val) + if (allFieldsGroups.has(key)) queryFieldsGroups.push(key) + })(query) + + // Only fields whose names are present in the query get highlighted by elastic + // due to us passing `require_field_match:true`. However, we have to consider + // fields groups as well. 
For example, given that `city` and `street` are + // copied to `address`, elastic won't highlight them in the following request: + // + // { + // "query": { + // "match": { + // "address": { "query": "memphis" } + // } + // }, + // "highlight": { + // "fields": { + // "city": {}, + // "street": {} + // } + // } + // } + // + // Instead, we have to specify a query just for highlighting, making sure we + // replace `address` with the correct field: + // + // { + // "query": { + // "match": { + // "address": { "query": "memphis" } + // } + // }, + // "highlight": { + // "fields": { + // "city": { + // "highlight_query": { + // "match": { + // "city": { "query": "memphis" } + // } + // } + // }, + // "street": { + // "highlight_query": { + // "match": { + // "street": { "query": "memphis" } + // } + // } + // } + // } + // } + // } + // + // This function replaces fields groups in the query with the name of the field + // to highlight. + const getHighlightQuery = (mapping, name) => { + const toReplace = _.intersection(queryFieldsGroups, mapping.copy_to) + if (!_.isEmpty(toReplace)) { + const regexp = new RegExp(_.join('|', toReplace), 'g') + return JSON.parse(_.replace(regexp, name, querystr)) + } } - if (!_.isEmpty(fieldMapping.copy_to)) { - // An improvement would be to only set highlight_query when a field group - // field is present in the query. 
- const queryHasFieldGroup = F.findNode()( - (val) => _.includes(val, fieldMapping.copy_to), - query - ) - if (queryHasFieldGroup) { - config.highlight_query = F.mapTree()((val) => { - if (_.includes(val, fieldMapping.copy_to)) { - val = fieldName - } - if (_.isPlainObject(val)) { - for (const copy_to of fieldMapping.copy_to) { - F.renamePropertyOn(copy_to, fieldName, val) + + // Transform a field mapping to a field highlighting configuration + // See https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#override-global-settings + const fieldMappingToHighlightConfig = (mapping, name) => { + const isBlob = mapping.meta?.subType === 'blob' + return F.omitBlank({ + fragment_size: isBlob ? 250 : null, + number_of_fragments: isBlob ? 3 : null, + highlight_query: getHighlightQuery(mapping, name), + }) + } + + // Only fields whose names are present in the query get highlighted by elastic + // due to us passing `require_field_match:true`. However, we have to consider + // sub fields as well. For example, if `city` is a multi-field containing a + // sub-field named `exact`, elastic won't highlight `city` in the following + // request: + // + // { + // "query": { + // "match": { + // "city.exact": { "query": "memphis" } + // } + // }, + // "highlight": { + // "fields": { + // "city": {}, + // } + // } + // } + // + // Instead, we have to match the sub-field verbatim in the highlight config: + // + // { + // "query": { + // "match": { + // "city.exact": { "query": "memphis" } + // } + // }, + // "highlight": { + // "fields": { + // "city.exact": {}, + // } + // } + // } + // + // This function will make mappings for subfields so we can spread them at the + // top-level and send them along with regular fields for elastic to highlight. 
+ const getSubFieldsMappings = (multiFieldMapping, multiFieldName) => + F.reduceIndexed( + (acc, mapping, name) => { + if (schema.elasticsearch.subFields[name]?.shouldHighlight) { + acc[`${multiFieldName}.${name}`] = { + ...mapping, + meta: { ...multiFieldMapping.meta, isSubField: true }, + copy_to: _.map((k) => `${k}.${name}`, multiFieldMapping.copy_to), } } - return val - }, query) - } - } - return config -}) + return acc + }, + {}, + multiFieldMapping.fields + ) -export const mergeHighlightResults = _.curry(() => []) + // Mappings for fields that should be highlighted + const highlightFieldsMappings = F.reduceIndexed( + (acc, { elasticsearch: mapping }, name) => { + if (mapping && !allFieldsGroups.has(name)) { + Object.assign(acc, { + [name]: mapping, + ...getSubFieldsMappings(mapping, name), + }) + } + return acc + }, + {}, + schema.fields + ) -export const inlineHighlightResults = _.curry(() => []) + return F.mapValuesIndexed( + fieldMappingToHighlightConfig, + highlightFieldsMappings + ) +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index 83f4de01a..4add479b0 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -1,183 +1,175 @@ -import _ from 'lodash/fp' -import { inlineSubFieldsMappings, makeHighlightConfig } from './highlighting.js' +import _ from 'lodash/fp.js' +import { getHighlightFields } from './highlighting.js' -describe('inlineSubFieldsMappings()', () => { - it('should inline sub-fields into the top-level mappings', () => { - const subFields = { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, +describe('getHighlightFields()', () => { + it('should exclude fields without mappings', () => { + const actual = getHighlightFields( + {}, + { + fields: { + other: {}, + state: { 
elasticsearch: {} }, + 'city.street': { elasticsearch: {} }, + }, + } + ) + const expected = { + state: {}, + 'city.street': {}, } - const mappings = { - all: { - type: 'text', - }, - fieldgroup: { - type: 'text', - }, - age: { - type: 'long', - copy_to: [], - }, - name: { - type: 'text', - copy_to: ['all', 'fieldgroup'], - }, - job: { - type: 'text', - copy_to: [], + expect(actual).toEqual(expected) + }) + + it('should exclude group fields', () => { + const actual = getHighlightFields( + {}, + { fields: { - keyword: { type: 'keyword' }, - exact: { type: 'text', analyzer: 'exact' }, + all: { elasticsearch: {} }, + address: { elasticsearch: {} }, + state: { elasticsearch: { copy_to: ['all', 'address'] } }, + 'city.street': { elasticsearch: { copy_to: ['all', 'address'] } }, + }, + } + ) + const expected = { + state: {}, + 'city.street': {}, + } + expect(actual).toEqual(expected) + }) + + it('should include whitelisted sub fields', () => { + const actual = getHighlightFields( + {}, + { + elasticsearch: { + subFields: { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }, }, - }, - street: { - type: 'text', - copy_to: ['all', 'fieldgroup'], fields: { - keyword: { type: 'keyword' }, - exact: { type: 'text', analyzer: 'exact' }, + state: { + elasticsearch: { + fields: { keyword: {}, exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + fields: { keyword: {}, exact: {} }, + }, + }, }, - meta: { subType: 'blob' }, - }, + } + ) + const expected = { + state: {}, + 'state.exact': {}, + 'city.street': {}, + 'city.street.exact': {}, } - expect(inlineSubFieldsMappings(subFields, mappings)).toEqual({ - ..._.omit(['all', 'fieldgroup'], mappings), - 'job.exact': { - type: 'text', - analyzer: 'exact', - copy_to: [], - meta: { isSubField: true }, - }, - 'street.exact': { - type: 'text', - analyzer: 'exact', - copy_to: ['all.exact', 'fieldgroup.exact'], - meta: { subType: 'blob', isSubField: true }, - }, - }) + expect(actual).toEqual(expected) }) 
-}) -describe('makeHighlightConfig()', () => { - it('should generate default config for blob subtype', () => { - const query = {} - const fieldMapping = { - meta: { subType: 'blob' }, + it('should generate configuration for blob text fields', () => { + const actual = getHighlightFields( + {}, + { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, + }, + }, + fields: { + state: { + elasticsearch: { + meta: { subType: 'blob' }, + fields: { exact: {} }, + }, + }, + }, + } + ) + const expected = { + state: { + fragment_size: 250, + number_of_fragments: 3, + }, + 'state.exact': { + fragment_size: 250, + number_of_fragments: 3, + }, } - const fieldName = 'street' - const result = makeHighlightConfig(query, fieldMapping, fieldName) - expect(result).toEqual({ - order: 'score', - fragment_size: 250, - number_of_fragments: 3, - }) + expect(actual).toEqual(expected) }) - it('should generate highlight_query with field group name replaced by field name', () => { - const query = { + it('should generate highlight_query with field groups replaced', () => { + const queryWith = (field) => ({ bool: { must: [ - { terms: { all: 'city' } }, - { terms: { fieldgroup: 'city' } }, - { query_string: { query: 'city', default_field: 'all' } }, - { query_string: { query: 'city', default_field: 'fieldgroup' } }, + { terms: { [field]: 'memphis' } }, + { query_string: { query: 'memphis', default_field: field } }, ], }, - } - const fieldMapping = { - copy_to: ['all', 'fieldgroup'], - } - const fieldName = 'street' - const result = makeHighlightConfig(query, fieldMapping, fieldName) - expect(result).toEqual({ - highlight_query: { - bool: { - must: [ - { terms: { [fieldName]: 'city' } }, - { terms: { [fieldName]: 'city' } }, - { query_string: { query: 'city', default_field: fieldName } }, - { query_string: { query: 'city', default_field: fieldName } }, - ], - }, + }) + const actual = getHighlightFields(queryWith('address'), { + fields: { + address: { elasticsearch: {} }, + state: { 
elasticsearch: { copy_to: ['address'] } }, + 'city.street': { elasticsearch: { copy_to: ['address'] } }, }, }) + const expected = { + state: { + highlight_query: queryWith('state'), + }, + 'city.street': { + highlight_query: queryWith('city.street'), + }, + } + expect(actual).toEqual(expected) }) - it('should not generate highlight_query when field group name is not in query', () => { - const query = { + it('should generate highlight_query with field groups replaced for sub fields', () => { + const queryWith = (field) => ({ bool: { must: [ - { terms: { age: 'city' } }, - { terms: { name: 'city' } }, - { query_string: { query: 'city', default_field: 'age' } }, - { query_string: { query: 'city', default_field: 'name' } }, + { terms: { [field]: 'memphis' } }, + { query_string: { query: 'memphis', default_field: field } }, ], }, + }) + const actual = getHighlightFields(queryWith('address.exact'), { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, + }, + }, + fields: { + address: { + elasticsearch: {}, + }, + state: { + elasticsearch: { + copy_to: ['address'], + fields: { exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + copy_to: ['address'], + fields: { exact: {} }, + }, + }, + }, + }) + const expected = { + state: {}, + 'state.exact': { highlight_query: queryWith('state.exact') }, + 'city.street': {}, + 'city.street.exact': { highlight_query: queryWith('city.street.exact') }, } - const fieldMapping = { - copy_to: ['all', 'fieldgroup'], - } - const fieldName = 'street' - const result = makeHighlightConfig(query, fieldMapping, fieldName) - expect(result).toEqual({}) + expect(actual).toEqual(expected) }) }) - -// describe('getHighlightFieldsFromQuery()', () => { -// it('should extract all fields relevant for highlighting', () => { -// const query = { -// // Full-text queries -// intervals: { 'field:intervals': {} }, -// match: { 'field:match': {} }, -// match_bool_prefix: { 'field:match_bool_prefix': {} }, -// match_phrase: { 
'field:match_phrase': {} }, -// match_phrase_prefix: { 'field:match_phrase_prefix': {} }, -// combined_fields: { -// fields: ['field:combined_fields:0', 'field:combined_fields:1'], -// }, -// multi_match: { -// fields: ['field:multi_match:0', 'field:multi_match:1'], -// }, -// query_string: { -// fields: ['field:query_string:0', 'field:query_string:1'], -// default_field: 'field:query_string:default', -// }, -// simple_query_string: { -// fields: ['field:simple_query_string:0', 'field:simple_query_string:1'], -// default_field: 'field:simple_query_string:default', -// }, -// // Term-level queries -// fuzzy: { 'field:fuzzy': {} }, -// prefix: { 'field:prefix': {} }, -// regexp: { 'field:regexp': {} }, -// term: { 'field:term': {} }, -// terms: { 'field:terms': {} }, -// terms_set: { 'field:terms_set': {} }, -// wildcard: { 'field:wildcard': {} }, -// } -// expect(getHighlightFieldsFromQuery(query)).toEqual([ -// 'field:intervals', -// 'field:match', -// 'field:match_bool_prefix', -// 'field:match_phrase', -// 'field:match_phrase_prefix', -// 'field:combined_fields:0', -// 'field:combined_fields:1', -// 'field:multi_match:0', -// 'field:multi_match:1', -// 'field:query_string:0', -// 'field:query_string:1', -// 'field:query_string:default', -// 'field:simple_query_string:0', -// 'field:simple_query_string:1', -// 'field:simple_query_string:default', -// 'field:fuzzy', -// 'field:prefix', -// 'field:regexp', -// 'field:term', -// 'field:terms', -// 'field:terms_set', -// 'field:wildcard', -// ]) -// }) -// }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index d94d4fc8c..480687f0c 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,24 +1,10 @@ +// 
https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio +// https://github.com/elastic/elasticsearch/issues/5172 + import F from 'futil' -import _ from 'lodash/fp.js' -import { - inlineSubFieldsMappings, - makeHighlightConfig, - mergeHighlightResults, - inlineHighlightResults, -} from './highlighting.js' +import { getHighlightFields } from './highlighting.js' import { getField } from '../../utils/fields.js' -const processResults = (schema, results) => { - const mappings = _.flow( - _.mapValues('elasticsearch'), - F.compactObject - )(schema.fields) - return _.map( - _.flow(mergeHighlightResults(mappings), inlineHighlightResults(mappings)), - results - ) -} - export default { validContext: () => true, async result(node, search, schema) { @@ -29,13 +15,9 @@ export default { ? getField(schema, node.sortField) : '_score' - const getHighlightFieldsMappings = _.memoize(() => - _.flow( - _.mapValues('elasticsearch'), - F.compactObject, - inlineSubFieldsMappings(schema.elasticsearch.subFields) - )(schema.fields) - ) + const highlightFields = + node.enableHighlighting && + getHighlightFields(node._meta.relevantFilters, schema) const body = F.omitBlank({ from: startRecord, @@ -48,14 +30,12 @@ export default { includes: node.include, excludes: node.exclude, }), - highlight: node.enableHighlighting && { + highlight: highlightFields && { pre_tags: [''], post_tags: [''], number_of_fragments: 0, - fields: F.mapValuesIndexed( - makeHighlightConfig(node._meta.relevantFilters), - getHighlightFieldsMappings() - ), + require_field_match: true, + fields: highlightFields, }, }) From cc8ea2dfb6740df7434e8ec31fcbc0c5511638c1 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 3 Nov 2023 13:57:42 -0400 Subject: [PATCH 04/30] Fix tests --- .../src/example-types/results/highlighting.js | 400 +++++++++++------- .../results/highlighting.test.js | 179 ++++++-- .../src/example-types/results/index.js | 28 +- 
.../src/example-types/results/index.test.js | 119 ------ .../src/schema-data/schema-with-types.js | 204 +++++++++ .../src/schema-data/schema-without-types.js | 36 ++ 6 files changed, 660 insertions(+), 306 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index 2e315812d..177cca388 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -2,37 +2,36 @@ import _ from 'lodash/fp.js' import F from 'futil' import { CartesianProduct } from 'js-combinatorics' -export const getHighlightFields = (query, schema) => { - const querystr = JSON.stringify(query) - +/** + * Set of all fields groups in the mappings, including their cartesian product with + * sub fields. For example, given the schema + * + * { + * elasticsearch: { + * subFields: { + * exact: { shouldHighlight: true } + * } + * }, + * fields: { + * address: {}, + * state: { + * elasticsearch: { + * copy_to: ['address'], + * fields: { exact: {} } + * } + * } + * } + * } + * + * this function will return `["address", "address.exact"]` + * + * See https://www.elastic.co/guide/en/elasticsearch/reference/current/copy-to.html + */ +const getAllFieldsGroups = _.memoize((schema) => { const highlightSubFields = _.keys( _.pickBy('shouldHighlight', schema.elasticsearch?.subFields) ) - - // Set of all fields groups in the mappings, including their cartesian product with - // sub fields. 
For example, given the schema - // - // { - // elasticsearch: { - // subFields: { - // exact: { shouldHighlight: true } - // } - // }, - // fields: { - // address: {}, - // state: { - // elasticsearch: { - // copy_to: ['address'], - // fields: { exact: {} } - // } - // } - // } - // } - // - // this function will return `["address", "address.exact"]` - // - // See https://www.elastic.co/guide/en/elasticsearch/reference/current/copy-to.html - const allFieldsGroups = new Set( + return new Set( _.flatMap((field) => { const copy_to = field.elasticsearch?.copy_to if (!_.isEmpty(copy_to)) { @@ -42,6 +41,84 @@ export const getHighlightFields = (query, schema) => { return copy_to }, schema.fields) ) +}, _.get('elasticsearch.index')) + +/** + * Only fields whose names are present in the query get highlighted by elastic + * due to us passing `require_field_match:true`. However, we have to consider + * sub fields as well. For example, if `city` is a multi-field containing a + * sub-field named `exact`, elastic won't highlight `city` in the following + * request: + * + * { + * "query": { + * "match": { + * "city.exact": { "query": "memphis" } + * } + * }, + * "highlight": { + * "fields": { + * "city": {}, + * } + * } + * } + * + * Instead, we have to match the sub-field verbatim in the highlight config: + * + * { + * "query": { + * "match": { + * "city.exact": { "query": "memphis" } + * } + * }, + * "highlight": { + * "fields": { + * "city.exact": {}, + * } + * } + * } + * + * This function will make mappings for subfields so we can spread them at the + * top-level and send them along with regular fields for elastic to highlight. 
+ */ +const getSubFieldsMappings = (schema, multiFieldMapping, multiFieldName) => + F.reduceIndexed( + (acc, mapping, name) => { + if (schema.elasticsearch.subFields[name]?.shouldHighlight) { + acc[`${multiFieldName}.${name}`] = { + ...mapping, + meta: { ...multiFieldMapping.meta, isSubField: true }, + copy_to: _.map((k) => `${k}.${name}`, multiFieldMapping.copy_to), + } + } + return acc + }, + {}, + multiFieldMapping.fields + ) + +/** Mappings for fields that should be highlighted */ +const getHighlightFieldsMappings = _.memoize((schema) => { + const allFieldsGroups = getAllFieldsGroups(schema) + return F.reduceIndexed( + (acc, { elasticsearch: mapping }, name) => { + if (mapping && !allFieldsGroups.has(name)) { + Object.assign(acc, { + [name]: mapping, + ...getSubFieldsMappings(schema, mapping, name), + }) + } + return acc + }, + {}, + schema.fields + ) +}, _.get('elasticsearch.index')) + +export const getHighlightFields = (schema, query) => { + const allFieldsGroups = getAllFieldsGroups(schema) + + const querystr = JSON.stringify(query) // Pre-computed list of fields groups present in the query const queryFieldsGroups = [] @@ -50,56 +127,76 @@ export const getHighlightFields = (query, schema) => { if (allFieldsGroups.has(key)) queryFieldsGroups.push(key) })(query) - // Only fields whose names are present in the query get highlighted by elastic - // due to us passing `require_field_match:true`. However, we have to consider - // fields groups as well. 
For example, given that `city` and `street` are - // copied to `address`, elastic won't highlight them in the following request: - // - // { - // "query": { - // "match": { - // "address": { "query": "memphis" } - // } - // }, - // "highlight": { - // "fields": { - // "city": {}, - // "street": {} - // } - // } - // } - // - // Instead, we have to specify a query just for highlighting, making sure we - // replace `address` with the correct field: - // - // { - // "query": { - // "match": { - // "address": { "query": "memphis" } - // } - // }, - // "highlight": { - // "fields": { - // "city": { - // "highlight_query": { - // "match": { - // "city": { "query": "memphis" } - // } - // } - // }, - // "street": { - // "highlight_query": { - // "match": { - // "street": { "query": "memphis" } - // } - // } - // } - // } - // } - // } - // - // This function replaces fields groups in the query with the name of the field - // to highlight. + /** + * Only fields whose names are present in the query get highlighted by elastic + * due to us passing `require_field_match:true`. However, we have to consider + * fields groups as well. 
For example, given that `city` and `street` are + * copied to `address`, elastic won't highlight them in the following request: + * + * { + * "query": { + * "match": { + * "address": { "query": "memphis" } + * } + * }, + * "highlight": { + * "fields": { + * "city": {}, + * "street": {} + * } + * } + * } + * + * Instead, we have to specify a query just for highlighting, making sure we + * replace `address` with the correct field: + * + * { + * "query": { + * "match": { + * "address": { "query": "memphis" } + * } + * }, + * "highlight": { + * "fields": { + * "city": { + * "highlight_query": { + * "match": { + * "city": { "query": "memphis" } + * } + * } + * }, + * "street": { + * "highlight_query": { + * "match": { + * "street": { "query": "memphis" } + * } + * } + * } + * } + * } + * } + * + * Also, an interesting behavior is that boolean logic has no effect in + * highlighting. The following query will highlight both `memphis` and + * `miami` in the field `city` even though only the first `should` expression + * matches. + * + * { + * "bool": { + * "should": [ + * { "match": { "city": "memphis" } }, + * { + * "bool": { + * "must": [ + * { "match": { "city": "miami" } }, + * { "match": { "state": "" } } + * ] + * } + * } + * ] + * } + * } + */ const getHighlightQuery = (mapping, name) => { const toReplace = _.intersection(queryFieldsGroups, mapping.copy_to) if (!_.isEmpty(toReplace)) { @@ -119,75 +216,86 @@ export const getHighlightFields = (query, schema) => { }) } - // Only fields whose names are present in the query get highlighted by elastic - // due to us passing `require_field_match:true`. However, we have to consider - // sub fields as well. 
For example, if `city` is a multi-field containing a - // sub-field named `exact`, elastic won't highlight `city` in the following - // request: - // - // { - // "query": { - // "match": { - // "city.exact": { "query": "memphis" } - // } - // }, - // "highlight": { - // "fields": { - // "city": {}, - // } - // } - // } - // - // Instead, we have to match the sub-field verbatim in the highlight config: - // - // { - // "query": { - // "match": { - // "city.exact": { "query": "memphis" } - // } - // }, - // "highlight": { - // "fields": { - // "city.exact": {}, - // } - // } - // } - // - // This function will make mappings for subfields so we can spread them at the - // top-level and send them along with regular fields for elastic to highlight. - const getSubFieldsMappings = (multiFieldMapping, multiFieldName) => - F.reduceIndexed( - (acc, mapping, name) => { - if (schema.elasticsearch.subFields[name]?.shouldHighlight) { - acc[`${multiFieldName}.${name}`] = { - ...mapping, - meta: { ...multiFieldMapping.meta, isSubField: true }, - copy_to: _.map((k) => `${k}.${name}`, multiFieldMapping.copy_to), - } - } - return acc - }, - {}, - multiFieldMapping.fields - ) - - // Mappings for fields that should be highlighted - const highlightFieldsMappings = F.reduceIndexed( - (acc, { elasticsearch: mapping }, name) => { - if (mapping && !allFieldsGroups.has(name)) { - Object.assign(acc, { - [name]: mapping, - ...getSubFieldsMappings(mapping, name), - }) - } - return acc - }, - {}, - schema.fields - ) - return F.mapValuesIndexed( fieldMappingToHighlightConfig, - highlightFieldsMappings + getHighlightFieldsMappings(schema) ) } + +/** + * Returns an array of [start, end] ranges that correspond to substrings + * enclosed in pre/post tags. The ranges correspond to the plain string without + * tags. 
For example, given the tags `{ pre: '<em>', post: '</em>' }`, this + * function will return [[2, 5], [6, 9]] for the string + * + * `A <em>red</em> <em>car</em>` + */ +const getHighlightRanges = (tags, str) => { + let runningTagsLength = 0 + const ranges = [] + const regexp = new RegExp(`${tags.pre}(?.*?)${tags.post}`, 'g') + for (const match of str.matchAll(regexp)) { + const start = match.index - runningTagsLength + const end = start + match.groups.capture.length + ranges.push([start, end]) + runningTagsLength += tags.pre.length + tags.post.length + } + return ranges +} + +/** Wrap substrings given by [start, end] ranges with pre/post tags */ +const highlightFromRanges = (tags, str, ranges) => { + const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) + const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) + const highlighted = str.replace(/./g, (match, index) => { + if (index in starts) return `${tags.pre}${match}` + if (index in ends) return `${tags.post}${match}` + return match + }) + // Sometimes the last word is highlighted so the index for the last tag is + // `str.length` but `replace` only makes it up to `str.length - 1`. + return _.last(_.last(ranges)) === str.length + ? `${highlighted}${tags.post}` + : highlighted +} + +export const mergeHighlights = _.curry((tags, strs) => { + // This may look unnecessary but merging highlights is not cheap and many + // times is not even needed + if (_.size(strs) <= 1) return _.head(strs) + const ranges = F.mergeRanges( + _.flatMap((str) => getHighlightRanges(tags, str), strs) + ) + const plain = _.head(strs).replaceAll(tags.pre, '').replaceAll(tags.post, '') + return highlightFromRanges(tags, plain, ranges) +}) + +// Group sub-fields under their containing multi-fields keys. The only reason +// this is a reduce instead of a groupBy is because we need the keys. 
+const foo = (schema) => (acc, val, key) => { + const mappings = getHighlightFieldsMappings(schema) + const parts = key.split('.') + const multiFieldName = _.dropRight(1, parts).join('.') + const subFieldName = _.last(parts) + // Will group `name` and `name.{subfield}` under `name` + const name = mappings[multiFieldName]?.fields?.[subFieldName] + ? multiFieldName + : key + acc[name] ??= [] + acc[name].push(val) + return acc +} + +// This function mutates hits for performance reasons +export const inlineHighlightResults = (tags, schema, hits) => { + for (const hit of hits) { + const highlightedFields = _.flow( + _.mapValues(_.head), + F.reduceIndexed(foo(schema), {}), + _.mapValues(mergeHighlights(tags)) + )(hit.highlight) + for (const [key, val] of _.toPairs(highlightedFields)) { + F.setOn(key, val, hit._source) + } + } +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index 4add479b0..a10a56b24 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -1,17 +1,20 @@ -import _ from 'lodash/fp.js' -import { getHighlightFields } from './highlighting.js' +import { + getHighlightFields, + mergeHighlights, + inlineHighlightResults, +} from './highlighting.js' describe('getHighlightFields()', () => { it('should exclude fields without mappings', () => { const actual = getHighlightFields( - {}, { fields: { other: {}, state: { elasticsearch: {} }, 'city.street': { elasticsearch: {} }, }, - } + }, + {} ) const expected = { state: {}, @@ -22,7 +25,6 @@ describe('getHighlightFields()', () => { it('should exclude group fields', () => { const actual = getHighlightFields( - {}, { fields: { all: { elasticsearch: {} }, @@ -30,7 +32,8 @@ describe('getHighlightFields()', () => { state: { elasticsearch: { copy_to: ['all', 'address'] } }, 
'city.street': { elasticsearch: { copy_to: ['all', 'address'] } }, }, - } + }, + {} ) const expected = { state: {}, @@ -41,7 +44,6 @@ describe('getHighlightFields()', () => { it('should include whitelisted sub fields', () => { const actual = getHighlightFields( - {}, { elasticsearch: { subFields: { @@ -61,7 +63,8 @@ describe('getHighlightFields()', () => { }, }, }, - } + }, + {} ) const expected = { state: {}, @@ -74,7 +77,6 @@ describe('getHighlightFields()', () => { it('should generate configuration for blob text fields', () => { const actual = getHighlightFields( - {}, { elasticsearch: { subFields: { @@ -89,7 +91,8 @@ describe('getHighlightFields()', () => { }, }, }, - } + }, + {} ) const expected = { state: { @@ -113,13 +116,16 @@ describe('getHighlightFields()', () => { ], }, }) - const actual = getHighlightFields(queryWith('address'), { - fields: { - address: { elasticsearch: {} }, - state: { elasticsearch: { copy_to: ['address'] } }, - 'city.street': { elasticsearch: { copy_to: ['address'] } }, + const actual = getHighlightFields( + { + fields: { + address: { elasticsearch: {} }, + state: { elasticsearch: { copy_to: ['address'] } }, + 'city.street': { elasticsearch: { copy_to: ['address'] } }, + }, }, - }) + queryWith('address') + ) const expected = { state: { highlight_query: queryWith('state'), @@ -140,30 +146,33 @@ describe('getHighlightFields()', () => { ], }, }) - const actual = getHighlightFields(queryWith('address.exact'), { - elasticsearch: { - subFields: { - exact: { shouldHighlight: true }, - }, - }, - fields: { - address: { - elasticsearch: {}, - }, - state: { - elasticsearch: { - copy_to: ['address'], - fields: { exact: {} }, + const actual = getHighlightFields( + { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, }, }, - 'city.street': { - elasticsearch: { - copy_to: ['address'], - fields: { exact: {} }, + fields: { + address: { + elasticsearch: {}, + }, + state: { + elasticsearch: { + copy_to: ['address'], + fields: { 
exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + copy_to: ['address'], + fields: { exact: {} }, + }, }, }, }, - }) + queryWith('address.exact') + ) const expected = { state: {}, 'state.exact': { highlight_query: queryWith('state.exact') }, @@ -173,3 +182,103 @@ describe('getHighlightFields()', () => { expect(actual).toEqual(expected) }) }) + +describe('mergeHighlights()', () => { + const merge = mergeHighlights({ pre: '', post: '' }) + + it('should merge highlights that do not overlap', () => { + const actual = merge([ + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog', + ]) + const expected = + 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights that overlap', () => { + const actual = merge([ + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog', + ]) + const expected = 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights that are contained within another', () => { + const actual = merge([ + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog', + ]) + const expected = 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights at the end of the string', () => { + const actual = merge([ + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog', + ]) + const expected = + 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) +}) + +describe('inlineHighlightResults()', () => { + it('works', () => { + const hit = { + _source: { + name: 'John Wayne', + state: 'New Jersey', + 'city.street': 'Jefferson Ave', + }, + highlight: { + state: ['New Jersey'], + 'state.exact': ['New Jersey'], + 'city.street': ['Jefferson Ave'], + 'city.street.exact': ['Jefferson Ave'], + }, + } + 
inlineHighlightResults( + { + pre: '', + post: '', + }, + { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, + }, + }, + fields: { + state: { + elasticsearch: { + fields: { exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + fields: { exact: {} }, + }, + }, + }, + }, + [hit] + ) + const expected = { + _source: { + name: 'John Wayne', + state: 'New Jersey', + 'city.street': 'Jefferson Ave', + }, + highlight: { + state: ['New Jersey'], + 'state.exact': ['New Jersey'], + 'city.street': ['Jefferson Ave'], + 'city.street.exact': ['Jefferson Ave'], + }, + } + expect(hit).toEqual(expected) + }) +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 480687f0c..64da0c166 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,8 +1,9 @@ // https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio // https://github.com/elastic/elasticsearch/issues/5172 +import _ from 'lodash/fp.js' import F from 'futil' -import { getHighlightFields } from './highlighting.js' +import { getHighlightFields, inlineHighlightResults } from './highlighting.js' import { getField } from '../../utils/fields.js' export default { @@ -15,9 +16,15 @@ export default { ? 
getField(schema, node.sortField) : '_score' + const index = schema.elasticsearch.index + + console.time(`${index}:getHighlightFields`) const highlightFields = node.enableHighlighting && - getHighlightFields(node._meta.relevantFilters, schema) + getHighlightFields(schema, node._meta.relevantFilters) + console.timeEnd(`${index}:getHighlightFields`) + + const tags = { pre: '', post: '' } const body = F.omitBlank({ from: startRecord, @@ -31,23 +38,32 @@ export default { excludes: node.exclude, }), highlight: highlightFields && { - pre_tags: [''], - post_tags: [''], + pre_tags: [tags.pre], + post_tags: [tags.post], number_of_fragments: 0, require_field_match: true, fields: highlightFields, }, }) + console.time(`${index}:search`) const response = await search(body) + const results = response.hits.hits + console.timeEnd(`${index}:search`) + + if (node.enableHighlighting) { + console.time(`${index}:inlineHighlightResults`) + inlineHighlightResults(tags, schema, results) + console.timeEnd(`${index}:inlineHighlightResults`) + } return { scrollId: response._scroll_id, // ES 7+ is total.value, ES 6- is hits.total totalRecords: F.getOrReturn('value', response.hits.total), startRecord: startRecord + 1, - endRecord: startRecord + response.hits.hits.length, - results: response.hits.hits, + endRecord: startRecord + results.length, + results, } }, } diff --git a/packages/provider-elasticsearch/src/example-types/results/index.test.js b/packages/provider-elasticsearch/src/example-types/results/index.test.js index d9eb2e510..ad7cd8e59 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.test.js @@ -39,7 +39,6 @@ describe('results', () => { { _id: 'test-id', _score: 'test-score', - additionalFields: [], field: 'test field', }, ], @@ -47,7 +46,6 @@ describe('results', () => { node = { key: 'test', type: 'results', - highlight: false, verbose: false, explain: false, } @@ -96,35 +94,6 @@ 
describe('results', () => { ]) delete node.exclude }) - it('should add fields to "_source.include" if in highlight override', async () => { - schema.elasticsearch.highlight = {} - F.extendOn(node, { - highlight: { - fields: { - myField: {}, - }, - }, - }) - await resultsTest(node, [ - _.extend(expectedCalls[0], { - _source: { - includes: ['myField'], - }, - sort: { - _score: 'desc', - }, - highlight: { - fields: { - myField: {}, - }, - number_of_fragments: 0, - post_tags: [''], - pre_tags: [''], - require_field_match: false, - }, - }), - ]) - }) it('should skip highlight when node highlight is false', async () => { schema.elasticsearch.highlight = {} F.extendOn(node, { highlight: false }) @@ -132,94 +101,6 @@ describe('results', () => { _.extend(expectedCalls[0], { sort: { _score: 'desc' } }), ]) }) - it('should override schema highlight via node highlight', async () => { - schema.elasticsearch.highlight = {} - F.extendOn(node, { - highlight: { - fields: { - myField: { - number_of_fragments: 3, - fragment_size: 250, - order: 'score', - }, - }, - number_of_fragments: 4, - }, - }) - await resultsTest(node, [ - _.extend(expectedCalls[0], { - _source: { - includes: ['myField'], - }, - sort: { - _score: 'desc', - }, - highlight: { - fields: { - myField: { - number_of_fragments: 3, - fragment_size: 250, - order: 'score', - }, - }, - number_of_fragments: 4, - post_tags: [''], - pre_tags: [''], - require_field_match: false, - }, - }), - ]) - }) - it('should highlight additionalFields if showOtherMatches is set', async () => { - schema.elasticsearch.highlight = { test: ['field'] } - service[0].hits.hits[0].anotherField = 'test another field' - F.extendOn(node, { - showOtherMatches: true, - include: 'anotherField', - highlight: true, - }) - expectedResult.results[0].anotherField = 'test another field' - await resultsTest(node, [ - _.extend(expectedCalls[0], { - _source: { - includes: ['anotherField'], - }, - sort: { - _score: 'desc', - }, - highlight: { - fields: {}, - 
number_of_fragments: 0, - post_tags: [''], - pre_tags: [''], - require_field_match: false, - }, - }), - ]) - }) - it('should not highlight additionalFields if showOtherMatches is not set', async () => { - schema.elasticsearch.highlight = { test: ['field'] } - service[0].hits.hits[0].anotherField = 'test another field' - F.extendOn(node, { include: 'anotherField', highlight: true }) - expectedResult.results[0].anotherField = 'test another field' - await resultsTest(node, [ - _.extend(expectedCalls[0], { - _source: { - includes: ['anotherField'], - }, - sort: { - _score: 'desc', - }, - highlight: { - fields: {}, - number_of_fragments: 0, - post_tags: [''], - pre_tags: [''], - require_field_match: false, - }, - }), - ]) - }) it('should sort on "_score: desc" with no sortField config', () => resultsTest(node, [{ ...expectedCalls[0], sort: { _score: 'desc' } }])) it('should order by sortDir config', async () => { diff --git a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js index 0d63d063c..6c868b74e 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js @@ -3,6 +3,10 @@ export default { elasticsearch: { index: 'movies', type: 'movie', + subFields: { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }, }, fields: { actors: { @@ -11,6 +15,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'actors', label: 'Actors', @@ -21,6 +32,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'awards', label: 'Awards', @@ -31,6 +49,13 @@ export default { elasticsearch: { dataType: 'text', 
notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'countries', label: 'Countries', @@ -41,6 +66,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'directors', label: 'Directors', @@ -51,6 +83,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'genres', label: 'Genres', @@ -61,6 +100,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'imdbId', label: 'Imdb Id', @@ -70,6 +116,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'float', + type: 'float', }, field: 'imdbRating', label: 'Imdb Rating', @@ -79,6 +126,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'imdbVotes', label: 'Imdb Votes', @@ -89,6 +137,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'languages', label: 'Languages', @@ -98,6 +153,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'metaScore', label: 'Meta Score', @@ -108,6 +164,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'plot', label: 'Plot', @@ -118,6 +181,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 
256, + }, + }, }, field: 'poster', label: 'Poster', @@ -128,6 +198,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'rated', label: 'Rated', @@ -137,6 +214,7 @@ export default { typeOptions: ['date', 'exists'], elasticsearch: { dataType: 'date', + type: 'date', }, field: 'released', label: 'Released', @@ -146,6 +224,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'runtimeMinutes', label: 'Runtime Minutes', @@ -156,6 +235,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'title', label: 'Title', @@ -166,6 +252,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'type', label: 'Type', @@ -176,6 +269,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'writers', label: 'Writers', @@ -185,6 +285,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'year', label: 'Year', @@ -194,6 +295,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'yearEnded', label: 'Year Ended', @@ -205,6 +307,10 @@ export default { index: 'imdb', type: 'movie', aliasOf: 'movies', + subFields: { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }, }, fields: { actors: { @@ -213,6 +319,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + 
ignore_above: 256, + }, + }, }, field: 'actors', label: 'Actors', @@ -223,6 +336,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'awards', label: 'Awards', @@ -233,6 +353,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'countries', label: 'Countries', @@ -243,6 +370,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'directors', label: 'Directors', @@ -253,6 +387,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'genres', label: 'Genres', @@ -263,6 +404,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'imdbId', label: 'Imdb Id', @@ -272,6 +420,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'float', + type: 'float', }, field: 'imdbRating', label: 'Imdb Rating', @@ -281,6 +430,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'imdbVotes', label: 'Imdb Votes', @@ -291,6 +441,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'languages', label: 'Languages', @@ -300,6 +457,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'metaScore', label: 'Meta Score', @@ -310,6 +468,13 @@ export 
default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'plot', label: 'Plot', @@ -320,6 +485,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'poster', label: 'Poster', @@ -330,6 +502,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'rated', label: 'Rated', @@ -339,6 +518,7 @@ export default { typeOptions: ['date', 'exists'], elasticsearch: { dataType: 'date', + type: 'date', }, field: 'released', label: 'Released', @@ -348,6 +528,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'runtimeMinutes', label: 'Runtime Minutes', @@ -358,6 +539,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'title', label: 'Title', @@ -368,6 +556,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'type', label: 'Type', @@ -378,6 +573,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'writers', label: 'Writers', @@ -387,6 +589,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'year', label: 'Year', @@ -396,6 +599,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', + type: 'long', }, field: 'yearEnded', label: 'Year 
Ended', diff --git a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js index 1e174eae5..227605719 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js @@ -2,6 +2,10 @@ export default { movies: { elasticsearch: { index: 'movies', + subFields: { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }, }, fields: { actors: { @@ -10,6 +14,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'actors', label: 'Actors', @@ -20,6 +31,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'awards', label: 'Awards', @@ -31,6 +49,10 @@ export default { elasticsearch: { index: 'imdb', aliasOf: 'movies', + subFields: { + keyword: { shouldHighlight: false }, + exact: { shouldHighlight: true }, + }, }, fields: { actors: { @@ -39,6 +61,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'actors', label: 'Actors', @@ -49,6 +78,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, + }, }, field: 'awards', label: 'Awards', From e4d308be0ee2053c93b27a442ed9577eabe26774 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 9 Nov 2023 14:13:22 -0500 Subject: [PATCH 05/30] Updates --- .../src/example-types/results/highlighting.js | 108 +++++-- .../results/highlighting.test.js | 270 +++++++++++++++--- .../src/example-types/results/index.js | 6 
+- .../provider-elasticsearch/src/utils/futil.js | 14 + 4 files changed, 333 insertions(+), 65 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index 177cca388..aaa0a8a61 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -1,6 +1,7 @@ import _ from 'lodash/fp.js' import F from 'futil' import { CartesianProduct } from 'js-combinatorics' +import { groupByIndexed } from '../../utils/futil.js' /** * Set of all fields groups in the mappings, including their cartesian product with @@ -259,7 +260,7 @@ const highlightFromRanges = (tags, str, ranges) => { : highlighted } -export const mergeHighlights = _.curry((tags, strs) => { +export const mergeHighlights = (tags, ...strs) => { // This may look unnecessary but merging highlights is not cheap and many // times is not even needed if (_.size(strs) <= 1) return _.head(strs) @@ -268,34 +269,91 @@ export const mergeHighlights = _.curry((tags, strs) => { ) const plain = _.head(strs).replaceAll(tags.pre, '').replaceAll(tags.post, '') return highlightFromRanges(tags, plain, ranges) -}) - -// Group sub-fields under their containing multi-fields keys. The only reason -// this is a reduce instead of a groupBy is because we need the keys. -const foo = (schema) => (acc, val, key) => { - const mappings = getHighlightFieldsMappings(schema) - const parts = key.split('.') - const multiFieldName = _.dropRight(1, parts).join('.') - const subFieldName = _.last(parts) - // Will group `name` and `name.{subfield}` under `name` - const name = mappings[multiFieldName]?.fields?.[subFieldName] - ? 
multiFieldName - : key - acc[name] ??= [] - acc[name].push(val) - return acc } // This function mutates hits for performance reasons -export const inlineHighlightResults = (tags, schema, hits) => { +export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { + const arrayFields = _.flow( + _.pickBy({ elasticsearch: { meta: { subType: 'array' } } }), + _.keys + )(schema.fields) + + const isSubFieldOf = (field, subField) => + !!schema.fields[field]?.elasticsearch?.fields?.[subField] + + const lastWordRegex = /\.(\w+)$/ + const getFieldKey = (val, key) => { + const [field, sub] = key.split(lastWordRegex) + return isSubFieldOf(field, sub) ? field : key + } + for (const hit of hits) { - const highlightedFields = _.flow( - _.mapValues(_.head), - F.reduceIndexed(foo(schema), {}), - _.mapValues(mergeHighlights(tags)) - )(hit.highlight) - for (const [key, val] of _.toPairs(highlightedFields)) { - F.setOn(key, val, hit._source) + const highlights = F.reduceIndexed( + (acc, fragments, field) => { + const arrayField = _.find((k) => field.startsWith(k), arrayFields) + + if (!arrayField) { + acc[field] = mergeHighlights(tags, ...fragments) + return acc + } + + const nestedField = field.slice(arrayField.length).replace('.', '') + const array = _.get(arrayField, hit._source) + + if (!array) { + acc[arrayField] = nestedField + ? _.map((fragment) => _.set(nestedField, fragment, {}), fragments) + : fragments + } else { + const fragmentsMap = _.reduce( + (acc, fragment) => { + const plain = fragment + .replaceAll(tags.pre, '') + .replaceAll(tags.post, '') + acc[plain] = fragment + return acc + }, + {}, + fragments + ) + + acc[arrayField] = [] + + for (let index in array) { + if (nestedField) { + const fragment = fragmentsMap[_.get(nestedField, array[index])] + const item = highlightConfig.filterSourceArrays + ? undefined + : _.get(nestedField, array[index]) + acc[arrayField].push( + _.set(nestedField, fragment ?? 
item, array[index]) + ) + } else { + const fragment = fragmentsMap[array[index]] + const item = highlightConfig.filterSourceArrays + ? undefined + : array[index] + acc[arrayField].push(fragment ?? item) + } + } + + if (highlightConfig.filterSourceArrays) { + acc[arrayField] = _.remove( + (item) => + _.isUndefined(nestedField ? _.get(nestedField, item) : item), + acc[arrayField] + ) + } + } + + return acc + }, + {}, + _.mapValues(_.flatten, groupByIndexed(getFieldKey, hit.highlight)) + ) + + for (const [field, val] of _.toPairs(highlights)) { + F.setOn(field, val, hit._source) } } } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index a10a56b24..435343705 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -4,6 +4,8 @@ import { inlineHighlightResults, } from './highlighting.js' +const tags = { pre: '', post: '' } + describe('getHighlightFields()', () => { it('should exclude fields without mappings', () => { const actual = getHighlightFields( @@ -184,41 +186,43 @@ describe('getHighlightFields()', () => { }) describe('mergeHighlights()', () => { - const merge = mergeHighlights({ pre: '', post: '' }) - it('should merge highlights that do not overlap', () => { - const actual = merge([ + const actual = mergeHighlights( + tags, 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog', - ]) + 'The quick brown fox jumps over the lazy dog' + ) const expected = 'The quick brown fox jumps over the lazy dog' expect(actual).toEqual(expected) }) it('should merge highlights that overlap', () => { - const actual = merge([ + const actual = mergeHighlights( + tags, 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog', - ]) + 'The quick brown fox jumps over 
the lazy dog' + ) const expected = 'The quick brown fox jumps over the lazy dog' expect(actual).toEqual(expected) }) it('should merge highlights that are contained within another', () => { - const actual = merge([ + const actual = mergeHighlights( + tags, 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog', - ]) + 'The quick brown fox jumps over the lazy dog' + ) const expected = 'The quick brown fox jumps over the lazy dog' expect(actual).toEqual(expected) }) it('should merge highlights at the end of the string', () => { - const actual = merge([ + const actual = mergeHighlights( + tags, 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog', - ]) + 'The quick brown fox jumps over the lazy dog' + ) const expected = 'The quick brown fox jumps over the lazy dog' expect(actual).toEqual(expected) @@ -226,12 +230,12 @@ describe('mergeHighlights()', () => { }) describe('inlineHighlightResults()', () => { - it('works', () => { + it('should work', () => { const hit = { _source: { name: 'John Wayne', state: 'New Jersey', - 'city.street': 'Jefferson Ave', + city: { street: 'Jefferson Ave' }, }, highlight: { state: ['New Jersey'], @@ -240,37 +244,31 @@ describe('inlineHighlightResults()', () => { 'city.street.exact': ['Jefferson Ave'], }, } - inlineHighlightResults( - { - pre: '', - post: '', + const schema = { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, + }, }, - { - elasticsearch: { - subFields: { - exact: { shouldHighlight: true }, + fields: { + state: { + elasticsearch: { + fields: { exact: {} }, }, }, - fields: { - state: { - elasticsearch: { - fields: { exact: {} }, - }, - }, - 'city.street': { - elasticsearch: { - fields: { exact: {} }, - }, + 'city.street': { + elasticsearch: { + fields: { exact: {} }, }, }, }, - [hit] - ) + } + inlineHighlightResults(tags, schema, {}, [hit]) const expected = { _source: { name: 'John Wayne', state: 'New Jersey', - 
'city.street': 'Jefferson Ave', + city: { street: 'Jefferson Ave' }, }, highlight: { state: ['New Jersey'], @@ -281,4 +279,202 @@ describe('inlineHighlightResults()', () => { } expect(hit).toEqual(expected) }) + + describe('arrays of strings', () => { + const schema = { + fields: { + 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, + }, + } + + it('should inline array of strings when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + inlineHighlightResults(tags, schema, {}, [hit]) + expect(hit._source).toEqual({ + city: { + street: ['Collins Ave.', 'Meridian St.'], + }, + }) + }) + + it('should inline array of strings when source has value', () => { + const hit = { + _source: { + city: { + street: ['Jefferson Ave.', 'Meridian St.', 'Collins Ave.'], + }, + }, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + inlineHighlightResults(tags, schema, {}, [hit]) + expect(hit._source).toEqual({ + city: { + street: [ + 'Jefferson Ave.', + 'Meridian St.', + 'Collins Ave.', + ], + }, + }) + }) + + it('should inline and filter array of strings when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: ['Collins Ave.', 'Meridian St.'], + }, + }) + }) + + it('should inline and filter array of strings when source has value', () => { + const hit = { + _source: { + city: { + street: [ + 'Jefferson Ave.', + 'Washington St.', + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + ], + }, + }, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: ['Meridian St.', 'Collins Ave.'], + }, + }) + }) + }) + + 
describe('arrays of objects', () => { + const schema = { + fields: { + 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, + 'city.street.name': {}, + }, + } + + it('should inline array of objects when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + inlineHighlightResults(tags, schema, {}, [hit]) + expect(hit._source).toEqual({ + city: { + street: [ + { name: 'Collins Ave.' }, + { name: 'Meridian St.' }, + ], + }, + }) + }) + + it('should inline array of objects when source has value', () => { + const hit = { + _source: { + city: { + street: [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + }, + }, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + inlineHighlightResults(tags, schema, {}, [hit]) + expect(hit._source).toEqual({ + city: { + street: [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + }, + }) + }) + + it('should inline and filter array of objects when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: [ + { name: 'Collins Ave.' }, + { name: 'Meridian St.' }, + ], + }, + }) + }) + + it('should inline and filter array of objects when source has value', () => { + const hit = { + _source: { + city: { + street: [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 789, name: 'Washington St.' }, + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' 
}, + { number: 655, name: 'Ocean Drive' }, + ], + }, + }, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: [ + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + }, + }) + }) + }) }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 64da0c166..982ddfe2e 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -20,7 +20,7 @@ export default { console.time(`${index}:getHighlightFields`) const highlightFields = - node.enableHighlighting && + node.highlight?.enable && getHighlightFields(schema, node._meta.relevantFilters) console.timeEnd(`${index}:getHighlightFields`) @@ -51,9 +51,9 @@ export default { const results = response.hits.hits console.timeEnd(`${index}:search`) - if (node.enableHighlighting) { + if (node.highlight?.enable) { console.time(`${index}:inlineHighlightResults`) - inlineHighlightResults(tags, schema, results) + inlineHighlightResults(tags, schema, node.highlight, results) console.timeEnd(`${index}:inlineHighlightResults`) } diff --git a/packages/provider-elasticsearch/src/utils/futil.js b/packages/provider-elasticsearch/src/utils/futil.js index dea42062c..ebd16074a 100644 --- a/packages/provider-elasticsearch/src/utils/futil.js +++ b/packages/provider-elasticsearch/src/utils/futil.js @@ -99,3 +99,17 @@ export let renameOn = (from, to, obj) => { // Async version of compactMap (and indexed) export let compactMapAsync = async (...args) => _.compact(await Promise.all(F.mapIndexed(...args))) + +// _.groupBy but also passing the current key +export const groupByIndexed = _.curry((it, coll) => + F.reduceIndexed( + (acc, val, key) => { + const k = 
_.iteratee(it)(val, key) + acc[k] ??= [] + acc[k].push(val) + return acc + }, + {}, + coll + ) +) From 4e7e1932032325e810573d091a2de32abd82fe93 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 9 Nov 2023 15:39:55 -0500 Subject: [PATCH 06/30] Handle arrays when there are no highlights --- .../src/example-types/results/highlighting.js | 28 ++++-- .../results/highlighting.test.js | 90 +++++++++++++++++++ .../src/example-types/results/index.js | 2 +- 3 files changed, 111 insertions(+), 9 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js index aaa0a8a61..ca2b817ba 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.js @@ -273,18 +273,20 @@ export const mergeHighlights = (tags, ...strs) => { // This function mutates hits for performance reasons export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { - const arrayFields = _.flow( - _.pickBy({ elasticsearch: { meta: { subType: 'array' } } }), - _.keys - )(schema.fields) + const isSubType = _.curry( + (subType, field) => field?.elasticsearch?.meta?.subType === subType + ) + + const isSubField = _.curry( + (subField, field) => !!field?.elasticsearch?.fields?.[subField] + ) - const isSubFieldOf = (field, subField) => - !!schema.fields[field]?.elasticsearch?.fields?.[subField] + const arrayFields = _.keys(_.pickBy(isSubType('array'), schema.fields)) const lastWordRegex = /\.(\w+)$/ const getFieldKey = (val, key) => { const [field, sub] = key.split(lastWordRegex) - return isSubFieldOf(field, sub) ? field : key + return isSubField(sub, schema.fields[field]) ? 
field : key } for (const hit of hits) { @@ -293,7 +295,11 @@ export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { const arrayField = _.find((k) => field.startsWith(k), arrayFields) if (!arrayField) { - acc[field] = mergeHighlights(tags, ...fragments) + if (isSubType('blob', schema.fields[field])) { + acc[field] = fragments + } else { + acc[field] = mergeHighlights(tags, ...fragments) + } return acc } @@ -352,6 +358,12 @@ export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { _.mapValues(_.flatten, groupByIndexed(getFieldKey, hit.highlight)) ) + if (highlightConfig.filterSourceArrays) { + for (const field of arrayFields) { + highlights[field] ??= [] + } + } + for (const [field, val] of _.toPairs(highlights)) { F.setOn(field, val, hit._source) } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js index 435343705..cdc11cc3f 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js @@ -280,6 +280,50 @@ describe('inlineHighlightResults()', () => { expect(hit).toEqual(expected) }) + it('should not merge fragments for blob text fields', () => { + const hit = { + _source: {}, + highlight: { + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + ], + 'blob.exact': [ + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + }, + } + const schema = { + elasticsearch: { + subFields: { + exact: { shouldHighlight: true }, + }, + }, + fields: { + blob: { + elasticsearch: { + meta: { subType: 'blob' }, + fields: { exact: {} }, + }, + }, + }, + } + inlineHighlightResults(tags, schema, {}, [hit]) + expect(hit._source).toEqual({ + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + }) + }) + describe('arrays 
of strings', () => { const schema = { fields: { @@ -364,6 +408,29 @@ describe('inlineHighlightResults()', () => { }, }) }) + + it('should inline source array with empty array when there are no highlights', () => { + const hit = { + _source: { + city: { + street: [ + 'Jefferson Ave.', + 'Washington St.', + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + ], + }, + }, + highlight: {}, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: [], + }, + }) + }) }) describe('arrays of objects', () => { @@ -476,5 +543,28 @@ describe('inlineHighlightResults()', () => { }, }) }) + + it('should inline source array with empty array when there are no highlights', () => { + const hit = { + _source: { + city: { + street: [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 789, name: 'Washington St.' }, + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + { number: 655, name: 'Ocean Drive' }, + ], + }, + }, + highlight: {}, + } + inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) + expect(hit._source).toEqual({ + city: { + street: [], + }, + }) + }) }) }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 982ddfe2e..a3d7e7172 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,7 +1,6 @@ // https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio // https://github.com/elastic/elasticsearch/issues/5172 -import _ from 'lodash/fp.js' import F from 'futil' import { getHighlightFields, inlineHighlightResults } from './highlighting.js' import { getField } from '../../utils/fields.js' @@ -18,6 +17,7 @@ export default { const index = schema.elasticsearch.index + console.info('') 
console.time(`${index}:getHighlightFields`) const highlightFields = node.highlight?.enable && From c0d80cff62242180a0ca1b4a4b3ef60e136a8e33 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 9 Nov 2023 15:42:28 -0500 Subject: [PATCH 07/30] Rename highlighting files to reduce diff --- .../src/example-types/results/{highlighting.js => highlight.js} | 0 .../results/{highlighting.test.js => highlight.test.js} | 2 +- .../provider-elasticsearch/src/example-types/results/index.js | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename packages/provider-elasticsearch/src/example-types/results/{highlighting.js => highlight.js} (100%) rename packages/provider-elasticsearch/src/example-types/results/{highlighting.test.js => highlight.test.js} (99%) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js similarity index 100% rename from packages/provider-elasticsearch/src/example-types/results/highlighting.js rename to packages/provider-elasticsearch/src/example-types/results/highlight.js diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js similarity index 99% rename from packages/provider-elasticsearch/src/example-types/results/highlighting.test.js rename to packages/provider-elasticsearch/src/example-types/results/highlight.test.js index cdc11cc3f..088f6e7a4 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js @@ -2,7 +2,7 @@ import { getHighlightFields, mergeHighlights, inlineHighlightResults, -} from './highlighting.js' +} from './highlight.js' const tags = { pre: '', post: '' } diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js 
b/packages/provider-elasticsearch/src/example-types/results/index.js index a3d7e7172..f8f426217 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -2,7 +2,7 @@ // https://github.com/elastic/elasticsearch/issues/5172 import F from 'futil' -import { getHighlightFields, inlineHighlightResults } from './highlighting.js' +import { getHighlightFields, inlineHighlightResults } from './highlight.js' import { getField } from '../../utils/fields.js' export default { From a0c07a8fc2923fbfac7a21c1d01f29dffab9ad11 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 9 Nov 2023 17:09:28 -0500 Subject: [PATCH 08/30] Purge mentions of highlight --- .../src/nodes/__snapshots__/results.test.js.snap | 12 ------------ packages/export/src/nodes/results.test.js | 1 - packages/provider-elasticsearch/README.md | 7 +++---- .../src/example-types/results/highlight.js | 10 +++++----- packages/react/src/stories/imdb/utils/contexture.js | 4 ---- 5 files changed, 8 insertions(+), 26 deletions(-) diff --git a/packages/export/src/nodes/__snapshots__/results.test.js.snap b/packages/export/src/nodes/__snapshots__/results.test.js.snap index 43aeba2fa..7dddbc696 100644 --- a/packages/export/src/nodes/__snapshots__/results.test.js.snap +++ b/packages/export/src/nodes/__snapshots__/results.test.js.snap @@ -45,7 +45,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -108,7 +107,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -172,7 +170,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -239,7 +236,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves 
records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -303,7 +299,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -368,7 +363,6 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, - "highlight": false, "include": [ "a", "b", @@ -437,7 +431,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", @@ -498,7 +491,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", @@ -560,7 +552,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", @@ -625,7 +616,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", @@ -687,7 +677,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", @@ -750,7 +739,6 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, - "highlight": false, "include": [ "a", "b", diff --git a/packages/export/src/nodes/results.test.js b/packages/export/src/nodes/results.test.js index a6feee523..1ace3c0ca 100644 --- a/packages/export/src/nodes/results.test.js +++ b/packages/export/src/nodes/results.test.js @@ -46,7 +46,6 @@ describe('results', () => { page: 1, totalPages: 1, include, - highlight: false, sortField: 'a', sortDir: 'desc', ...strategyParams, diff --git a/packages/provider-elasticsearch/README.md b/packages/provider-elasticsearch/README.md index a17ad38a8..d65395675 100644 --- a/packages/provider-elasticsearch/README.md +++ 
b/packages/provider-elasticsearch/README.md @@ -20,10 +20,9 @@ This provider takes a config object as a parameter: Schemas with with an elasticsearch provider can specify any or all of the following properties: -| Option | Type | Description | Required | -| ----------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -| `index` | `string` | Which ES index to use when querying | x | -| `highlight` | `object` | Used by `results` to determine what fields to highlight, and whether or not they are `inline` (copied over inline on to the source) or `additional` (in a list of additional fields that matched) | | +| Option | Type | Description | Required | +| ------- | -------- | ----------------------------------- | -------- | +| `index` | `string` | Which ES index to use when querying | x | ### Example Schema for SomeType in SomeIndex diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js index ca2b817ba..fab3a01cc 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.js @@ -272,7 +272,7 @@ export const mergeHighlights = (tags, ...strs) => { } // This function mutates hits for performance reasons -export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { +export const inlineHighlightResults = (tags, schema, highlight, hits) => { const isSubType = _.curry( (subType, field) => field?.elasticsearch?.meta?.subType === subType ) @@ -328,7 +328,7 @@ export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { for (let index in array) { if (nestedField) { const fragment = fragmentsMap[_.get(nestedField, array[index])] - const item = highlightConfig.filterSourceArrays 
+ const item = highlight.filterSourceArrays ? undefined : _.get(nestedField, array[index]) acc[arrayField].push( @@ -336,14 +336,14 @@ export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { ) } else { const fragment = fragmentsMap[array[index]] - const item = highlightConfig.filterSourceArrays + const item = highlight.filterSourceArrays ? undefined : array[index] acc[arrayField].push(fragment ?? item) } } - if (highlightConfig.filterSourceArrays) { + if (highlight.filterSourceArrays) { acc[arrayField] = _.remove( (item) => _.isUndefined(nestedField ? _.get(nestedField, item) : item), @@ -358,7 +358,7 @@ export const inlineHighlightResults = (tags, schema, highlightConfig, hits) => { _.mapValues(_.flatten, groupByIndexed(getFieldKey, hit.highlight)) ) - if (highlightConfig.filterSourceArrays) { + if (highlight.filterSourceArrays) { for (const field of arrayFields) { highlights[field] ??= [] } diff --git a/packages/react/src/stories/imdb/utils/contexture.js b/packages/react/src/stories/imdb/utils/contexture.js index 4fe07c482..3635df886 100644 --- a/packages/react/src/stories/imdb/utils/contexture.js +++ b/packages/react/src/stories/imdb/utils/contexture.js @@ -24,10 +24,6 @@ export let schemas = { elasticsearch: { index: 'movies', type: 'movie', - highlight: { - inline: ['title'], - additional: 'writers', - }, }, modeMap: { word: '', From 58ee43d57102d0068e3daa3a7cf8c9320f4a9571 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 10 Nov 2023 11:52:45 -0500 Subject: [PATCH 09/30] Cleanup code --- .../src/example-types/results/highlight.js | 189 ++++----- .../example-types/results/highlight.test.js | 382 +++++++++++------- .../src/example-types/results/index.js | 51 +-- .../provider-elasticsearch/src/utils/futil.js | 4 + 4 files changed, 356 insertions(+), 270 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js index 
fab3a01cc..bcd279d1d 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.js @@ -1,7 +1,7 @@ import _ from 'lodash/fp.js' import F from 'futil' import { CartesianProduct } from 'js-combinatorics' -import { groupByIndexed } from '../../utils/futil.js' +import { groupByIndexed, setOrReturn } from '../../utils/futil.js' /** * Set of all fields groups in the mappings, including their cartesian product with @@ -10,7 +10,7 @@ import { groupByIndexed } from '../../utils/futil.js' * { * elasticsearch: { * subFields: { - * exact: { shouldHighlight: true } + * exact: { highlight: true } * } * }, * fields: { @@ -30,7 +30,7 @@ import { groupByIndexed } from '../../utils/futil.js' */ const getAllFieldsGroups = _.memoize((schema) => { const highlightSubFields = _.keys( - _.pickBy('shouldHighlight', schema.elasticsearch?.subFields) + _.pickBy('highlight', schema.elasticsearch?.subFields) ) return new Set( _.flatMap((field) => { @@ -85,7 +85,7 @@ const getAllFieldsGroups = _.memoize((schema) => { const getSubFieldsMappings = (schema, multiFieldMapping, multiFieldName) => F.reduceIndexed( (acc, mapping, name) => { - if (schema.elasticsearch.subFields[name]?.shouldHighlight) { + if (schema.elasticsearch.subFields[name]?.highlight) { acc[`${multiFieldName}.${name}`] = { ...mapping, meta: { ...multiFieldMapping.meta, isSubField: true }, @@ -231,141 +231,124 @@ export const getHighlightFields = (schema, query) => { * * `A red car` */ -const getHighlightRanges = (tags, str) => { +const getHighlightRanges = (pre, post, str) => { let runningTagsLength = 0 const ranges = [] - const regexp = new RegExp(`${tags.pre}(?.*?)${tags.post}`, 'g') + const regexp = new RegExp(`${pre}(?.*?)${post}`, 'g') for (const match of str.matchAll(regexp)) { const start = match.index - runningTagsLength const end = start + match.groups.capture.length ranges.push([start, end]) - runningTagsLength += 
tags.pre.length + tags.post.length + runningTagsLength += pre.length + post.length } return ranges } /** Wrap substrings given by [start, end] ranges with pre/post tags */ -const highlightFromRanges = (tags, str, ranges) => { +const highlightFromRanges = (pre, post, str, ranges) => { const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) const highlighted = str.replace(/./g, (match, index) => { - if (index in starts) return `${tags.pre}${match}` - if (index in ends) return `${tags.post}${match}` + if (index in starts) return `${pre}${match}` + if (index in ends) return `${post}${match}` return match }) // Sometimes the last word is highlighted so the index for the last tag is // `str.length` but `replace` only makes it up to `str.length - 1`. return _.last(_.last(ranges)) === str.length - ? `${highlighted}${tags.post}` + ? `${highlighted}${post}` : highlighted } -export const mergeHighlights = (tags, ...strs) => { +export const mergeHighlights = (pre, post, ...strs) => { // This may look unnecessary but merging highlights is not cheap and many // times is not even needed if (_.size(strs) <= 1) return _.head(strs) const ranges = F.mergeRanges( - _.flatMap((str) => getHighlightRanges(tags, str), strs) + _.flatMap((str) => getHighlightRanges(pre, post, str), strs) ) - const plain = _.head(strs).replaceAll(tags.pre, '').replaceAll(tags.post, '') - return highlightFromRanges(tags, plain, ranges) + const plain = _.head(strs).replaceAll(pre, '').replaceAll(post, '') + return highlightFromRanges(pre, post, plain, ranges) } -// This function mutates hits for performance reasons -export const inlineHighlightResults = (tags, schema, highlight, hits) => { - const isSubType = _.curry( - (subType, field) => field?.elasticsearch?.meta?.subType === subType - ) +const stripTags = _.curry((pre, post, fragment) => + fragment.replaceAll(pre, '').replaceAll(post, '') +) - const isSubField = _.curry( - (subField, field) => 
!!field?.elasticsearch?.fields?.[subField] +export const highlightArray = (array, fragments, config) => { + if (_.isEmpty(array)) { + return _.map( + (fragment) => setOrReturn(config.fragmentPath, fragment, {}), + fragments + ) + } + const fragmentsMap = F.arrayToObject( + stripTags(config.pre_tag, config.post_tag), + _.identity, + fragments ) + return _.reduce( + (acc, item) => { + const plain = F.getOrReturn(config.fragmentPath, item) + const fragment = fragmentsMap[plain] + return config.filterSourceArrays && fragment === undefined + ? acc + : F.push(setOrReturn(config.fragmentPath, fragment ?? plain, item), acc) + }, + [], + array + ) +} - const arrayFields = _.keys(_.pickBy(isSubType('array'), schema.fields)) +// Best-effort naming here :/ +export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { + const arrayFields = _.pickBy( + { elasticsearch: { meta: { subType: 'array' } } }, + schema.fields + ) + const emptyArrayFields = _.mapValues(_.constant([]), arrayFields) + const arrayFieldsNames = _.keys(arrayFields) + const getArrayFieldName = (field) => + _.find((k) => field.startsWith(k), arrayFieldsNames) const lastWordRegex = /\.(\w+)$/ - const getFieldKey = (val, key) => { - const [field, sub] = key.split(lastWordRegex) - return isSubField(sub, schema.fields[field]) ? field : key + const getMultiFieldName = (field) => { + const [multi, sub] = field.split(lastWordRegex) + return schema.fields[multi]?.elasticsearch?.fields?.[sub] ? 
multi : field } - for (const hit of hits) { - const highlights = F.reduceIndexed( - (acc, fragments, field) => { - const arrayField = _.find((k) => field.startsWith(k), arrayFields) - - if (!arrayField) { - if (isSubType('blob', schema.fields[field])) { - acc[field] = fragments - } else { - acc[field] = mergeHighlights(tags, ...fragments) - } - return acc - } - - const nestedField = field.slice(arrayField.length).replace('.', '') - const array = _.get(arrayField, hit._source) - - if (!array) { - acc[arrayField] = nestedField - ? _.map((fragment) => _.set(nestedField, fragment, {}), fragments) - : fragments - } else { - const fragmentsMap = _.reduce( - (acc, fragment) => { - const plain = fragment - .replaceAll(tags.pre, '') - .replaceAll(tags.post, '') - acc[plain] = fragment - return acc - }, - {}, - fragments - ) - - acc[arrayField] = [] - - for (let index in array) { - if (nestedField) { - const fragment = fragmentsMap[_.get(nestedField, array[index])] - const item = highlight.filterSourceArrays - ? undefined - : _.get(nestedField, array[index]) - acc[arrayField].push( - _.set(nestedField, fragment ?? item, array[index]) - ) - } else { - const fragment = fragmentsMap[array[index]] - const item = highlight.filterSourceArrays - ? undefined - : array[index] - acc[arrayField].push(fragment ?? item) - } - } + const getHighlightedArray = (fragments, field, source) => { + const arrayPath = getArrayFieldName(field) + return highlightArray(_.get(arrayPath, source), fragments, { + ...highlightConfig, + fragmentPath: field.slice(arrayPath.length + 1), // +1 strips off leading dot + }) + } - if (highlight.filterSourceArrays) { - acc[arrayField] = _.remove( - (item) => - _.isUndefined(nestedField ? 
_.get(nestedField, item) : item), - acc[arrayField] + return (hit) => + _.flow( + // Group `city` and `city.exact` under `city` + groupByIndexed((v, k) => getMultiFieldName(k)), + _.mapValues(_.flatten), + // Transform highlighted segments into something that can be used to + // replace source values + F.mapValuesIndexed((fragments, field) => + getArrayFieldName(field) + ? getHighlightedArray(fragments, field, hit._source) + : schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' + ? fragments + : mergeHighlights( + highlightConfig.pre_tag, + highlightConfig.post_tag, + ...fragments ) - } - } - - return acc - }, - {}, - _.mapValues(_.flatten, groupByIndexed(getFieldKey, hit.highlight)) - ) - - if (highlight.filterSourceArrays) { - for (const field of arrayFields) { - highlights[field] ??= [] - } - } - - for (const [field, val] of _.toPairs(highlights)) { - F.setOn(field, val, hit._source) - } - } + ), + // Rename `streets.name` to `streets` if `streets` is an array field so + // that we can simply replace arrays wholesale in the source. + _.mapKeys((field) => getArrayFieldName(field) ?? field), + // Default to empty arrays if source arrays should be filtered but no + // highlights come back. That way the source arrays will get replaced with + // empty arrays when highlights are inlined. + _.defaults(highlightConfig.filterSourceArrays ? 
emptyArrayFields : {}) + )(hit.highlight) } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js index 088f6e7a4..3e8b985fe 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js @@ -1,10 +1,11 @@ import { getHighlightFields, mergeHighlights, - inlineHighlightResults, + alignHighlightsWithSourceStructure, + highlightArray, } from './highlight.js' -const tags = { pre: '', post: '' } +const highlightConfig = { pre_tag: '', post_tag: '' } describe('getHighlightFields()', () => { it('should exclude fields without mappings', () => { @@ -49,8 +50,8 @@ describe('getHighlightFields()', () => { { elasticsearch: { subFields: { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: { highlight: false }, + exact: { highlight: true }, }, }, fields: { @@ -82,7 +83,7 @@ describe('getHighlightFields()', () => { { elasticsearch: { subFields: { - exact: { shouldHighlight: true }, + exact: { highlight: true }, }, }, fields: { @@ -152,7 +153,7 @@ describe('getHighlightFields()', () => { { elasticsearch: { subFields: { - exact: { shouldHighlight: true }, + exact: { highlight: true }, }, }, fields: { @@ -188,7 +189,8 @@ describe('getHighlightFields()', () => { describe('mergeHighlights()', () => { it('should merge highlights that do not overlap', () => { const actual = mergeHighlights( - tags, + highlightConfig.pre_tag, + highlightConfig.post_tag, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' ) @@ -199,7 +201,8 @@ describe('mergeHighlights()', () => { it('should merge highlights that overlap', () => { const actual = mergeHighlights( - tags, + highlightConfig.pre_tag, + highlightConfig.post_tag, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy 
dog' ) @@ -209,7 +212,8 @@ describe('mergeHighlights()', () => { it('should merge highlights that are contained within another', () => { const actual = mergeHighlights( - tags, + highlightConfig.pre_tag, + highlightConfig.post_tag, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' ) @@ -219,7 +223,8 @@ describe('mergeHighlights()', () => { it('should merge highlights at the end of the string', () => { const actual = mergeHighlights( - tags, + highlightConfig.pre_tag, + highlightConfig.post_tag, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' ) @@ -229,25 +234,83 @@ describe('mergeHighlights()', () => { }) }) -describe('inlineHighlightResults()', () => { - it('should work', () => { - const hit = { - _source: { - name: 'John Wayne', - state: 'New Jersey', - city: { street: 'Jefferson Ave' }, +describe('highlightArray()', () => { + it('should return ordered array of fragments', () => { + const actual = highlightArray( + ['Meridian St.', 'Collins Ave.'], + ['Collins Ave.', 'Meridian St.'], + highlightConfig + ) + const expected = ['Meridian St.', 'Collins Ave.'] + expect(actual).toEqual(expected) + }) + + it('should return ordered and filtered array of fragments', () => { + const actual = highlightArray( + ['Meridian St.', 'Collins Ave.', 'Raunch Rd.'], + ['Collins Ave.', 'Meridian St.'], + { ...highlightConfig, filterSourceArrays: true } + ) + const expected = ['Meridian St.', 'Collins Ave.'] + expect(actual).toEqual(expected) + }) + + it('should return ordered array of objects with fragments', () => { + const actual = highlightArray( + [ + { state: 'Florida', city: { number: 405, street: 'Meridian St.' } }, + { state: 'Georgia', city: { number: 235, street: 'Collins Ave.' } }, + ], + ['Collins Ave.', 'Meridian St.'], + { ...highlightConfig, fragmentPath: 'city.street' } + ) + const expected = [ + { + state: 'Florida', + city: { number: 405, street: 'Meridian St.' 
}, }, - highlight: { - state: ['New Jersey'], - 'state.exact': ['New Jersey'], - 'city.street': ['Jefferson Ave'], - 'city.street.exact': ['Jefferson Ave'], + { + state: 'Georgia', + city: { number: 235, street: 'Collins Ave.' }, }, - } + ] + expect(actual).toEqual(expected) + }) + + it('should return ordered and filtered array of objects with fragments', () => { + const actual = highlightArray( + [ + { state: 'Florida', city: { number: 405, street: 'Meridian St.' } }, + { state: 'Georgia', city: { number: 235, street: 'Collins Ave.' } }, + { state: 'Iowa', city: { number: 111, street: 'Raunch Rd.' } }, + ], + ['Collins Ave.', 'Meridian St.'], + { + ...highlightConfig, + fragmentPath: 'city.street', + filterSourceArrays: true, + } + ) + const expected = [ + { + state: 'Florida', + city: { number: 405, street: 'Meridian St.' }, + }, + { + state: 'Georgia', + city: { number: 235, street: 'Collins Ave.' }, + }, + ] + expect(actual).toEqual(expected) + }) +}) + +describe('alignHighlightsWithSourceStructure()', () => { + describe('text fields', () => { const schema = { elasticsearch: { subFields: { - exact: { shouldHighlight: true }, + exact: { highlight: true }, }, }, fields: { @@ -263,43 +326,38 @@ describe('inlineHighlightResults()', () => { }, }, } - inlineHighlightResults(tags, schema, {}, [hit]) - const expected = { - _source: { - name: 'John Wayne', + + it('should merge fragments', () => { + const hit = { + _source: { + name: 'John Wayne', + state: 'New Jersey', + city: { street: 'Jefferson Ave' }, + }, + highlight: { + state: ['New Jersey'], + 'state.exact': ['New Jersey'], + 'city.street': ['Jefferson Ave'], + 'city.street.exact': ['Jefferson Ave'], + }, + } + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { state: 'New Jersey', - city: { street: 'Jefferson Ave' }, - }, - highlight: { - state: ['New Jersey'], - 'state.exact': ['New Jersey'], - 'city.street': ['Jefferson Ave'], - 'city.street.exact': 
['Jefferson Ave'], - }, - } - expect(hit).toEqual(expected) + 'city.street': 'Jefferson Ave', + } + expect(actual).toEqual(expected) + }) }) - it('should not merge fragments for blob text fields', () => { - const hit = { - _source: {}, - highlight: { - blob: [ - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - ], - 'blob.exact': [ - 'Jefferson Ave.', - 'Washington St.', - 'Lincoln Rd.', - ], - }, - } + describe('blob text fields', () => { const schema = { elasticsearch: { subFields: { - exact: { shouldHighlight: true }, + exact: { highlight: true }, }, }, fields: { @@ -311,16 +369,38 @@ describe('inlineHighlightResults()', () => { }, }, } - inlineHighlightResults(tags, schema, {}, [hit]) - expect(hit._source).toEqual({ - blob: [ - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - 'Jefferson Ave.', - 'Washington St.', - 'Lincoln Rd.', - ], + + it('should not merge fragments', () => { + const hit = { + _source: {}, + highlight: { + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + ], + 'blob.exact': [ + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + }, + } + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + } + expect(actual).toEqual(expected) }) }) @@ -338,12 +418,14 @@ describe('inlineHighlightResults()', () => { 'city.street': ['Collins Ave.', 'Meridian St.'], }, } - inlineHighlightResults(tags, schema, {}, [hit]) - expect(hit._source).toEqual({ - city: { - street: ['Collins Ave.', 'Meridian St.'], - }, - }) + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { + 'city.street': ['Collins Ave.', 'Meridian St.'], + } + expect(actual).toEqual(expected) }) it('should inline array of strings when source has value', () => { @@ -357,16 +439,18 @@ describe('inlineHighlightResults()', () => { 
'city.street': ['Collins Ave.', 'Meridian St.'], }, } - inlineHighlightResults(tags, schema, {}, [hit]) - expect(hit._source).toEqual({ - city: { - street: [ - 'Jefferson Ave.', - 'Meridian St.', - 'Collins Ave.', - ], - }, - }) + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { + 'city.street': [ + 'Jefferson Ave.', + 'Meridian St.', + 'Collins Ave.', + ], + } + expect(actual).toEqual(expected) }) it('should inline and filter array of strings when source is empty', () => { @@ -376,12 +460,14 @@ describe('inlineHighlightResults()', () => { 'city.street': ['Collins Ave.', 'Meridian St.'], }, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: ['Collins Ave.', 'Meridian St.'], - }, - }) + const actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { + 'city.street': ['Collins Ave.', 'Meridian St.'], + } + expect(actual).toEqual(expected) }) it('should inline and filter array of strings when source has value', () => { @@ -401,12 +487,14 @@ describe('inlineHighlightResults()', () => { 'city.street': ['Collins Ave.', 'Meridian St.'], }, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: ['Meridian St.', 'Collins Ave.'], - }, - }) + const actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { + 'city.street': ['Meridian St.', 'Collins Ave.'], + } + expect(actual).toEqual(expected) }) it('should inline source array with empty array when there are no highlights', () => { @@ -424,12 +512,14 @@ describe('inlineHighlightResults()', () => { }, highlight: {}, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: [], - }, - }) + const 
actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { + 'city.street': [], + } + expect(actual).toEqual(expected) }) }) @@ -451,15 +541,17 @@ describe('inlineHighlightResults()', () => { ], }, } - inlineHighlightResults(tags, schema, {}, [hit]) - expect(hit._source).toEqual({ - city: { - street: [ - { name: 'Collins Ave.' }, - { name: 'Meridian St.' }, - ], - }, - }) + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { + 'city.street': [ + { name: 'Collins Ave.' }, + { name: 'Meridian St.' }, + ], + } + expect(actual).toEqual(expected) }) it('should inline array of objects when source has value', () => { @@ -480,16 +572,18 @@ describe('inlineHighlightResults()', () => { ], }, } - inlineHighlightResults(tags, schema, {}, [hit]) - expect(hit._source).toEqual({ - city: { - street: [ - { number: 101, name: 'Jefferson Ave.' }, - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - ], - }, - }) + const actual = alignHighlightsWithSourceStructure( + schema, + highlightConfig + )(hit) + const expected = { + 'city.street': [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + } + expect(actual).toEqual(expected) }) it('should inline and filter array of objects when source is empty', () => { @@ -502,15 +596,17 @@ describe('inlineHighlightResults()', () => { ], }, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: [ - { name: 'Collins Ave.' }, - { name: 'Meridian St.' }, - ], - }, - }) + const actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { + 'city.street': [ + { name: 'Collins Ave.' }, + { name: 'Meridian St.' 
}, + ], + } + expect(actual).toEqual(expected) }) it('should inline and filter array of objects when source has value', () => { @@ -533,15 +629,17 @@ describe('inlineHighlightResults()', () => { ], }, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: [ - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - ], - }, - }) + const actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { + 'city.street': [ + { number: 235, name: 'Meridian St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + } + expect(actual).toEqual(expected) }) it('should inline source array with empty array when there are no highlights', () => { @@ -559,12 +657,12 @@ describe('inlineHighlightResults()', () => { }, highlight: {}, } - inlineHighlightResults(tags, schema, { filterSourceArrays: true }, [hit]) - expect(hit._source).toEqual({ - city: { - street: [], - }, - }) + const actual = alignHighlightsWithSourceStructure(schema, { + ...highlightConfig, + filterSourceArrays: true, + })(hit) + const expected = { 'city.street': [] } + expect(actual).toEqual(expected) }) }) }) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index f8f426217..663295fb7 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,10 +1,19 @@ // https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio // https://github.com/elastic/elasticsearch/issues/5172 +import _ from 'lodash/fp.js' import F from 'futil' -import { getHighlightFields, inlineHighlightResults } from './highlight.js' +import { + getHighlightFields, + alignHighlightsWithSourceStructure, +} from './highlight.js' 
import { getField } from '../../utils/fields.js' +const defaultHighlightConfig = { + pre_tag: '', + post_tag: '', +} + export default { validContext: () => true, async result(node, search, schema) { @@ -15,16 +24,7 @@ export default { ? getField(schema, node.sortField) : '_score' - const index = schema.elasticsearch.index - - console.info('') - console.time(`${index}:getHighlightFields`) - const highlightFields = - node.highlight?.enable && - getHighlightFields(schema, node._meta.relevantFilters) - console.timeEnd(`${index}:getHighlightFields`) - - const tags = { pre: '', post: '' } + const highlightConfig = _.defaults(defaultHighlightConfig, node.highlight) const body = F.omitBlank({ from: startRecord, @@ -33,28 +33,29 @@ export default { explain: node.explain, // Without this, ES7+ stops counting at 10k instead of returning the actual count track_total_hits: true, - _source: F.omitBlank({ - includes: node.include, - excludes: node.exclude, - }), - highlight: highlightFields && { - pre_tags: [tags.pre], - post_tags: [tags.post], + _source: F.omitBlank({ includes: node.include, excludes: node.exclude }), + highlight: highlightConfig.enable && { + pre_tags: [highlightConfig.pre_tag], + post_tags: [highlightConfig.post_tag], number_of_fragments: 0, require_field_match: true, - fields: highlightFields, + fields: getHighlightFields(schema, node._meta.relevantFilters), }, }) - console.time(`${index}:search`) const response = await search(body) const results = response.hits.hits - console.timeEnd(`${index}:search`) - if (node.highlight?.enable) { - console.time(`${index}:inlineHighlightResults`) - inlineHighlightResults(tags, schema, node.highlight, results) - console.timeEnd(`${index}:inlineHighlightResults`) + if (highlightConfig.enable) { + // Not mutating source in the helper function leaves the door open + // for a configuration flag to control inlining of highlighted + // results in source + const fn = alignHighlightsWithSourceStructure(schema, highlightConfig) + 
for (const result of results) { + for (const [field, val] of _.toPairs(fn(result))) { + F.setOn(field, val, result._source) + } + } } return { diff --git a/packages/provider-elasticsearch/src/utils/futil.js b/packages/provider-elasticsearch/src/utils/futil.js index ebd16074a..a53b78a19 100644 --- a/packages/provider-elasticsearch/src/utils/futil.js +++ b/packages/provider-elasticsearch/src/utils/futil.js @@ -100,6 +100,10 @@ export let renameOn = (from, to, obj) => { export let compactMapAsync = async (...args) => _.compact(await Promise.all(F.mapIndexed(...args))) +// Similar to F.getOrReturn but for _.set +export const setOrReturn = (path, val, obj) => + path ? _.set(path, val, obj) : val + // _.groupBy but also passing the current key export const groupByIndexed = _.curry((it, coll) => F.reduceIndexed( From 6c74f9fcd888efac375688c06931cabe5e80dedd Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Mon, 13 Nov 2023 11:00:33 -0500 Subject: [PATCH 10/30] Add more comments --- .../src/example-types/results/highlight.js | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js index bcd279d1d..0b15d1d84 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.js @@ -275,6 +275,7 @@ const stripTags = _.curry((pre, post, fragment) => fragment.replaceAll(pre, '').replaceAll(post, '') ) +// Merge highlighted fragments onto a source array export const highlightArray = (array, fragments, config) => { if (_.isEmpty(array)) { return _.map( @@ -300,7 +301,7 @@ export const highlightArray = (array, fragments, config) => { ) } -// Best-effort naming here :/ +// Best-effort naming on this function :/ export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { const arrayFields = 
_.pickBy( { elasticsearch: { meta: { subType: 'array' } } }, @@ -312,11 +313,13 @@ export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { _.find((k) => field.startsWith(k), arrayFieldsNames) const lastWordRegex = /\.(\w+)$/ + // Ex: `title` and `title.exact` both result in `title` const getMultiFieldName = (field) => { const [multi, sub] = field.split(lastWordRegex) return schema.fields[multi]?.elasticsearch?.fields?.[sub] ? multi : field } + // Merge highlighted fragments onto a source array const getHighlightedArray = (fragments, field, source) => { const arrayPath = getArrayFieldName(field) return highlightArray(_.get(arrayPath, source), fragments, { @@ -325,30 +328,34 @@ export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { }) } + // Transform highlighted fragments into something that can be used to replace + // source values + const handleHighlightedFragments = (hit) => (fragments, field) => + getArrayFieldName(field) + ? getHighlightedArray(fragments, field, hit._source) + : // Do not do anything with fragments for text blobs + schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' + ? fragments + : // Assumming we sent `number_of_fragments:0` to elastic, there should be + // at most one fragment per multi-field (ex: `title`) and at most one + // fragment for each sub-field (ex: `title.exact`, `title.keyword`). + mergeHighlights( + highlightConfig.pre_tag, + highlightConfig.post_tag, + ...fragments + ) + return (hit) => _.flow( // Group `city` and `city.exact` under `city` groupByIndexed((v, k) => getMultiFieldName(k)), _.mapValues(_.flatten), - // Transform highlighted segments into something that can be used to - // replace source values - F.mapValuesIndexed((fragments, field) => - getArrayFieldName(field) - ? getHighlightedArray(fragments, field, hit._source) - : schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' - ? 
fragments - : mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - ...fragments - ) - ), + F.mapValuesIndexed(handleHighlightedFragments(hit)), // Rename `streets.name` to `streets` if `streets` is an array field so // that we can simply replace arrays wholesale in the source. _.mapKeys((field) => getArrayFieldName(field) ?? field), // Default to empty arrays if source arrays should be filtered but no - // highlights come back. That way the source arrays will get replaced with - // empty arrays when highlights are inlined. + // highlights come back for them. _.defaults(highlightConfig.filterSourceArrays ? emptyArrayFields : {}) )(hit.highlight) } From d630f60aac7b90a9d7a20478123bb5f822d4ff7a Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Mon, 13 Nov 2023 18:20:07 -0500 Subject: [PATCH 11/30] Add initial docs --- .eslintrc.json | 5 +-- .../src/example-types/results/highlight.d.ts | 31 +++++++++++++++++++ .../src/example-types/results/highlighting.md | 1 + .../src/example-types/results/index.js | 16 +++++----- .../src/schema-data/schema-with-types.js | 8 ++--- .../src/schema-data/schema-without-types.js | 8 ++--- packages/provider-elasticsearch/src/schema.js | 4 +-- 7 files changed, 53 insertions(+), 20 deletions(-) create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlight.d.ts create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting.md diff --git a/.eslintrc.json b/.eslintrc.json index ad73be1ea..591d734c2 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,6 +1,6 @@ { "extends": ["eslint:recommended", "plugin:import/recommended"], - "ignorePatterns": ["dist", "node_modules"], + "ignorePatterns": ["dist", "node_modules", "*.ts"], "parserOptions": { "ecmaVersion": "2022", "sourceType": "module" @@ -11,7 +11,8 @@ "jest": true }, "rules": { - "import/extensions": [2, { "js": "always" }] + "import/extensions": [2, { "js": "always" }], + "no-reserved-keys": [0] }, "settings": 
{ "import/resolver": { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts b/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts new file mode 100644 index 000000000..6b2b798fc --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts @@ -0,0 +1,31 @@ +declare enum HighlightBehavior { + /** + * Replace source values with highlighted results. + */ + replaceSource, +} + +interface Highlight { + /** + * How to handle highlighted results from elastic. Setting this field to a + * non-empty value will automatically enable highlighting. + */ + behavior?: HighlightBehavior + /** + * Remove non-highlighted items in source arrays when + * `behavior: "replaceSource"`. + */ + filterSourceArrays?: boolean + /** + * Just like elastic's + * [pre_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), + * except we only support one tag for now. The default is ``. + */ + pre_tag?: string + /** + * Just like elastic's + * [post_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), + * except we only support one tag for now. The default is ``. 
+ */ + post_tag?: string +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.md b/packages/provider-elasticsearch/src/example-types/results/highlighting.md new file mode 100644 index 000000000..d57ae7fe2 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.md @@ -0,0 +1 @@ +Explain API with typescript type diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 663295fb7..63ad1f41d 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -34,7 +34,7 @@ export default { // Without this, ES7+ stops counting at 10k instead of returning the actual count track_total_hits: true, _source: F.omitBlank({ includes: node.include, excludes: node.exclude }), - highlight: highlightConfig.enable && { + highlight: highlightConfig.behavior && { pre_tags: [highlightConfig.pre_tag], post_tags: [highlightConfig.post_tag], number_of_fragments: 0, @@ -46,14 +46,14 @@ export default { const response = await search(body) const results = response.hits.hits - if (highlightConfig.enable) { - // Not mutating source in the helper function leaves the door open - // for a configuration flag to control inlining of highlighted - // results in source - const fn = alignHighlightsWithSourceStructure(schema, highlightConfig) + if (highlightConfig.behavior === 'replaceSource') { + const getHighlights = alignHighlightsWithSourceStructure( + schema, + highlightConfig + ) for (const result of results) { - for (const [field, val] of _.toPairs(fn(result))) { - F.setOn(field, val, result._source) + for (const [k, v] of _.toPairs(getHighlights(result))) { + F.setOn(k, v, result._source) } } } diff --git a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js 
index 6c868b74e..657b321a4 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js @@ -4,8 +4,8 @@ export default { index: 'movies', type: 'movie', subFields: { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: { highlight: false }, + exact: { highlight: true }, }, }, fields: { @@ -308,8 +308,8 @@ export default { type: 'movie', aliasOf: 'movies', subFields: { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: { highlight: false }, + exact: { highlight: true }, }, }, fields: { diff --git a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js index 227605719..4b3c90156 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js @@ -3,8 +3,8 @@ export default { elasticsearch: { index: 'movies', subFields: { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: { highlight: false }, + exact: { highlight: true }, }, }, fields: { @@ -50,8 +50,8 @@ export default { index: 'imdb', aliasOf: 'movies', subFields: { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: { highlight: false }, + exact: { highlight: true }, }, }, fields: { diff --git a/packages/provider-elasticsearch/src/schema.js b/packages/provider-elasticsearch/src/schema.js index 1c1b47cae..9b9620a38 100644 --- a/packages/provider-elasticsearch/src/schema.js +++ b/packages/provider-elasticsearch/src/schema.js @@ -31,8 +31,8 @@ let fromEsIndexMapping = (mapping) => { extractFieldsAndEsType, // TODO: think about how to let users pass this multi-field config information _.set('elasticsearch.subFields', { - keyword: { shouldHighlight: false }, - exact: { shouldHighlight: true }, + keyword: 
{ highlight: false }, + exact: { highlight: true }, }), _.update( 'fields', From 83d77f2231291b27bed861afb0988ffe05a358bf Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 14 Nov 2023 11:27:30 -0500 Subject: [PATCH 12/30] Update unit tests --- .../src/example-types/results/highlight.test.js | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js index 3e8b985fe..2c279d8a3 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js @@ -407,6 +407,7 @@ describe('alignHighlightsWithSourceStructure()', () => { describe('arrays of strings', () => { const schema = { fields: { + state: {}, 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, }, } @@ -500,6 +501,7 @@ describe('alignHighlightsWithSourceStructure()', () => { it('should inline source array with empty array when there are no highlights', () => { const hit = { _source: { + state: 'New Jersey', city: { street: [ 'Jefferson Ave.', @@ -510,13 +512,16 @@ describe('alignHighlightsWithSourceStructure()', () => { ], }, }, - highlight: {}, + highlight: { + state: 'New Jersey', + }, } const actual = alignHighlightsWithSourceStructure(schema, { ...highlightConfig, filterSourceArrays: true, })(hit) const expected = { + state: 'New Jersey', 'city.street': [], } expect(actual).toEqual(expected) @@ -645,6 +650,7 @@ describe('alignHighlightsWithSourceStructure()', () => { it('should inline source array with empty array when there are no highlights', () => { const hit = { _source: { + state: 'New Jersey', city: { street: [ { number: 101, name: 'Jefferson Ave.' 
}, @@ -655,13 +661,18 @@ describe('alignHighlightsWithSourceStructure()', () => { ], }, }, - highlight: {}, + highlight: { + state: 'New Jersey', + }, } const actual = alignHighlightsWithSourceStructure(schema, { ...highlightConfig, filterSourceArrays: true, })(hit) - const expected = { 'city.street': [] } + const expected = { + state: 'New Jersey', + 'city.street': [], + } expect(actual).toEqual(expected) }) }) From 26628b085a449a9e70290d7e3e3543837b705f85 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 14 Nov 2023 16:51:16 -0500 Subject: [PATCH 13/30] Expand on documentation --- .../src/example-types/results/highlight.d.ts | 4 +- .../src/example-types/results/highlight.js | 23 ++-- .../src/example-types/results/highlighting.md | 101 +++++++++++++++++- .../src/example-types/results/index.js | 6 +- 4 files changed, 122 insertions(+), 12 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts b/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts index 6b2b798fc..a0e16a95e 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts @@ -1,8 +1,8 @@ declare enum HighlightBehavior { /** - * Replace source values with highlighted results. + * Merge highlighted results onto _source. 
*/ - replaceSource, + mergeOnSource, } interface Highlight { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js index 0b15d1d84..f56b3c379 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlight.js @@ -103,6 +103,8 @@ const getHighlightFieldsMappings = _.memoize((schema) => { const allFieldsGroups = getAllFieldsGroups(schema) return F.reduceIndexed( (acc, { elasticsearch: mapping }, name) => { + // Only include leaf fields (have mapping) and do not include fields + // groups. if (mapping && !allFieldsGroups.has(name)) { Object.assign(acc, { [name]: mapping, @@ -244,7 +246,17 @@ const getHighlightRanges = (pre, post, str) => { return ranges } -/** Wrap substrings given by [start, end] ranges with pre/post tags */ +/** + * Wrap substrings given by [start, end] ranges with pre/post tags + * + * This function could extend `F.highlight` functionality to accept ranges. 
For + * example: + * + * ```javascript + * const braceHighlight = F.highlight("{", "}") + * braceHighlight([[2, 4], [9, 10]], "hello world") // -> "he{llo} wor{ld}" + * ```` + */ const highlightFromRanges = (pre, post, str, ranges) => { const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) @@ -275,7 +287,7 @@ const stripTags = _.curry((pre, post, fragment) => fragment.replaceAll(pre, '').replaceAll(post, '') ) -// Merge highlighted fragments onto a source array +/** Merge highlighted fragments onto a source array */ export const highlightArray = (array, fragments, config) => { if (_.isEmpty(array)) { return _.map( @@ -331,11 +343,10 @@ export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { // Transform highlighted fragments into something that can be used to replace // source values const handleHighlightedFragments = (hit) => (fragments, field) => - getArrayFieldName(field) - ? getHighlightedArray(fragments, field, hit._source) - : // Do not do anything with fragments for text blobs - schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' + schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' ? fragments + : getArrayFieldName(field) + ? getHighlightedArray(fragments, field, hit._source) : // Assumming we sent `number_of_fragments:0` to elastic, there should be // at most one fragment per multi-field (ex: `title`) and at most one // fragment for each sub-field (ex: `title.exact`, `title.keyword`). 
diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.md b/packages/provider-elasticsearch/src/example-types/results/highlighting.md index d57ae7fe2..1ea80a993 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.md +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.md @@ -1 +1,100 @@ -Explain API with typescript type +# Request + +### Fields sent for highlighting + +Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. We assume by default that users want to highlight all the fields present in their query. In order to achieve this, the most logical approach is to extract relevant fields from the elastic query and send them for highlighting, but instead we send every field in the schema for simplicity's sake (with the exception of fields groups). For example, given the following schema: + +```json +{ + "fields": { + "address": { + "elasticsearch": {} + }, + "state": { + "elasticsearch": { + "copy_to": ["address"] + } + }, + "city.street": { + "elasticsearch": { + "copy_to": ["address"] + } + } + } +} +``` + +The fields `state` and `city.street` will be sent for highlighting, but the fields group `address` will be ommitted. + +Whitelisted sub-fields (as defined by the schema) are also sent for highlighting, since they could be present in the query. 
For example, given the following schema: + +```json +{ + "elasticsearch": { + "subFields": { + "keyword": { "highlight": false }, + "exact": { "highlight": true } + } + }, + "fields": { + "state": { + "elasticsearch": { + "fields": { "keyword": {}, "exact": {} } + } + }, + "city.street": { + "elasticsearch": { + "fields": { "keyword": {}, "exact": {} } + } + } + } +} +``` + +The fields `state`, `state.exact`, `city.street`, and `city.street.exact` will be sent, since the `exact` sub-field is whitelisted for highlighting in the schema's `elasticsearch.subFields` configuration. + +> Explain how in the future we could pick up fields from the query itself (handling fields groups and wildcards), to reduce the payload sent to elastic. + +### Highlight queries + +> [!NOTE] +> Elastic only highlights fields contained in the query by default, for example in the following query: +> +> ```json +> { +> "query": { +> "match": { +> "city": { "query": "memphis" } +> } +> }, +> "highlight": { +> "fields": { +> "city": {}, +> "state": {} +> } +> } +> } +> ``` +> +> `city` will get highlighted but `state` won't since it's not in the query. + +Explain how text blobs get picked up from the schema and what configuration gets generated for them. + +Explain why we need to set a highlight query on certain fields + +## Response stage + +Explain `number_of_fragments` if behavior is to replace source values + +Explain current behavior of replacing source values (even when source values are missing) with highlighted results and how in the future we could not do this. 
+ +Explain that subfields get collapsed into their multifields + +Explain how fragments get handled for each field depending on its mapping type + +- noop for text blob fragments +- merge fragments into source array (elaborate) + - when source is missing + - when source is present + - when `filterSourceArrays` is passed +- merge fragments otherwise diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 63ad1f41d..c531a39b6 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -37,8 +37,8 @@ export default { highlight: highlightConfig.behavior && { pre_tags: [highlightConfig.pre_tag], post_tags: [highlightConfig.post_tag], - number_of_fragments: 0, - require_field_match: true, + number_of_fragments: + highlightConfig.behavior === 'mergeOnSource' ? 0 : undefined, fields: getHighlightFields(schema, node._meta.relevantFilters), }, }) @@ -46,7 +46,7 @@ export default { const response = await search(body) const results = response.hits.hits - if (highlightConfig.behavior === 'replaceSource') { + if (highlightConfig.behavior === 'mergeOnSource') { const getHighlights = alignHighlightsWithSourceStructure( schema, highlightConfig From 3dba24056453157690b1338829d2f1f385be266b Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 14 Nov 2023 18:27:53 -0500 Subject: [PATCH 14/30] Expand on documentation --- .../src/example-types/results/highlighting.md | 155 ++++++++++++------ packages/provider-elasticsearch/src/schema.js | 5 - 2 files changed, 102 insertions(+), 58 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.md b/packages/provider-elasticsearch/src/example-types/results/highlighting.md index 1ea80a993..eae09e847 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.md +++ 
b/packages/provider-elasticsearch/src/example-types/results/highlighting.md @@ -1,88 +1,137 @@ -# Request +## Request + +Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. ### Fields sent for highlighting -Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. We assume by default that users want to highlight all the fields present in their query. In order to achieve this, the most logical approach is to extract relevant fields from the elastic query and send them for highlighting, but instead we send every field in the schema for simplicity's sake (with the exception of fields groups). For example, given the following schema: +We assume that users want to highlight all the fields present in the query. The most logical approach is to extract relevant fields from the query and send them for highlighting, but for simplicity's sake we send every field in the schema, with some caveats. + +#### Sub-fields + +Whitelisted sub-fields are sent for highlighting, since they could be present in the query: + +**schema.json** + +```jsonc +{ + "elasticsearch": { + "subFields": { + // `{field}.keyword` will be sent for highlighting. + "keyword": { "highlight": false }, + // `{field}.exact` will be sent for highlighting. + "exact": { "highlight": true } + } + }, + "fields": { + // `state` will be sent for highlighting. + "state": { + "elasticsearch": { + "fields": { + "keyword": {}, + // `state.exact` will be sent for highlighting. + "exact": {} + } + } + } + } +} +``` + +#### Fields groups -```json +Fields groups are not sent for highlighting because we assume users want to highlight fields that were copied over instead of the fields groups themselves: + +**schema.json** + +```jsonc { "fields": { + // `address` won't be sent for highlighting since it's a field group. "address": { "elasticsearch": {} }, + // `state` will be sent for highlighting. 
"state": { "elasticsearch": { "copy_to": ["address"] } - }, - "city.street": { - "elasticsearch": { - "copy_to": ["address"] + } + } +} +``` + +However, this presents a problem since elastic only highlights fields contained in the query by default: + +**request.json** + +```jsonc +{ + "query": { + "match": { + "address": { + "query": "memphis" } } + }, + "highlight": { + "fields": { + "state": {} // Won't be highlighted. + } } } ``` -The fields `state` and `city.street` will be sent for highlighting, but the fields group `address` will be ommitted. +In order to fix this, we make use of elastic's [highlight_query](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings), which allows us to set a per-field query for highlighting purposes: -Whitelisted sub-fields (as defined by the schema) are also sent for highlighting, since they could be present in the query. For example, given the following schema: +**request.json** -```json +```jsonc { - "elasticsearch": { - "subFields": { - "keyword": { "highlight": false }, - "exact": { "highlight": true } + "query": { + "match": { + "address": { + "query": "memphis" + } } }, - "fields": { - "state": { - "elasticsearch": { - "fields": { "keyword": {}, "exact": {} } + "highlight": { + "fields": { + "state": { + "highlight_query": { + "match": { + // `address` is replaced by `state` + "state": { + "query": "memphis" + } + } + } } - }, - "city.street": { + } + } +} +``` + +#### Text blobs + +In the spirit of keeping our API simple, we generate opinionated highlighting configuration for large text blobs to improve highlighting performance. More often than not, it makes sense to only display highlighted fragments instead of the whole blob for these types of fields. 
Since elastic does not have a "blob" or "large text" type, we've adopted the convention of specifying a field's "subType" using elastic's [meta property](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/mapping-field-meta.html): + +**schema.json** + +```jsonc +{ + "fields": { + "donQuixoteText": { "elasticsearch": { - "fields": { "keyword": {}, "exact": {} } + "meta": { + "subType": "blob" + } } } } } ``` -The fields `state`, `state.exact`, `city.street`, and `city.street.exact` will be sent, since the `exact` sub-field is whitelisted for highlighting in the schema's `elasticsearch.subFields` configuration. - -> Explain how in the future we could pick up fields from the query itself (handling fields groups and wildcards), to reduce the payload sent to elastic. - -### Highlight queries - -> [!NOTE] -> Elastic only highlights fields contained in the query by default, for example in the following query: -> -> ```json -> { -> "query": { -> "match": { -> "city": { "query": "memphis" } -> } -> }, -> "highlight": { -> "fields": { -> "city": {}, -> "state": {} -> } -> } -> } -> ``` -> -> `city` will get highlighted but `state` won't since it's not in the query. - -Explain how text blobs get picked up from the schema and what configuration gets generated for them. - -Explain why we need to set a highlight query on certain fields - -## Response stage +## TODO: Response stage Explain `number_of_fragments` if behavior is to replace source values diff --git a/packages/provider-elasticsearch/src/schema.js b/packages/provider-elasticsearch/src/schema.js index 9b9620a38..566e2f339 100644 --- a/packages/provider-elasticsearch/src/schema.js +++ b/packages/provider-elasticsearch/src/schema.js @@ -29,11 +29,6 @@ let fromEsIndexMapping = (mapping) => { // filters out 'dynamic_templates' (an array), 'dynamic: true', etc. 
_.pickBy(_.isPlainObject), extractFieldsAndEsType, - // TODO: think about how to let users pass this multi-field config information - _.set('elasticsearch.subFields', { - keyword: { highlight: false }, - exact: { highlight: true }, - }), _.update( 'fields', _.flow( From 09f536af9e1509cb094c0c8d3d923807408a481e Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Wed, 15 Nov 2023 14:03:03 -0500 Subject: [PATCH 15/30] Almost done with documentation --- .../src/example-types/results/highlighting.md | 148 +++++++++++++++--- 1 file changed, 127 insertions(+), 21 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.md b/packages/provider-elasticsearch/src/example-types/results/highlighting.md index eae09e847..3debccfaa 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.md +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting.md @@ -6,17 +6,19 @@ Our approach to highlighting is designed to be as out of the box as possible, wi We assume that users want to highlight all the fields present in the query. The most logical approach is to extract relevant fields from the query and send them for highlighting, but for simplicity's sake we send every field in the schema, with some caveats. -#### Sub-fields +#### 1. Sub-fields Whitelisted sub-fields are sent for highlighting, since they could be present in the query: -**schema.json** +
+ +schema.json ```jsonc { "elasticsearch": { "subFields": { - // `{field}.keyword` will be sent for highlighting. + // `{field}.keyword` will *not* be sent for highlighting. "keyword": { "highlight": false }, // `{field}.exact` will be sent for highlighting. "exact": { "highlight": true } @@ -37,11 +39,15 @@ Whitelisted sub-fields are sent for highlighting, since they could be present in } ``` -#### Fields groups +
+ +#### 2. Fields groups Fields groups are not sent for highlighting because we assume users want to highlight fields that were copied over instead of the fields groups themselves: -**schema.json** +
+ +schema.json ```jsonc { @@ -60,9 +66,13 @@ Fields groups are not sent for highlighting because we assume users want to high } ``` +
+ However, this presents a problem since elastic only highlights fields contained in the query by default: -**request.json** +
+ +request.json ```jsonc { @@ -81,9 +91,13 @@ However, this presents a problem since elastic only highlights fields contained } ``` -In order to fix this, we make use of elastic's [highlight_query](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings), which allows us to set a per-field query for highlighting purposes: +
+ +In order to fix this, we make use of elastic's [highlight_query](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings), which allows us to set a query per-field for highlighting purposes: -**request.json** +
+ +request.json ```jsonc { @@ -111,11 +125,15 @@ In order to fix this, we make use of elastic's [highlight_query](https://www.ela } ``` -#### Text blobs +
+ +#### 3. Text blobs In the spirit of keeping our API simple, we generate opinionated highlighting configuration for large text blobs to improve highlighting performance. More often than not, it makes sense to only display highlighted fragments instead of the whole blob for these types of fields. Since elastic does not have a "blob" or "large text" type, we've adopted the convention of specifying a field's "subType" using elastic's [meta property](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/mapping-field-meta.html): -**schema.json** +
+ +schema.json ```jsonc { @@ -131,19 +149,107 @@ In the spirit of keeping our API simple, we generate opinionated highlighting co } ``` -## TODO: Response stage +
+ +## Response + +Currently the only supported behavior is to merge highlighted fields into source fields (we may provide an option to opt-out in the future). Fields present in the highlighted results but not in the source still get merged onto the source. For this approach to work, the highlighted fields must contain the entire field value (as opposed to only fragments), so we set [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) to `0` in the request. The exception is blob text fields, for which we return highlighted fragments by default instead of the entire highlighted value. + +Before merging, highlighted results need to be transformed. Assuming `exact` to be a sub-field of `details`, the following rules apply: + +#### 1. Text fields + +The first fragments of each field (which should contain the entire field value because of `number_of_fragments: 0`) are merged into one value + +```json +{ + "details": ["The lazy fox"], + "details.exact": ["The lazy fox"] +} +``` + +will be transformed into + +```json +{ + "details": "The lazy fox" +} +``` + +Merging of highlighted fragments could be handled by elastic but this is still [an open issue](https://github.com/elastic/elasticsearch/issues/5172). + +#### 2. Blob text fields + +Blob text fields get their highlighted fragments joined, because there is no other behavior we could take here: + +```json +{ + "details": ["The lazy fox", "jumped over"], + "details.exact": ["The lazy fox", "jumped over"] +} +``` + +will be transformed into + +```json +{ + "details": [ + "The lazy fox", + "jumped over", + "The lazy fox", + "jumped over" + ] +} +``` + +#### 3. Array fields + +Elastic doesn't have a concept of array fields, so we rely again on the `subType` convention used for text blobs to identify them +
+ +schema.json + +```jsonc +{ + "fields": { + "gloriousArrayField": { + "elasticsearch": { + "meta": { + "subType": "array" + } + } + } + } +} +``` + +
+ +which allows us to order highlighted array items based on the source array (as long as it's present) -Explain `number_of_fragments` if behavior is to replace source values +
-Explain current behavior of replacing source values (even when source values are missing) with highlighted results and how in the future we could not do this. +ordering.test.js -Explain that subfields get collapsed into their multifields +```javascript +import assert from 'node:assert' + +const hit = { + _source: { + names: ['John', 'Smith', 'Jane', 'Austen'], + }, + highlight: { + names: ['Austen', 'Smith'], + }, +} + +const actual = order(hit.highlight.names, hit._source.names) +const expected = [undefined, 'Smith', undefined, 'Austen'] + +assert.deepEqual(actual, expected) +``` -Explain how fragments get handled for each field depending on its mapping type +
-- noop for text blob fragments -- merge fragments into source array (elaborate) - - when source is missing - - when source is present - - when `filterSourceArrays` is passed -- merge fragments otherwise +Ideally elastic's response would include enough information to deduce the array index for each highlighted fragment but unfortunately this is still [an open issue](https://github.com/elastic/elasticsearch/issues/7416). From 895d825d8eaf9f183fe6e4b95b255290bcd5c18e Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Wed, 15 Nov 2023 21:47:44 -0500 Subject: [PATCH 16/30] Reorganize highlighting code --- .eslintrc.json | 3 +- .../src/example-types/results/highlight.d.ts | 31 - .../src/example-types/results/highlight.js | 372 ---------- .../example-types/results/highlight.test.js | 679 ------------------ .../README.md} | 72 +- .../results/highlighting/merging.js | 33 + .../results/highlighting/merging.test.js | 201 ++++++ .../results/highlighting/request.js | 108 +++ .../results/highlighting/request.test.js | 170 +++++ .../results/highlighting/response.js | 66 ++ .../results/highlighting/response.test.js | 214 ++++++ .../results/highlighting/util.js | 73 ++ .../results/highlighting/util.test.js | 47 ++ .../src/example-types/results/index.js | 42 +- .../src/example-types/results/type.d.ts | 38 + 15 files changed, 1032 insertions(+), 1117 deletions(-) delete mode 100644 packages/provider-elasticsearch/src/example-types/results/highlight.d.ts delete mode 100644 packages/provider-elasticsearch/src/example-types/results/highlight.js delete mode 100644 packages/provider-elasticsearch/src/example-types/results/highlight.test.js rename packages/provider-elasticsearch/src/example-types/results/{highlighting.md => highlighting/README.md} (69%) create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/merging.test.js create mode 100644 
packages/provider-elasticsearch/src/example-types/results/highlighting/request.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/response.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/util.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/type.d.ts diff --git a/.eslintrc.json b/.eslintrc.json index 591d734c2..2bb1cb7bb 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -11,8 +11,7 @@ "jest": true }, "rules": { - "import/extensions": [2, { "js": "always" }], - "no-reserved-keys": [0] + "import/extensions": [2, { "js": "always" }] }, "settings": { "import/resolver": { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts b/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts deleted file mode 100644 index a0e16a95e..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.d.ts +++ /dev/null @@ -1,31 +0,0 @@ -declare enum HighlightBehavior { - /** - * Merge highlighted results onto _source. - */ - mergeOnSource, -} - -interface Highlight { - /** - * How to handle highlighted results from elastic. Setting this field to a - * non-empty value will automatically enable highlighting. - */ - behavior?: HighlightBehavior - /** - * Remove non-highlighted items in source arrays when - * `behavior: "replaceSource"`. - */ - filterSourceArrays?: boolean - /** - * Just like elastic's - * [pre_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), - * except we only support one tag for now. The default is ``. 
- */ - pre_tag?: string - /** - * Just like elastic's - * [post_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), - * except we only support one tag for now. The default is ``. - */ - post_tag?: string -} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.js b/packages/provider-elasticsearch/src/example-types/results/highlight.js deleted file mode 100644 index f56b3c379..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.js +++ /dev/null @@ -1,372 +0,0 @@ -import _ from 'lodash/fp.js' -import F from 'futil' -import { CartesianProduct } from 'js-combinatorics' -import { groupByIndexed, setOrReturn } from '../../utils/futil.js' - -/** - * Set of all fields groups in the mappings, including their cartesian product with - * sub fields. For example, given the schema - * - * { - * elasticsearch: { - * subFields: { - * exact: { highlight: true } - * } - * }, - * fields: { - * address: {}, - * state: { - * elasticsearch: { - * copy_to: ['address'], - * fields: { exact: {} } - * } - * } - * } - * } - * - * this function will return `["address", "address.exact"]` - * - * See https://www.elastic.co/guide/en/elasticsearch/reference/current/copy-to.html - */ -const getAllFieldsGroups = _.memoize((schema) => { - const highlightSubFields = _.keys( - _.pickBy('highlight', schema.elasticsearch?.subFields) - ) - return new Set( - _.flatMap((field) => { - const copy_to = field.elasticsearch?.copy_to - if (!_.isEmpty(copy_to)) { - const product = new CartesianProduct(copy_to, highlightSubFields) - return [...copy_to, ...Array.from(product).map(_.join('.'))] - } - return copy_to - }, schema.fields) - ) -}, _.get('elasticsearch.index')) - -/** - * Only fields whose names are present in the query get highlighted by elastic - * due to us passing `require_field_match:true`. However, we have to consider - * sub fields as well. 
For example, if `city` is a multi-field containing a - * sub-field named `exact`, elastic won't highlight `city` in the following - * request: - * - * { - * "query": { - * "match": { - * "city.exact": { "query": "memphis" } - * } - * }, - * "highlight": { - * "fields": { - * "city": {}, - * } - * } - * } - * - * Instead, we have to match the sub-field verbatim in the highlight config: - * - * { - * "query": { - * "match": { - * "city.exact": { "query": "memphis" } - * } - * }, - * "highlight": { - * "fields": { - * "city.exact": {}, - * } - * } - * } - * - * This function will make mappings for subfields so we can spread them at the - * top-level and send them along with regular fields for elastic to highlight. - */ -const getSubFieldsMappings = (schema, multiFieldMapping, multiFieldName) => - F.reduceIndexed( - (acc, mapping, name) => { - if (schema.elasticsearch.subFields[name]?.highlight) { - acc[`${multiFieldName}.${name}`] = { - ...mapping, - meta: { ...multiFieldMapping.meta, isSubField: true }, - copy_to: _.map((k) => `${k}.${name}`, multiFieldMapping.copy_to), - } - } - return acc - }, - {}, - multiFieldMapping.fields - ) - -/** Mappings for fields that should be highlighted */ -const getHighlightFieldsMappings = _.memoize((schema) => { - const allFieldsGroups = getAllFieldsGroups(schema) - return F.reduceIndexed( - (acc, { elasticsearch: mapping }, name) => { - // Only include leaf fields (have mapping) and do not include fields - // groups. 
- if (mapping && !allFieldsGroups.has(name)) { - Object.assign(acc, { - [name]: mapping, - ...getSubFieldsMappings(schema, mapping, name), - }) - } - return acc - }, - {}, - schema.fields - ) -}, _.get('elasticsearch.index')) - -export const getHighlightFields = (schema, query) => { - const allFieldsGroups = getAllFieldsGroups(schema) - - const querystr = JSON.stringify(query) - - // Pre-computed list of fields groups present in the query - const queryFieldsGroups = [] - F.walk()((val, key) => { - if (allFieldsGroups.has(val)) queryFieldsGroups.push(val) - if (allFieldsGroups.has(key)) queryFieldsGroups.push(key) - })(query) - - /** - * Only fields whose names are present in the query get highlighted by elastic - * due to us passing `require_field_match:true`. However, we have to consider - * fields groups as well. For example, given that `city` and `street` are - * copied to `address`, elastic won't highlight them in the following request: - * - * { - * "query": { - * "match": { - * "address": { "query": "memphis" } - * } - * }, - * "highlight": { - * "fields": { - * "city": {}, - * "street": {} - * } - * } - * } - * - * Instead, we have to specify a query just for highlighting, making sure we - * replace `address` with the correct field: - * - * { - * "query": { - * "match": { - * "address": { "query": "memphis" } - * } - * }, - * "highlight": { - * "fields": { - * "city": { - * "highlight_query": { - * "match": { - * "city": { "query": "memphis" } - * } - * } - * }, - * "street": { - * "highlight_query": { - * "match": { - * "street": { "query": "memphis" } - * } - * } - * } - * } - * } - * } - * - * Also, an interesting behavior is that boolean logic has no effect in - * highlighting. The following query will highlight both `memphis` and - * `miami` in the field `city` even though only the first `should` expression - * matches. 
- * - * { - * "bool": { - * "should": [ - * { "match": { "city": "memphis" } }, - * { - * "bool": { - * "must": [ - * { "match": { "city": "miami" } }, - * { "match": { "state": "" } } - * ] - * } - * } - * ] - * } - * } - */ - const getHighlightQuery = (mapping, name) => { - const toReplace = _.intersection(queryFieldsGroups, mapping.copy_to) - if (!_.isEmpty(toReplace)) { - const regexp = new RegExp(_.join('|', toReplace), 'g') - return JSON.parse(_.replace(regexp, name, querystr)) - } - } - - // Transform a field mapping to a field highlighting configuration - // See https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#override-global-settings - const fieldMappingToHighlightConfig = (mapping, name) => { - const isBlob = mapping.meta?.subType === 'blob' - return F.omitBlank({ - fragment_size: isBlob ? 250 : null, - number_of_fragments: isBlob ? 3 : null, - highlight_query: getHighlightQuery(mapping, name), - }) - } - - return F.mapValuesIndexed( - fieldMappingToHighlightConfig, - getHighlightFieldsMappings(schema) - ) -} - -/** - * Returns an array of [start, end] ranges that correspond to substrings - * enclosed in pre/post tags. The ranges correspond to the plain string without - * tags. For example given the tags `{ pre: '', post: '' }`, this - * function will return [[2, 5], [6, 9]] for the string - * - * `A red car` - */ -const getHighlightRanges = (pre, post, str) => { - let runningTagsLength = 0 - const ranges = [] - const regexp = new RegExp(`${pre}(?.*?)${post}`, 'g') - for (const match of str.matchAll(regexp)) { - const start = match.index - runningTagsLength - const end = start + match.groups.capture.length - ranges.push([start, end]) - runningTagsLength += pre.length + post.length - } - return ranges -} - -/** - * Wrap substrings given by [start, end] ranges with pre/post tags - * - * This function could extend `F.highlight` functionality to accept ranges. 
For - * example: - * - * ```javascript - * const braceHighlight = F.highlight("{", "}") - * braceHighlight([[2, 4], [9, 10]], "hello world") // -> "he{llo} wor{ld}" - * ```` - */ -const highlightFromRanges = (pre, post, str, ranges) => { - const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) - const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) - const highlighted = str.replace(/./g, (match, index) => { - if (index in starts) return `${pre}${match}` - if (index in ends) return `${post}${match}` - return match - }) - // Sometimes the last word is highlighted so the index for the last tag is - // `str.length` but `replace` only makes it up to `str.length - 1`. - return _.last(_.last(ranges)) === str.length - ? `${highlighted}${post}` - : highlighted -} - -export const mergeHighlights = (pre, post, ...strs) => { - // This may look unnecessary but merging highlights is not cheap and many - // times is not even needed - if (_.size(strs) <= 1) return _.head(strs) - const ranges = F.mergeRanges( - _.flatMap((str) => getHighlightRanges(pre, post, str), strs) - ) - const plain = _.head(strs).replaceAll(pre, '').replaceAll(post, '') - return highlightFromRanges(pre, post, plain, ranges) -} - -const stripTags = _.curry((pre, post, fragment) => - fragment.replaceAll(pre, '').replaceAll(post, '') -) - -/** Merge highlighted fragments onto a source array */ -export const highlightArray = (array, fragments, config) => { - if (_.isEmpty(array)) { - return _.map( - (fragment) => setOrReturn(config.fragmentPath, fragment, {}), - fragments - ) - } - const fragmentsMap = F.arrayToObject( - stripTags(config.pre_tag, config.post_tag), - _.identity, - fragments - ) - return _.reduce( - (acc, item) => { - const plain = F.getOrReturn(config.fragmentPath, item) - const fragment = fragmentsMap[plain] - return config.filterSourceArrays && fragment === undefined - ? acc - : F.push(setOrReturn(config.fragmentPath, fragment ?? 
plain, item), acc) - }, - [], - array - ) -} - -// Best-effort naming on this function :/ -export const alignHighlightsWithSourceStructure = (schema, highlightConfig) => { - const arrayFields = _.pickBy( - { elasticsearch: { meta: { subType: 'array' } } }, - schema.fields - ) - const emptyArrayFields = _.mapValues(_.constant([]), arrayFields) - const arrayFieldsNames = _.keys(arrayFields) - const getArrayFieldName = (field) => - _.find((k) => field.startsWith(k), arrayFieldsNames) - - const lastWordRegex = /\.(\w+)$/ - // Ex: `title` and `title.exact` both result in `title` - const getMultiFieldName = (field) => { - const [multi, sub] = field.split(lastWordRegex) - return schema.fields[multi]?.elasticsearch?.fields?.[sub] ? multi : field - } - - // Merge highlighted fragments onto a source array - const getHighlightedArray = (fragments, field, source) => { - const arrayPath = getArrayFieldName(field) - return highlightArray(_.get(arrayPath, source), fragments, { - ...highlightConfig, - fragmentPath: field.slice(arrayPath.length + 1), // +1 strips off leading dot - }) - } - - // Transform highlighted fragments into something that can be used to replace - // source values - const handleHighlightedFragments = (hit) => (fragments, field) => - schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' - ? fragments - : getArrayFieldName(field) - ? getHighlightedArray(fragments, field, hit._source) - : // Assumming we sent `number_of_fragments:0` to elastic, there should be - // at most one fragment per multi-field (ex: `title`) and at most one - // fragment for each sub-field (ex: `title.exact`, `title.keyword`). 
- mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - ...fragments - ) - - return (hit) => - _.flow( - // Group `city` and `city.exact` under `city` - groupByIndexed((v, k) => getMultiFieldName(k)), - _.mapValues(_.flatten), - F.mapValuesIndexed(handleHighlightedFragments(hit)), - // Rename `streets.name` to `streets` if `streets` is an array field so - // that we can simply replace arrays wholesale in the source. - _.mapKeys((field) => getArrayFieldName(field) ?? field), - // Default to empty arrays if source arrays should be filtered but no - // highlights come back for them. - _.defaults(highlightConfig.filterSourceArrays ? emptyArrayFields : {}) - )(hit.highlight) -} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js b/packages/provider-elasticsearch/src/example-types/results/highlight.test.js deleted file mode 100644 index 2c279d8a3..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlight.test.js +++ /dev/null @@ -1,679 +0,0 @@ -import { - getHighlightFields, - mergeHighlights, - alignHighlightsWithSourceStructure, - highlightArray, -} from './highlight.js' - -const highlightConfig = { pre_tag: '', post_tag: '' } - -describe('getHighlightFields()', () => { - it('should exclude fields without mappings', () => { - const actual = getHighlightFields( - { - fields: { - other: {}, - state: { elasticsearch: {} }, - 'city.street': { elasticsearch: {} }, - }, - }, - {} - ) - const expected = { - state: {}, - 'city.street': {}, - } - expect(actual).toEqual(expected) - }) - - it('should exclude group fields', () => { - const actual = getHighlightFields( - { - fields: { - all: { elasticsearch: {} }, - address: { elasticsearch: {} }, - state: { elasticsearch: { copy_to: ['all', 'address'] } }, - 'city.street': { elasticsearch: { copy_to: ['all', 'address'] } }, - }, - }, - {} - ) - const expected = { - state: {}, - 'city.street': {}, - } - expect(actual).toEqual(expected) - }) - - 
it('should include whitelisted sub fields', () => { - const actual = getHighlightFields( - { - elasticsearch: { - subFields: { - keyword: { highlight: false }, - exact: { highlight: true }, - }, - }, - fields: { - state: { - elasticsearch: { - fields: { keyword: {}, exact: {} }, - }, - }, - 'city.street': { - elasticsearch: { - fields: { keyword: {}, exact: {} }, - }, - }, - }, - }, - {} - ) - const expected = { - state: {}, - 'state.exact': {}, - 'city.street': {}, - 'city.street.exact': {}, - } - expect(actual).toEqual(expected) - }) - - it('should generate configuration for blob text fields', () => { - const actual = getHighlightFields( - { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, - fields: { - state: { - elasticsearch: { - meta: { subType: 'blob' }, - fields: { exact: {} }, - }, - }, - }, - }, - {} - ) - const expected = { - state: { - fragment_size: 250, - number_of_fragments: 3, - }, - 'state.exact': { - fragment_size: 250, - number_of_fragments: 3, - }, - } - expect(actual).toEqual(expected) - }) - - it('should generate highlight_query with field groups replaced', () => { - const queryWith = (field) => ({ - bool: { - must: [ - { terms: { [field]: 'memphis' } }, - { query_string: { query: 'memphis', default_field: field } }, - ], - }, - }) - const actual = getHighlightFields( - { - fields: { - address: { elasticsearch: {} }, - state: { elasticsearch: { copy_to: ['address'] } }, - 'city.street': { elasticsearch: { copy_to: ['address'] } }, - }, - }, - queryWith('address') - ) - const expected = { - state: { - highlight_query: queryWith('state'), - }, - 'city.street': { - highlight_query: queryWith('city.street'), - }, - } - expect(actual).toEqual(expected) - }) - - it('should generate highlight_query with field groups replaced for sub fields', () => { - const queryWith = (field) => ({ - bool: { - must: [ - { terms: { [field]: 'memphis' } }, - { query_string: { query: 'memphis', default_field: field } }, - ], - }, - }) - const 
actual = getHighlightFields( - { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, - fields: { - address: { - elasticsearch: {}, - }, - state: { - elasticsearch: { - copy_to: ['address'], - fields: { exact: {} }, - }, - }, - 'city.street': { - elasticsearch: { - copy_to: ['address'], - fields: { exact: {} }, - }, - }, - }, - }, - queryWith('address.exact') - ) - const expected = { - state: {}, - 'state.exact': { highlight_query: queryWith('state.exact') }, - 'city.street': {}, - 'city.street.exact': { highlight_query: queryWith('city.street.exact') }, - } - expect(actual).toEqual(expected) - }) -}) - -describe('mergeHighlights()', () => { - it('should merge highlights that do not overlap', () => { - const actual = mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) - const expected = - 'The quick brown fox jumps over the lazy dog' - expect(actual).toEqual(expected) - }) - - it('should merge highlights that overlap', () => { - const actual = mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) - const expected = 'The quick brown fox jumps over the lazy dog' - expect(actual).toEqual(expected) - }) - - it('should merge highlights that are contained within another', () => { - const actual = mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) - const expected = 'The quick brown fox jumps over the lazy dog' - expect(actual).toEqual(expected) - }) - - it('should merge highlights at the end of the string', () => { - const actual = mergeHighlights( - highlightConfig.pre_tag, - highlightConfig.post_tag, - 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy 
dog' - ) - const expected = - 'The quick brown fox jumps over the lazy dog' - expect(actual).toEqual(expected) - }) -}) - -describe('highlightArray()', () => { - it('should return ordered array of fragments', () => { - const actual = highlightArray( - ['Meridian St.', 'Collins Ave.'], - ['Collins Ave.', 'Meridian St.'], - highlightConfig - ) - const expected = ['Meridian St.', 'Collins Ave.'] - expect(actual).toEqual(expected) - }) - - it('should return ordered and filtered array of fragments', () => { - const actual = highlightArray( - ['Meridian St.', 'Collins Ave.', 'Raunch Rd.'], - ['Collins Ave.', 'Meridian St.'], - { ...highlightConfig, filterSourceArrays: true } - ) - const expected = ['Meridian St.', 'Collins Ave.'] - expect(actual).toEqual(expected) - }) - - it('should return ordered array of objects with fragments', () => { - const actual = highlightArray( - [ - { state: 'Florida', city: { number: 405, street: 'Meridian St.' } }, - { state: 'Georgia', city: { number: 235, street: 'Collins Ave.' } }, - ], - ['Collins Ave.', 'Meridian St.'], - { ...highlightConfig, fragmentPath: 'city.street' } - ) - const expected = [ - { - state: 'Florida', - city: { number: 405, street: 'Meridian St.' }, - }, - { - state: 'Georgia', - city: { number: 235, street: 'Collins Ave.' }, - }, - ] - expect(actual).toEqual(expected) - }) - - it('should return ordered and filtered array of objects with fragments', () => { - const actual = highlightArray( - [ - { state: 'Florida', city: { number: 405, street: 'Meridian St.' } }, - { state: 'Georgia', city: { number: 235, street: 'Collins Ave.' } }, - { state: 'Iowa', city: { number: 111, street: 'Raunch Rd.' } }, - ], - ['Collins Ave.', 'Meridian St.'], - { - ...highlightConfig, - fragmentPath: 'city.street', - filterSourceArrays: true, - } - ) - const expected = [ - { - state: 'Florida', - city: { number: 405, street: 'Meridian St.' }, - }, - { - state: 'Georgia', - city: { number: 235, street: 'Collins Ave.' 
}, - }, - ] - expect(actual).toEqual(expected) - }) -}) - -describe('alignHighlightsWithSourceStructure()', () => { - describe('text fields', () => { - const schema = { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, - fields: { - state: { - elasticsearch: { - fields: { exact: {} }, - }, - }, - 'city.street': { - elasticsearch: { - fields: { exact: {} }, - }, - }, - }, - } - - it('should merge fragments', () => { - const hit = { - _source: { - name: 'John Wayne', - state: 'New Jersey', - city: { street: 'Jefferson Ave' }, - }, - highlight: { - state: ['New Jersey'], - 'state.exact': ['New Jersey'], - 'city.street': ['Jefferson Ave'], - 'city.street.exact': ['Jefferson Ave'], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - state: 'New Jersey', - 'city.street': 'Jefferson Ave', - } - expect(actual).toEqual(expected) - }) - }) - - describe('blob text fields', () => { - const schema = { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, - fields: { - blob: { - elasticsearch: { - meta: { subType: 'blob' }, - fields: { exact: {} }, - }, - }, - }, - } - - it('should not merge fragments', () => { - const hit = { - _source: {}, - highlight: { - blob: [ - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - ], - 'blob.exact': [ - 'Jefferson Ave.', - 'Washington St.', - 'Lincoln Rd.', - ], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - blob: [ - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - 'Jefferson Ave.', - 'Washington St.', - 'Lincoln Rd.', - ], - } - expect(actual).toEqual(expected) - }) - }) - - describe('arrays of strings', () => { - const schema = { - fields: { - state: {}, - 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, - }, - } - - it('should inline array of strings when source is empty', () => { - const hit = { - _source: {}, - highlight: { - 
'city.street': ['Collins Ave.', 'Meridian St.'], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - 'city.street': ['Collins Ave.', 'Meridian St.'], - } - expect(actual).toEqual(expected) - }) - - it('should inline array of strings when source has value', () => { - const hit = { - _source: { - city: { - street: ['Jefferson Ave.', 'Meridian St.', 'Collins Ave.'], - }, - }, - highlight: { - 'city.street': ['Collins Ave.', 'Meridian St.'], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - 'city.street': [ - 'Jefferson Ave.', - 'Meridian St.', - 'Collins Ave.', - ], - } - expect(actual).toEqual(expected) - }) - - it('should inline and filter array of strings when source is empty', () => { - const hit = { - _source: {}, - highlight: { - 'city.street': ['Collins Ave.', 'Meridian St.'], - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - 'city.street': ['Collins Ave.', 'Meridian St.'], - } - expect(actual).toEqual(expected) - }) - - it('should inline and filter array of strings when source has value', () => { - const hit = { - _source: { - city: { - street: [ - 'Jefferson Ave.', - 'Washington St.', - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - ], - }, - }, - highlight: { - 'city.street': ['Collins Ave.', 'Meridian St.'], - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - 'city.street': ['Meridian St.', 'Collins Ave.'], - } - expect(actual).toEqual(expected) - }) - - it('should inline source array with empty array when there are no highlights', () => { - const hit = { - _source: { - state: 'New Jersey', - city: { - street: [ - 'Jefferson Ave.', - 'Washington St.', - 'Meridian St.', - 'Collins Ave.', - 'Ocean Drive', - ], - 
}, - }, - highlight: { - state: 'New Jersey', - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - state: 'New Jersey', - 'city.street': [], - } - expect(actual).toEqual(expected) - }) - }) - - describe('arrays of objects', () => { - const schema = { - fields: { - 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, - 'city.street.name': {}, - }, - } - - it('should inline array of objects when source is empty', () => { - const hit = { - _source: {}, - highlight: { - 'city.street.name': [ - 'Collins Ave.', - 'Meridian St.', - ], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - 'city.street': [ - { name: 'Collins Ave.' }, - { name: 'Meridian St.' }, - ], - } - expect(actual).toEqual(expected) - }) - - it('should inline array of objects when source has value', () => { - const hit = { - _source: { - city: { - street: [ - { number: 101, name: 'Jefferson Ave.' }, - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - ], - }, - }, - highlight: { - 'city.street.name': [ - 'Collins Ave.', - 'Meridian St.', - ], - }, - } - const actual = alignHighlightsWithSourceStructure( - schema, - highlightConfig - )(hit) - const expected = { - 'city.street': [ - { number: 101, name: 'Jefferson Ave.' }, - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - ], - } - expect(actual).toEqual(expected) - }) - - it('should inline and filter array of objects when source is empty', () => { - const hit = { - _source: {}, - highlight: { - 'city.street.name': [ - 'Collins Ave.', - 'Meridian St.', - ], - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - 'city.street': [ - { name: 'Collins Ave.' }, - { name: 'Meridian St.' 
}, - ], - } - expect(actual).toEqual(expected) - }) - - it('should inline and filter array of objects when source has value', () => { - const hit = { - _source: { - city: { - street: [ - { number: 101, name: 'Jefferson Ave.' }, - { number: 789, name: 'Washington St.' }, - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - { number: 655, name: 'Ocean Drive' }, - ], - }, - }, - highlight: { - 'city.street.name': [ - 'Collins Ave.', - 'Meridian St.', - ], - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - 'city.street': [ - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' }, - ], - } - expect(actual).toEqual(expected) - }) - - it('should inline source array with empty array when there are no highlights', () => { - const hit = { - _source: { - state: 'New Jersey', - city: { - street: [ - { number: 101, name: 'Jefferson Ave.' }, - { number: 789, name: 'Washington St.' }, - { number: 235, name: 'Meridian St.' }, - { number: 9, name: 'Collins Ave.' 
}, - { number: 655, name: 'Ocean Drive' }, - ], - }, - }, - highlight: { - state: 'New Jersey', - }, - } - const actual = alignHighlightsWithSourceStructure(schema, { - ...highlightConfig, - filterSourceArrays: true, - })(hit) - const expected = { - state: 'New Jersey', - 'city.street': [], - } - expect(actual).toEqual(expected) - }) - }) -}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.md b/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md similarity index 69% rename from packages/provider-elasticsearch/src/example-types/results/highlighting.md rename to packages/provider-elasticsearch/src/example-types/results/highlighting/README.md index 3debccfaa..c179edd36 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.md +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md @@ -1,6 +1,14 @@ -## Request +# Highlighting -Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. +Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. See `./type.d.ts` for more details on the API. + +There are three pieces involved in highlighting: + +1. Building out highlight configuration to send with an elastic request. +2. Transforming highlighted fragments in the elastic response into a structure similar to that of `_source`. +3. Merging such structure into a hit's `_source`. + +## 1. Request ### Fields sent for highlighting @@ -151,15 +159,15 @@ In the spirit of keeping our API simple, we generate opinionated highlighting co -## Response +## 2. Response -Currently the only supported behavior is to merge highlighted fields into source fields (we may provide an option to opt-out in the future). Fields present in the highlighted results but not in the source still get merged onto the source. 
For this approach to work, the highlighted fields must contain the entire field value (as opposed to only fragments), so we set [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) to `0` in the request. The exception being blob text fields which we default to return highlighted fragments instead of the entire highlighted value. +Currently the only supported behavior is to merge highlighted fragments into `_source` (we may provide an option to opt-out in the future). For this approach to work, fragments must contain the entire field value, so we set [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) to `0` in the request. The exception being blob text fields which set `number_of_fragments` to something `> 0` since they're too big to highlight in their entirety. -Before merging, highlighted results need to be transformed. Assumming `exact` to be a sub-field of `details`, the following rules apply: +Assuming `exact` to be a sub-field of `details`, the following rules apply when transforming the highlight response: #### 1. Text fields -The first fragments of each field (which should contain the entire field value because of `number_of_fragments: 0`) are merged into one value +The first fragments of each field (which should contain the entire field value because of `number_of_fragments: 0`) are merged into a single fragment ```json { @@ -180,7 +188,7 @@ Merging of highlighted fragments could be handled by elastic but this is still [ #### 2.
Blob text fields -Blob text fields get their highlighted fragments joined, because there is no other behavior we could take here: +Blob text field fragments are concatenated because that's the only sensible thing to do: ```json { @@ -226,11 +234,11 @@ Elastic doesn't have a concept of array fields, so we rely again on the `subType -which allows us to order highlighted array items based on the source array (as long as it's present) +which allows us to order highlighted array items based on the source array (as long as the source array is present in the response)
-ordering.test.js +scalar-array.test.js ```javascript import assert from 'node:assert' @@ -244,7 +252,9 @@ const hit = { }, } -const actual = order(hit.highlight.names, hit._source.names) +// `fn` is just for illustration purposes +const actual = fn(hit.highlight.names, hit._source.names) + const expected = [undefined, 'Smith', undefined, 'Austen'] assert.deepEqual(actual, expected) @@ -253,3 +263,45 @@ assert.deepEqual(actual, expected)
Ideally elastic's response would include enough information to deduce the array index for each highlighted fragment but unfortunately this is still [an open issue](https://github.com/elastic/elasticsearch/issues/7416). + +Arrays of objects are ordered the same way. Additionally, their structure is made to follow the source array's structure: + +
+ +object-array.test.js + +```javascript +import assert from 'node:assert' + +const hit = { + _source: { + people: [ + { name: 'John' }, + { name: 'Smith' }, + { name: 'Jane' }, + { name: 'Austen' }, + ], + }, + highlight: { + 'people.name': ['Austen', 'Smith'], + }, +} + +// `fn` is just for illustration purposes +const actual = fn(hit.highlight['people.name'], hit._source.people) + +const expected = [ + undefined, + { name: 'Smith' }, + undefined, + { name: 'Austen' }, +] + +assert.deepEqual(actual, expected) +``` + +
+ +## 3. Source Merging + +Fields present in the highlighted results but not in the source still get merged onto the source. diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js new file mode 100644 index 000000000..1fa3ac923 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js @@ -0,0 +1,33 @@ +import _ from 'lodash/fp.js' +import F from 'futil' +import { getArrayFieldsPaths } from './util.js' + +export const mergeHighlightsOnSource = (schema, config, source, highlights) => { + // Account for an edge case where source arrays should only contain + // highlighted items but there are no highlights in the results. + if (config?.filterSourceArrays) { + for (const path of getArrayFieldsPaths(schema)) { + if (!_.has(path, highlights)) { + F.setOn(path, [], highlights) + } + } + } + + // Mutate source only for performance reasons + _.convert({ immutable: false }).mergeWith( + (src, hi) => { + if (_.isArray(src) && config?.filterSourceArrays) { + return F.reduceIndexed( + (acc, v, i) => + _.isUndefined(v) + ? acc + : F.push(_.isPlainObject(v) ? 
_.merge(src[i], v) : v, acc), + [], + hi + ) + } + }, + source, + highlights + ) +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.test.js new file mode 100644 index 000000000..2100c5fec --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.test.js @@ -0,0 +1,201 @@ +import { mergeHighlightsOnSource } from './merging.js' + +describe('mergeHighlightsOnSource()', () => { + const schema = { + fields: { + segments: { + elasticsearch: { meta: { subType: 'array' } }, + }, + }, + } + + it('should merge highlights', () => { + const source = { + state: 'California', + city: 'San Francisco', + book: { + title: 'Don Quixote', + blob: 'The quick brown fox jumped over the lazy dog', + authors: ['John Snow', 'Neo'], + }, + } + const highlights = { + city: 'San Francisco', + book: { + blob: ['The quick', 'jumped over'], + }, + } + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + state: 'California', + city: 'San Francisco', + book: { + title: 'Don Quixote', + blob: ['The quick', 'jumped over'], + authors: ['John Snow', 'Neo'], + }, + }) + }) + + it('should not clear source array when highlight result is missing field', () => { + const source = { + segments: ['The quick', 'brown fox', 'jumped over', 'the lazy dog'], + } + const highlights = {} + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + segments: ['The quick', 'brown fox', 'jumped over', 'the lazy dog'], + }) + }) + + it('should merge highlights on array of strings when source has value', () => { + const source = { + segments: ['The quick', 'brown fox', 'jumped over', 'the lazy dog'], + } + const highlights = { + segments: [ + 'The quick', + undefined, + 'jumped over', + undefined, + ], + } + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + 
segments: [ + 'The quick', + 'brown fox', + 'jumped over', + 'the lazy dog', + ], + }) + }) + + it('should merge highlights on array of strings when source is empty', () => { + const source = {} + const highlights = { + segments: ['The quick', 'jumped over'], + } + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + segments: ['The quick', 'jumped over'], + }) + }) + + it('should merge highlights on array of objects when source has value', () => { + const source = { + segments: [ + { start: 0, text: 'The quick' }, + { start: 10, text: 'brown fox' }, + { start: 20, text: 'jumped over' }, + { start: 30, text: 'the lazy dog' }, + ], + } + const highlights = { + segments: [ + { text: 'The quick' }, + undefined, + { text: 'jumped over' }, + undefined, + ], + } + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + segments: [ + { start: 0, text: 'The quick' }, + { start: 10, text: 'brown fox' }, + { start: 20, text: 'jumped over' }, + { start: 30, text: 'the lazy dog' }, + ], + }) + }) + + it('should merge highlights on array of objects when source is empty', () => { + const source = {} + const highlights = { + segments: [ + { text: 'The quick' }, + { text: 'jumped over' }, + ], + } + mergeHighlightsOnSource(schema, {}, source, highlights) + expect(source).toEqual({ + segments: [ + { text: 'The quick' }, + { text: 'jumped over' }, + ], + }) + }) + + describe('filterSourceArrays', () => { + it('should clear source array when highlight result is missing field', () => { + const source = { + segments: ['The quick', 'brown fox', 'jumped over', 'the lazy dog'], + } + const highlights = {} + mergeHighlightsOnSource( + schema, + { filterSourceArrays: true }, + source, + highlights + ) + expect(source).toEqual({ + segments: [], + }) + }) + + it('should remove non-highlighted items from array of strings', () => { + const source = { + segments: ['The quick', 'brown fox', 'jumped over', 'the lazy dog'], + } + const 
highlights = { + segments: [ + 'The quick', + undefined, + 'jumped over', + undefined, + ], + } + mergeHighlightsOnSource( + schema, + { filterSourceArrays: true }, + source, + highlights + ) + expect(source).toEqual({ + segments: ['The quick', 'jumped over'], + }) + }) + + it('should remove non-highlighted items from array of objects', () => { + const source = { + segments: [ + { start: 0, text: 'The quick' }, + { start: 10, text: 'brown fox' }, + { start: 20, text: 'jumped over' }, + { start: 30, text: 'the lazy dog' }, + ], + } + const highlights = { + segments: [ + { text: 'The quick' }, + undefined, + { text: 'jumped over' }, + undefined, + ], + } + mergeHighlightsOnSource( + schema, + { filterSourceArrays: true }, + source, + highlights + ) + expect(source).toEqual({ + segments: [ + { start: 0, text: 'The quick' }, + { start: 20, text: 'jumped over' }, + ], + }) + }) + }) +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js new file mode 100644 index 000000000..a23ee49dc --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -0,0 +1,108 @@ +import _ from 'lodash/fp.js' +import F from 'futil' +import { CartesianProduct } from 'js-combinatorics' + +/** + * Return names for all fields groups and their sub-fields in `schema`. 
+ */ +const getFieldsGroupsNames = _.memoize((schema) => { + const subFields = _.keys( + _.pickBy('highlight', schema.elasticsearch?.subFields) + ) + + const fields = _.flatMap((field) => { + const copy_to = field.elasticsearch?.copy_to + if (!_.isEmpty(copy_to)) { + const product = new CartesianProduct(copy_to, subFields) + return [...copy_to, ...Array.from(product).map(_.join('.'))] + } + return copy_to + }, schema.fields) + + return new Set(fields) +}, _.get('elasticsearch.index')) + +/** + * Return mappings for all sub-fields that can be highlighted in `mapping`. + */ +const getSubFieldsMappings = (schema, mapping) => + F.reduceIndexed( + (acc, sfMapping, sfName) => { + if (schema.elasticsearch.subFields?.[sfName]?.highlight) { + acc[sfName] = { + ...sfMapping, + meta: mapping.meta, + copy_to: _.map((k) => `${k}.${sfName}`, mapping.copy_to), + } + } + return acc + }, + {}, + mapping.fields + ) + +/** + * Return mappings for all fields and their sub-fields that can be highlighted + * in `schema`. + */ +const getFieldsMappings = _.memoize((schema) => { + const fieldsGroups = getFieldsGroupsNames(schema) + return F.reduceIndexed( + (acc, { elasticsearch: mapping }, name) => { + // Only include leaf fields (have mapping) and do not include fields + // groups. 
+ if (mapping && !fieldsGroups.has(name)) { + Object.assign(acc, { + [name]: mapping, + ..._.mapKeys( + (k) => `${name}.${k}`, + getSubFieldsMappings(schema, mapping) + ), + }) + } + return acc + }, + {}, + schema.fields + ) +}, _.get('elasticsearch.index')) + +export const getRequestBodyHighlight = (schema, node, config) => { + const query = node._meta?.relevantFilters + const querystr = JSON.stringify(query) + const allFieldsGroups = getFieldsGroupsNames(schema) + + // Pre-computed list of fields groups present in the query + const queryFieldsGroups = [] + F.walk()((val, key) => { + if (allFieldsGroups.has(val)) queryFieldsGroups.push(val) + if (allFieldsGroups.has(key)) queryFieldsGroups.push(key) + })(query) + + const getHighlightQuery = (mapping, name) => { + const toReplace = _.intersection(queryFieldsGroups, mapping.copy_to) + if (!_.isEmpty(toReplace)) { + const regexp = new RegExp(_.join('|', toReplace), 'g') + return JSON.parse(_.replace(regexp, name, querystr)) + } + } + + const mappingToHighlightConfig = (mapping, name) => { + const isBlob = mapping.meta?.subType === 'blob' + return F.omitBlank({ + fragment_size: isBlob ? 250 : null, + number_of_fragments: isBlob ? 
3 : null, + highlight_query: getHighlightQuery(mapping, name), + }) + } + + return { + pre_tags: [config.pre_tag], + post_tags: [config.post_tag], + number_of_fragments: 0, + fields: F.mapValuesIndexed( + mappingToHighlightConfig, + getFieldsMappings(schema) + ), + } +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js new file mode 100644 index 000000000..8fcbea61c --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js @@ -0,0 +1,170 @@ +import { getRequestBodyHighlight } from './request.js' + +describe('getRequestBodyHighlight()', () => { + it('should exclude fields without mappings', () => { + const schema = { + fields: { + other: {}, + state: { elasticsearch: {} }, + 'city.street': { elasticsearch: {} }, + }, + } + const actual = getRequestBodyHighlight(schema, {}, {}).fields + const expected = { + state: {}, + 'city.street': {}, + } + expect(actual).toEqual(expected) + }) + + it('should exclude group fields', () => { + const schema = { + fields: { + all: { elasticsearch: {} }, + address: { elasticsearch: {} }, + state: { elasticsearch: { copy_to: ['all', 'address'] } }, + 'city.street': { elasticsearch: { copy_to: ['all', 'address'] } }, + }, + } + const actual = getRequestBodyHighlight(schema, {}, {}).fields + const expected = { + state: {}, + 'city.street': {}, + } + expect(actual).toEqual(expected) + }) + + it('should include whitelisted sub fields', () => { + const schema = { + elasticsearch: { + subFields: { + keyword: { highlight: false }, + exact: { highlight: true }, + }, + }, + fields: { + state: { + elasticsearch: { + fields: { keyword: {}, exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + fields: { keyword: {}, exact: {} }, + }, + }, + }, + } + const actual = getRequestBodyHighlight(schema, {}, {}).fields + const expected = { + state: {}, + 
'state.exact': {}, + 'city.street': {}, + 'city.street.exact': {}, + } + expect(actual).toEqual(expected) + }) + + it('should generate configuration for blob text fields', () => { + const schema = { + elasticsearch: { + subFields: { + exact: { highlight: true }, + }, + }, + fields: { + state: { + elasticsearch: { + meta: { subType: 'blob' }, + fields: { exact: {} }, + }, + }, + }, + } + const actual = getRequestBodyHighlight(schema, {}, {}).fields + const expected = { + state: { + fragment_size: 250, + number_of_fragments: 3, + }, + 'state.exact': { + fragment_size: 250, + number_of_fragments: 3, + }, + } + expect(actual).toEqual(expected) + }) + + it('should generate highlight_query with fields groups replaced', () => { + const schema = { + fields: { + address: { elasticsearch: {} }, + state: { elasticsearch: { copy_to: ['address'] } }, + 'city.street': { elasticsearch: { copy_to: ['address'] } }, + }, + } + const query = (field) => ({ + bool: { + must: [ + { terms: { [field]: 'memphis' } }, + { query_string: { query: 'memphis', default_field: field } }, + ], + }, + }) + const node = { _meta: { relevantFilters: query('address') } } + const actual = getRequestBodyHighlight(schema, node, {}).fields + const expected = { + state: { + highlight_query: query('state'), + }, + 'city.street': { + highlight_query: query('city.street'), + }, + } + expect(actual).toEqual(expected) + }) + + it('should generate highlight_query with fields groups replaced for sub fields', () => { + const schema = { + elasticsearch: { + subFields: { + exact: { highlight: true }, + }, + }, + fields: { + address: { + elasticsearch: {}, + }, + state: { + elasticsearch: { + copy_to: ['address'], + fields: { exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + copy_to: ['address'], + fields: { exact: {} }, + }, + }, + }, + } + const query = (field) => ({ + bool: { + must: [ + { terms: { [field]: 'memphis' } }, + { query_string: { query: 'memphis', default_field: field } }, + ], + }, + }) + 
const node = { _meta: { relevantFilters: query('address.exact') } } + const actual = getRequestBodyHighlight(schema, node, {}).fields + const expected = { + state: {}, + 'state.exact': { highlight_query: query('state.exact') }, + 'city.street': {}, + 'city.street.exact': { highlight_query: query('city.street.exact') }, + } + expect(actual).toEqual(expected) + }) +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js new file mode 100644 index 000000000..e5c214b20 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js @@ -0,0 +1,66 @@ +import _ from 'lodash/fp.js' +import F from 'futil' +import { groupByIndexed, setOrReturn } from '../../../utils/futil.js' +import { getArrayFieldsPaths, mergeHighlights } from './util.js' + +const stripTags = _.curry((pre, post, str) => + str.replaceAll(pre, '').replaceAll(post, '') +) + +const getParentArrayPath = (schema, field) => + _.find((k) => field.startsWith(k), getArrayFieldsPaths(schema)) + +const lastWordRegex = /\.(\w+)$/ + +const getMultiFieldName = (schema, field) => { + const [multi, sub] = field.split(lastWordRegex) + return schema.fields[multi]?.elasticsearch?.fields?.[sub] ? 
multi : field +} + +export const transformHighlightResponse = (schema, config, hit) => { + // Group `city` and `city.exact` under `city` + const grouped = _.flow( + groupByIndexed((v, k) => getMultiFieldName(schema, k)), + _.mapValues(_.flatten) + )(hit.highlight) + + const getOrderedArrayFragments = (fragments, field) => { + const arrayPath = getParentArrayPath(schema, field) + const fragmentPath = field.slice(arrayPath.length + 1) // +1 strips off leading dot + const sourceArray = _.get(arrayPath, hit._source) + + if (_.isEmpty(sourceArray)) { + return _.map( + (fragment) => setOrReturn(fragmentPath, fragment, {}), + fragments + ) + } + + // Map of `array item -> highlighted fragment` to speed up ordering the + // highlighted fragments. + const fragmentsMap = F.arrayToObject( + stripTags(config.pre_tag, config.post_tag), + _.identity, + fragments + ) + + return _.map((item) => { + const plain = F.getOrReturn(fragmentPath, item) + const fragment = fragmentsMap[plain] + return fragment && setOrReturn(fragmentPath, fragment, {}) + }, sourceArray) + } + + return F.reduceIndexed( + (acc, fragments, field) => { + const path = getParentArrayPath(schema, field) + return path + ? _.set(path, getOrderedArrayFragments(fragments, field), acc) + : schema.fields[field]?.elasticsearch?.meta?.subType === 'blob' + ? 
_.set(field, fragments, acc) + : _.set(field, mergeHighlights(config, ...fragments), acc) + }, + {}, + grouped + ) +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js new file mode 100644 index 000000000..1cf162fcc --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js @@ -0,0 +1,214 @@ +import { transformHighlightResponse } from './response.js' + +const config = { pre_tag: '', post_tag: '' } + +describe('transformHighlightResponse()', () => { + describe('text fields', () => { + const schema = { + elasticsearch: { + subFields: { + exact: { highlight: true }, + }, + }, + fields: { + state: { + elasticsearch: { + fields: { exact: {} }, + }, + }, + 'city.street': { + elasticsearch: { + fields: { exact: {} }, + }, + }, + }, + } + + it('should merge fragments', () => { + const hit = { + _source: { + name: 'John Wayne', + state: 'New Jersey', + city: { street: 'Jefferson Ave' }, + }, + highlight: { + state: ['New Jersey'], + 'state.exact': ['New Jersey'], + 'city.street': ['Jefferson Ave'], + 'city.street.exact': ['Jefferson Ave'], + }, + } + const actual = transformHighlightResponse(schema, config, hit) + const expected = { + state: 'New Jersey', + city: { street: 'Jefferson Ave' }, + } + expect(actual).toEqual(expected) + }) + }) + + describe('blob text fields', () => { + const schema = { + elasticsearch: { + subFields: { + exact: { highlight: true }, + }, + }, + fields: { + blob: { + elasticsearch: { + meta: { subType: 'blob' }, + fields: { exact: {} }, + }, + }, + }, + } + + it('should not merge fragments', () => { + const hit = { + _source: {}, + highlight: { + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + ], + 'blob.exact': [ + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + }, + } + const actual = transformHighlightResponse(schema, config, 
hit) + const expected = { + blob: [ + 'Meridian St.', + 'Collins Ave.', + 'Ocean Drive', + 'Jefferson Ave.', + 'Washington St.', + 'Lincoln Rd.', + ], + } + expect(actual).toEqual(expected) + }) + }) + + describe('arrays of strings', () => { + const schema = { + fields: { + state: {}, + 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, + }, + } + + it('should do nothing when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + const actual = transformHighlightResponse(schema, config, hit) + const expected = { + city: { street: ['Collins Ave.', 'Meridian St.'] }, + } + expect(actual).toEqual(expected) + }) + + it('should order items when source has value', () => { + const hit = { + _source: { + city: { + street: [ + 'Jefferson Ave.', + 'Meridian St.', + 'Washington St.', + 'Collins Ave.', + ], + }, + }, + highlight: { + 'city.street': ['Collins Ave.', 'Meridian St.'], + }, + } + const actual = transformHighlightResponse(schema, config, hit) + const expected = { + city: { + street: [ + undefined, + 'Meridian St.', + undefined, + 'Collins Ave.', + ], + }, + } + expect(actual).toEqual(expected) + }) + }) + + describe('arrays of objects', () => { + const schema = { + fields: { + 'city.street': { elasticsearch: { meta: { subType: 'array' } } }, + 'city.street.name': {}, + }, + } + + it('should do nothing when source is empty', () => { + const hit = { + _source: {}, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + const actual = transformHighlightResponse(schema, config, hit) + const expected = { + city: { + street: [ + { name: 'Collins Ave.' }, + { name: 'Meridian St.' }, + ], + }, + } + expect(actual).toEqual(expected) + }) + + it('should order items when source has value', () => { + const hit = { + _source: { + city: { + street: [ + { number: 101, name: 'Jefferson Ave.' }, + { number: 235, name: 'Meridian St.' 
}, + { number: 88, name: 'Washington St.' }, + { number: 9, name: 'Collins Ave.' }, + ], + }, + }, + highlight: { + 'city.street.name': [ + 'Collins Ave.', + 'Meridian St.', + ], + }, + } + const actual = transformHighlightResponse(schema, config, hit) + const expected = { + city: { + street: [ + undefined, + { name: 'Meridian St.' }, + undefined, + { name: 'Collins Ave.' }, + ], + }, + } + expect(actual).toEqual(expected) + }) + }) +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js new file mode 100644 index 000000000..f3674bc0e --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js @@ -0,0 +1,73 @@ +import _ from 'lodash/fp.js' +import F from 'futil' + +export const getArrayFieldsPaths = _.memoize( + (schema) => + _.keys( + _.pickBy({ elasticsearch: { meta: { subType: 'array' } } }, schema.fields) + ), + _.get('elasticsearch.index') +) + +/** + * Returns an array of [start, end] ranges that correspond to substrings + * enclosed in pre/post tags. The ranges correspond to the plain string without + * tags. For example given the tags `{ pre: '', post: '' }`, this + * function will return [[2, 5], [6, 9]] for the string + * + * `A red car` + */ +const getHighlightRanges = (pre, post, str) => { + let runningTagsLength = 0 + const ranges = [] + const regexp = new RegExp(`${pre}(?.*?)${post}`, 'g') + for (const match of str.matchAll(regexp)) { + const start = match.index - runningTagsLength + const end = start + match.groups.capture.length + ranges.push([start, end]) + runningTagsLength += pre.length + post.length + } + return ranges +} + +/** + * Wrap substrings given by [start, end] ranges with pre/post tags + * + * This function could extend `F.highlight` functionality to accept ranges. 
For + * example: + * + * ```javascript + * const braceHighlight = F.highlight("{", "}") + * braceHighlight([[2, 4], [9, 10]], "hello world") // -> "he{llo} wor{ld}" + * ```` + */ +const highlightFromRanges = (pre, post, ranges, str) => { + const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) + const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) + const highlighted = str.replace(/./g, (match, index) => { + if (index in starts) return `${pre}${match}` + if (index in ends) return `${post}${match}` + return match + }) + // Sometimes the last word is highlighted so the index for the last tag is + // `str.length` but `replace` only makes it up to `str.length - 1`. + return _.last(_.last(ranges)) === str.length + ? `${highlighted}${post}` + : highlighted +} + +export const mergeHighlights = (config, ...strs) => { + // This may look unnecessary but merging highlights is not cheap and many + // times is not even needed + if (_.size(strs) <= 1) return _.head(strs) + const { pre_tag: pre, post_tag: post } = config + const ranges = F.mergeRanges( + _.flatMap((str) => getHighlightRanges(pre, post, str), strs) + ) + return highlightFromRanges( + pre, + post, + ranges, + _.head(strs).replaceAll(pre, '').replaceAll(post, '') + ) +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js new file mode 100644 index 000000000..387d4f0c3 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js @@ -0,0 +1,47 @@ +import { mergeHighlights } from './util.js' + +const config = { pre_tag: '', post_tag: '' } + +describe('mergeHighlights()', () => { + it('should merge highlights that do not overlap', () => { + const actual = mergeHighlights( + config, + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog' + ) + const expected = + 'The quick brown fox jumps over the lazy 
dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights that overlap', () => { + const actual = mergeHighlights( + config, + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog' + ) + const expected = 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights that are contained within another', () => { + const actual = mergeHighlights( + config, + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog' + ) + const expected = 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) + + it('should merge highlights at the end of the string', () => { + const actual = mergeHighlights( + config, + 'The quick brown fox jumps over the lazy dog', + 'The quick brown fox jumps over the lazy dog' + ) + const expected = + 'The quick brown fox jumps over the lazy dog' + expect(actual).toEqual(expected) + }) +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index c531a39b6..5beec0344 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,13 +1,9 @@ -// https://stackoverflow.com/questions/70177601/does-elasticsearch-provide-highlighting-on-copy-to-field-in-their-newer-versio -// https://github.com/elastic/elasticsearch/issues/5172 - import _ from 'lodash/fp.js' import F from 'futil' -import { - getHighlightFields, - alignHighlightsWithSourceStructure, -} from './highlight.js' import { getField } from '../../utils/fields.js' +import { getRequestBodyHighlight } from './highlighting/request.js' +import { transformHighlightResponse } from './highlighting/response.js' +import { mergeHighlightsOnSource } from './highlighting/merging.js' const defaultHighlightConfig = { pre_tag: '', @@ -33,28 
+29,28 @@ export default { explain: node.explain, // Without this, ES7+ stops counting at 10k instead of returning the actual count track_total_hits: true, + highlight: + highlightConfig.enable && + getRequestBodyHighlight(schema, node, highlightConfig), _source: F.omitBlank({ includes: node.include, excludes: node.exclude }), - highlight: highlightConfig.behavior && { - pre_tags: [highlightConfig.pre_tag], - post_tags: [highlightConfig.post_tag], - number_of_fragments: - highlightConfig.behavior === 'mergeOnSource' ? 0 : undefined, - fields: getHighlightFields(schema, node._meta.relevantFilters), - }, }) const response = await search(body) const results = response.hits.hits - if (highlightConfig.behavior === 'mergeOnSource') { - const getHighlights = alignHighlightsWithSourceStructure( - schema, - highlightConfig - ) - for (const result of results) { - for (const [k, v] of _.toPairs(getHighlights(result))) { - F.setOn(k, v, result._source) - } + if (highlightConfig.enable) { + for (const hit of results) { + const highlights = transformHighlightResponse( + schema, + highlightConfig, + hit + ) + mergeHighlightsOnSource( + schema, + highlightConfig, + hit._source, + highlights + ) } } diff --git a/packages/provider-elasticsearch/src/example-types/results/type.d.ts b/packages/provider-elasticsearch/src/example-types/results/type.d.ts new file mode 100644 index 000000000..05625ba25 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/type.d.ts @@ -0,0 +1,38 @@ +/** + * Typings for the result example type. + */ + +interface HighlightConfig { + /** + * Whether to send highlighting configuration to elastic and merge + * highlighting results onto source. Defaults to `false`. + */ + enable?: boolean + /** + * Whether to remove non-highlighted items in source arrays. Defaults to + * `false`. 
+ */ + filterSourceArrays?: boolean + /** + * Just like elastic's + * [pre_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), + * except we only support one tag for now. Defaults to ``. + */ + pre_tag?: string + /** + * Just like elastic's + * [post_tags](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/highlighting.html#highlighting-settings), + * except we only support one tag for now. Defaults to ``. + */ + post_tag?: string +} + +interface Node { + /** + * Custom configuration to control highlighting of results. Currently we don't + * pick up properties from elastic's + * [highlighting configuration](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) + * so including them here won't have any effect. + */ + highlight: HighlightConfig +} From dab946a7ed688f124cb22dcbbd6305638fbbdc41 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 16 Nov 2023 09:54:33 -0500 Subject: [PATCH 17/30] Finish documentation --- .../src/example-types/results/highlighting/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md b/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md index c179edd36..288f4f1b0 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/README.md @@ -304,4 +304,6 @@ assert.deepEqual(actual, expected) ## 3. Source Merging -Fields present in the highlighted results but not in the source still get merged onto the source. +Merging highlighted results into `_source` is done via a straightforward lodash's `merge`. Highlighted fields not present in `_source` still get merged onto it. + +Arrays get special treatment when `filterSourceArrays` is set: non-highlighted items are discarded. 
From 193e91844d663af7bdf4d66e068c97c685a844ea Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 17 Nov 2023 10:27:23 -0500 Subject: [PATCH 18/30] Address feedback --- .../results/highlighting/request.js | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index a23ee49dc..bf0bd3387 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -12,11 +12,9 @@ const getFieldsGroupsNames = _.memoize((schema) => { const fields = _.flatMap((field) => { const copy_to = field.elasticsearch?.copy_to - if (!_.isEmpty(copy_to)) { - const product = new CartesianProduct(copy_to, subFields) - return [...copy_to, ...Array.from(product).map(_.join('.'))] - } - return copy_to + if (_.isEmpty(copy_to)) return copy_to + const product = new CartesianProduct(copy_to, subFields) + return [...copy_to, ...Array.from(product).map(_.join('.'))] }, schema.fields) return new Set(fields) @@ -26,20 +24,14 @@ const getFieldsGroupsNames = _.memoize((schema) => { * Return mappings for all sub-fields that can be highlighted in `mapping`. 
*/ const getSubFieldsMappings = (schema, mapping) => - F.reduceIndexed( - (acc, sfMapping, sfName) => { - if (schema.elasticsearch.subFields?.[sfName]?.highlight) { - acc[sfName] = { - ...sfMapping, - meta: mapping.meta, - copy_to: _.map((k) => `${k}.${sfName}`, mapping.copy_to), - } - } - return acc - }, - {}, - mapping.fields - ) + _.flow( + F.pickByIndexed((v, k) => schema.elasticsearch.subFields?.[k]?.highlight), + F.mapValuesIndexed((v, k) => ({ + ...v, + meta: mapping.meta, + copy_to: _.map((multi) => `${multi}.${k}`, mapping.copy_to), + })) + )(mapping.fields) /** * Return mappings for all fields and their sub-fields that can be highlighted From 7fd4f4fb0bd11243da8395aafc5fcf76455c16e8 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 17 Nov 2023 10:40:01 -0500 Subject: [PATCH 19/30] Fix tests --- .../src/example-types/results/highlighting/request.js | 2 +- .../results/highlighting/response.test.js | 10 ---------- .../src/schema-data/schema-with-types.js | 8 -------- .../src/schema-data/schema-without-types.js | 8 -------- 4 files changed, 1 insertion(+), 27 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index bf0bd3387..861f1b725 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -25,7 +25,7 @@ const getFieldsGroupsNames = _.memoize((schema) => { */ const getSubFieldsMappings = (schema, mapping) => _.flow( - F.pickByIndexed((v, k) => schema.elasticsearch.subFields?.[k]?.highlight), + F.pickByIndexed((v, k) => schema.elasticsearch?.subFields?.[k]?.highlight), F.mapValuesIndexed((v, k) => ({ ...v, meta: mapping.meta, diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js 
b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js index 1cf162fcc..96408562e 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js @@ -5,11 +5,6 @@ const config = { pre_tag: '', post_tag: '' } describe('transformHighlightResponse()', () => { describe('text fields', () => { const schema = { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, fields: { state: { elasticsearch: { @@ -49,11 +44,6 @@ describe('transformHighlightResponse()', () => { describe('blob text fields', () => { const schema = { - elasticsearch: { - subFields: { - exact: { highlight: true }, - }, - }, fields: { blob: { elasticsearch: { diff --git a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js index 657b321a4..f8b8186b2 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js @@ -3,10 +3,6 @@ export default { elasticsearch: { index: 'movies', type: 'movie', - subFields: { - keyword: { highlight: false }, - exact: { highlight: true }, - }, }, fields: { actors: { @@ -307,10 +303,6 @@ export default { index: 'imdb', type: 'movie', aliasOf: 'movies', - subFields: { - keyword: { highlight: false }, - exact: { highlight: true }, - }, }, fields: { actors: { diff --git a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js index 4b3c90156..a2005067d 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js @@ -2,10 +2,6 @@ export default { movies: { elasticsearch: { index: 'movies', - subFields: { - keyword: { highlight: 
false }, - exact: { highlight: true }, - }, }, fields: { actors: { @@ -49,10 +45,6 @@ export default { elasticsearch: { index: 'imdb', aliasOf: 'movies', - subFields: { - keyword: { highlight: false }, - exact: { highlight: true }, - }, }, fields: { actors: { From 1c39cc6992cb3313b9887c621d8d56bc60d3ff7e Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 17 Nov 2023 10:46:33 -0500 Subject: [PATCH 20/30] Update alpha version --- packages/provider-elasticsearch/package.json | 2 +- yarn.lock | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/provider-elasticsearch/package.json b/packages/provider-elasticsearch/package.json index 730b8170e..60397808f 100644 --- a/packages/provider-elasticsearch/package.json +++ b/packages/provider-elasticsearch/package.json @@ -1,6 +1,6 @@ { "name": "contexture-elasticsearch", - "version": "1.25.5", + "version": "1.26.0-alpha.1", "description": "ElasticSearch Provider for Contexture", "type": "module", "exports": { diff --git a/yarn.lock b/yarn.lock index 41e31de9c..e2e9d5adb 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7765,7 +7765,23 @@ __metadata: languageName: unknown linkType: soft -"contexture-elasticsearch@^1.22.3, contexture-elasticsearch@workspace:packages/provider-elasticsearch": +"contexture-elasticsearch@npm:^1.22.3": + version: 1.25.5 + resolution: "contexture-elasticsearch@npm:1.25.5" + dependencies: + "@elastic/datemath": ^2.3.0 + debug: ^4.3.1 + futil: ^1.76.0 + js-combinatorics: ^2.1.1 + lodash: ^4.17.4 + moment: ^2.18.1 + moment-timezone: ^0.5.28 + unidecode: ^0.1.8 + checksum: 9f599cafe0cdb21fcff255fb43b32735852ec1a2ef486c0dc205814e17708756ae202536d67bb3cf82d39a86a244592a7be92c5634fbf82129b333b1c0403703 + languageName: node + linkType: hard + +"contexture-elasticsearch@workspace:packages/provider-elasticsearch": version: 0.0.0-use.local resolution: "contexture-elasticsearch@workspace:packages/provider-elasticsearch" dependencies: From 
fa127615c84c1cf0c3f4e364945de802293d237b Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Mon, 20 Nov 2023 13:02:22 -0500 Subject: [PATCH 21/30] Address feedback --- .../src/example-types/results/highlighting.js | 293 ------------------ .../results/highlighting/merging.js | 13 +- 2 files changed, 8 insertions(+), 298 deletions(-) delete mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting.js diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting.js b/packages/provider-elasticsearch/src/example-types/results/highlighting.js deleted file mode 100644 index 242e336fb..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting.js +++ /dev/null @@ -1,293 +0,0 @@ -import F from 'futil' -import _ from 'lodash/fp.js' - -export let anyRegexesMatch = (regexes, criteria) => - !!_.find((pattern) => new RegExp(pattern).test(criteria), regexes) - -export let replaceHighlightTagRegex = (nodeHighlight) => { - let { pre_tags, post_tags } = nodeHighlight - return new RegExp(_.join('|', _.concat(pre_tags, post_tags)), 'g') -} - -export let containsHighlightTagRegex = (nodeHighlight) => { - let { pre_tags, post_tags } = nodeHighlight - let tagRegexes = _.map( - ([pre, post]) => `${pre}.+?${post}`, - _.zip(pre_tags, post_tags) - ) - return new RegExp(_.join('|', tagRegexes)) -} - -// Convert the fields array to object map where we only pick the first key from the objects -// Highlight fields can be either strings or objects with a single key which value is the ES highlights object config -// If the highlight field is specific as a string only then it uses the default highlights config -export let arrayToHighlightsFieldMap = _.flow( - _.map(F.when(_.isString, (x) => ({ [x]: {} }))), - F.ifElse(_.isEmpty, _.always({}), _.mergeAll) -) - -// Replace _source value with highlighted result for `fieldName` -let inlineHighlightInSource = (hit, fieldName) => { - if (fieldName.endsWith('.*')) { - // Get 
the root key e.g. "documents" from "documents.*" - let root = fieldName.split('.*')[0] - // Get all the highlights that start with the root key - let matchedKeys = _.filter( - (key) => _.startsWith(`${root}.`, key), - _.keys(hit.highlight) - ) - _.each((key) => F.setOn(key, hit.highlight[key], hit._source), matchedKeys) - } else { - let highlights = hit.highlight[fieldName] - if (highlights) { - F.setOn( - fieldName, - highlights.length > 1 ? highlights : highlights[0], - hit._source - ) - } - } -} - -let getAdditionalFields = ({ schemaHighlight, hit, include, inlineKeys }) => { - let additionalFields = [] - let { additional, additionalExclusions, inline, nested, nestedPath } = - schemaHighlight - - F.eachIndexed((highlightedValue, fieldName) => { - // Whether `fieldName` is matched by any field name in `additional` - let additionalMatches = anyRegexesMatch(additional, fieldName) - - // Exclude explicit exclusions, inline, and nested highlight fields - let additionalExclusionMatches = - anyRegexesMatch(additionalExclusions, fieldName) || - anyRegexesMatch(inline, fieldName) || - anyRegexesMatch(nested, fieldName) - - // Whether there is an include array and `fieldName` is contained in - // `inline` but is not in `include` - let inlineButNotIncluded = - include && _.includes(fieldName, _.difference(inlineKeys, include)) - - if ( - inlineButNotIncluded || - (additionalMatches && !additionalExclusionMatches) - ) { - additionalFields.push({ - label: fieldName, - value: highlightedValue[0], - }) - } - - if ( - _.includes(fieldName, nested) && - _.isArray(highlightedValue) && - !_.includes(nestedPath, fieldName) - ) { - additionalFields.push({ - label: fieldName, - value: highlightedValue, - }) - } - }, hit.highlight) - - return additionalFields -} - -let handleNested = ({ - schemaHighlight, - nodeHighlight, - hit, - additionalFields, -}) => { - let { nested, nestedPath, filterNested } = schemaHighlight - let replaceTagRegex = replaceHighlightTagRegex(nodeHighlight) - 
let containsTagRegex = containsHighlightTagRegex(nodeHighlight) - - F.eachIndexed((highlightedValue, fieldName) => { - if ( - _.includes(fieldName, nested) && - !_.find({ label: fieldName }, additionalFields) - ) { - // Clarify [{a}, {b}] case and not [a,b] case. See - // https://github.com/elastic/elasticsearch/issues/7416 - // TODO: We can support arrays of scalars as long as we make sure that - // `number_of_fragments` is 0 for the highlighted field so that we can - // compare the array items in full. - if (fieldName === nestedPath) { - throw new Error('Arrays of scalars not supported') - } - - let field = fieldName.replace(`${nestedPath}.`, '') - - // For arrays, strip the highlighting wrapping and compare to the array - // contents to match up - for (let val of highlightedValue) { - let originalValue = val.replace(replaceTagRegex, '') - let childItem = _.find( - // TODO: Remove this asap - (item) => _.trim(_.get(field, item)) === _.trim(originalValue), - _.get(nestedPath, hit._source) - ) - if (childItem) F.setOn(field, val, childItem) - } - - if (filterNested) { - let filtered = _.filter( - (arrayField) => containsTagRegex.test(_.get(field, arrayField)), - _.get(nestedPath, hit._source) - ) - F.setOn(nestedPath, filtered, hit._source) - } - } - }, hit.highlight) - - if (filterNested) { - for (const path of nested) { - if (!_.has(path, hit.highlight)) { - F.setOn(nestedPath, [], hit._source) - } - } - } -} - -// TODO: Support multiple nestedPaths... 
-// TODO: Support Regex and Function basis for all options -// TODO: Make this function pure, do not mutate `hit._source` -export let highlightResults = ({ - schemaHighlight, // The schema highlight configuration - nodeHighlight, // The result node's highlight configuration - hit, // The ES result - include, // The columns to return -}) => { - let { inline, inlineAliases } = schemaHighlight - let inlineKeys = _.keys(arrayToHighlightsFieldMap(inline)) - - let additionalFields = getAdditionalFields({ - schemaHighlight, - hit, - include, - inlineKeys, - }) - - // TODO: Make this function pure, do not mutate `hit._source` - handleNested({ - schemaHighlight, - nodeHighlight, - hit, - additionalFields, - }) - - // Copy over all inline highlighted fields - if (hit.highlight) { - for (let field of inlineKeys) { - // TODO: Make this function pure, do not mutate `hit._source` - inlineHighlightInSource(hit, field) - } - - // Do the field replacement for the inlineAliases fields - for (let [field, mapToField] of _.toPairs(inlineAliases)) { - // if we have a highlight result matching the inlineAliases TO field - if (hit.highlight[mapToField]) { - // if the field is only in inlineAliases OR it is in both but not inlined/highlighted already by the inline section - if ( - !_.includes(field, inlineKeys) || - (_.includes(field, inlineKeys) && !hit.highlight[field]) - ) { - // TODO: Do not mutate `hit._source` - F.setOn(field, hit.highlight[mapToField][0], hit._source) - } - } - } - } - - return { additionalFields } -} - -const mergeReplacingArrays = _.mergeWith((target, src) => { - if (_.isArray(src)) return src -}) - -export let getHighlightSettings = (schema, node) => { - // Users can opt-out of highlighting by setting `node.highlight` to `false` - // explicitly. - // TODO: Reconsider if it makes more sense to opt-in instead of opt-out since - // highlighting decreases performance. 
- let shouldHighlight = - node.highlight !== false && _.isPlainObject(schema.elasticsearch?.highlight) - - // Highlighting starts with defaults in the schema first - if (shouldHighlight) { - // Result nodes can override schema highlighting configuration - let schemaHighlight = mergeReplacingArrays( - schema.elasticsearch.highlight, - node.highlight - ) - - let showOtherMatches = _.getOr(false, 'showOtherMatches', node) - let schemaInline = _.getOr([], 'inline', schemaHighlight) - - // Get field names from `inlineAliases` that are also in `node.include` - let schemaInlineAliases = _.flow( - _.getOr({}, 'inlineAliases'), - _.entries, - _.filter(([k]) => _.includes(k, node.include)), - _.flatten - )(schemaHighlight) - - // Add field names from `node.highlight.fields` to - // `schema.elasticsearch.highlight.inline` so we have them as targets for - // highlight replacement - schemaHighlight = _.set( - 'inline', - _.concat(schemaInline, _.keys(node.highlight?.fields)), - schemaHighlight - ) - - // Convert the highlight fields from array to an object map - let fields = _.flow( - _.pick(['inline', 'additionalFields', 'nested']), // Get the highlight fields we will be working with - _.values, - _.flatten, - _.concat(schemaInlineAliases), // Include the provided field aliases if any - _.uniq, - arrayToHighlightsFieldMap, // Convert the array to object map so we can simply _.pick again - _.merge(schemaHighlight.fields || {}), - (filtered) => - showOtherMatches - ? // Highlight on all fields specified in the initial _.pick above. - filtered - : // Only highlight on the fields listed in the node include section and their aliases (if any) - _.pick(_.concat(node.include, schemaInlineAliases), filtered) - )(schemaHighlight) - - // Properties we support as part of the highlighting configuration that - // elastic does not have knowledge of. 
- let nonElasticProperties = [ - 'inline', - 'inlineAliases', - 'additional', - 'additionalExclusions', - 'additionalFields', - 'nested', - 'nestedPath', - 'filterNested', - ] - - let nodeHighlight = _.merge( - { - // The default schema highlighting settings w/o the fields - pre_tags: [''], - post_tags: [''], - require_field_match: false, - number_of_fragments: 0, - fields, - }, - _.omit(nonElasticProperties, node.highlight) - ) - - return { schemaHighlight, nodeHighlight } - } - - return {} -} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js index 1fa3ac923..440fee301 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/merging.js @@ -15,15 +15,18 @@ export const mergeHighlightsOnSource = (schema, config, source, highlights) => { // Mutate source only for performance reasons _.convert({ immutable: false }).mergeWith( - (src, hi) => { + (src, target) => { if (_.isArray(src) && config?.filterSourceArrays) { return F.reduceIndexed( - (acc, v, i) => - _.isUndefined(v) + (acc, value, index) => + _.isUndefined(value) ? acc - : F.push(_.isPlainObject(v) ? _.merge(src[i], v) : v, acc), + : F.push( + _.isPlainObject(value) ? 
_.merge(src[index], value) : value, + acc + ), [], - hi + target ) } }, From c78951679309c361527dd547b3229bd61853f203 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Thu, 21 Dec 2023 14:18:59 -0500 Subject: [PATCH 22/30] Better comment --- .../src/example-types/results/highlighting/response.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js index bdde933b7..5b48543b2 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js @@ -135,11 +135,11 @@ export const removePathsFromSource = (schema, hit, paths) => { /* * Merge elastic hit highlights onto hit source. * - * Future developer: once you clever lil beast discover that the following - * function is a dirty and unholy `_.merge` you will want to refactor it so that - * all is good can prevail again. However, before you do that, consider that - * this implementation is about 100x faster than `_.merge`. Query 100 records - * with arrays of thousands of elements each and convince yourself. + * As a clever developer, you will notice that the following function is a dirty + * and unholy version of `_.merge`. So before you refactor it to use exactly that, + * consider that this implementation is about 100x faster than (immutable) + * `_.merge`. Query 100 records with arrays of thousands of elements each and + * convince yourself.
*/ export const mergeHighlightsOnSource = (schema, hit) => { for (const path in hit.highlight) { From 84319ebd9ecd875b88e60f5eb89b7107962029d9 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 22 Dec 2023 00:36:55 -0500 Subject: [PATCH 23/30] More documentation --- .../src/example-types/results/README.md | 78 +++++++++---------- .../results/highlighting/request.js | 16 ++-- .../results/highlighting/request.test.js | 8 ++ .../results/highlighting/response.js | 35 +++++---- .../results/highlighting/response.test.js | 8 +- .../results/highlighting/search.js | 22 +++--- .../results/highlighting/util.js | 6 +- .../src/example-types/results/index.js | 6 +- .../src/example-types/results/type.d.ts | 35 +++++++++ 9 files changed, 131 insertions(+), 83 deletions(-) create mode 100644 packages/provider-elasticsearch/src/example-types/results/type.d.ts diff --git a/packages/provider-elasticsearch/src/example-types/results/README.md b/packages/provider-elasticsearch/src/example-types/results/README.md index 1d95cebde..1c8fe5f83 100644 --- a/packages/provider-elasticsearch/src/example-types/results/README.md +++ b/packages/provider-elasticsearch/src/example-types/results/README.md @@ -1,24 +1,16 @@ -# Included fields - -TODO: Talk about include/exclude and how highlighting gets affected by it - -If fields inside arrays of objects are specified in `node.include` (after wildcards are expanded), they will be also included in the highlighted results for the array of objects regardless of whether they are excluded from source. - # Highlighting Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. See `./type.d.ts` for more details on the API. -There are three pieces involved in highlighting: +## 1. Request -1. Building out highlight configuration to send with an elastic request. -2. Transforming highlighted fragments in the elastic response into a structure similar to that of `_source`. -3. 
Merging such structure into a hit's `_source`. +### Fields included in `_source` -## 1. Request +For the most part, we pass the `include` and `exclude` properties on the results node verbatim to elastic. We do however include paths for fields in arrays of objects as-needed (e.g. if not already included). This is strictly an implementation detail that allows us to correlate highlighted results for arrays of objects to the array items they belong to. Fields that were not originally included in the node are removed from the response since it would be surprising for users to get values for fields they did not request. ### Fields sent for highlighting -We assume that users want to highlight all the fields present in the query. The most logical approach is to extract relevant fields from the query and send them for highlighting, but for simplicity's sake we send every field in the schema, with some caveats. +We assume that users want to highlight all the fields present in the query. The most logical approach is to extract relevant fields from the query and send them for highlighting, but for simplicity's sake we send every field in the schema, with the following caveats. #### 1. Sub-fields @@ -34,8 +26,8 @@ Whitelisted sub-fields are sent for highlighting, since they could be present in "subFields": { // `{field}.keyword` will *not* be sent for highlighting. "keyword": { "highlight": false }, - // `{field}.exact` will be sent for highlighting. - "exact": { "highlight": true } + // `{field}.subfield` will be sent for highlighting. + "subfield": { "highlight": true } } }, "fields": { @@ -44,8 +36,8 @@ Whitelisted sub-fields are sent for highlighting, since they could be present in "elasticsearch": { "fields": { "keyword": {}, - // `state.exact` will be sent for highlighting. - "exact": {} + // `state.subfield` will be sent for highlighting. + "subfield": {} } } } @@ -167,9 +159,9 @@ In the spirit of keeping our API simple, we generate opinionated highlighting co ## 2. 
Response -Currently the only supported behavior is to merge highlighted fragments into `_source` (we may provide an option to opt-out in the future). For this approach to work, fragments must contain the entire field value, so we set [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) to `0` in the request. The exception being blob text fields which set `number_of_fragments` to something `> 0` since they're too big to highlight in their entirety. +Currently the only supported behavior is to merge highlighted fragments into `_source` (we may provide an option to opt-out in the future). For this approach to work, fragments must contain the entire field value, so we set [number_of_fragments](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) to `0` in the request. The exception is blob text fields, which set `number_of_fragments` to a number `> 0` since they're too big to highlight in their entirety. -Assumming `exact` to be a sub-field of `details`, the following rules apply when transforming the highlight response: +Assuming `subfield` to be a sub-field of `details`, the following rules apply when transforming the highlight response: #### 1. 
Text fields @@ -178,7 +170,7 @@ The first fragments of each field (which should contain the entire field value b ```json { "details": ["The lazy fox"], - "details.exact": ["The lazy fox"] + "details.subfield": ["The lazy fox"] } ``` @@ -199,7 +191,7 @@ Blob text fields fragments are concatenated because that's the only sensible thi ```json { "details": ["The lazy fox", "jumped over"], - "details.exact": ["The lazy fox", "jumped over"] + "details.subfield": ["The lazy fox", "jumped over"] } ``` @@ -227,7 +219,7 @@ Elastic doesn't have a concept of array fields, so we rely again on the `subType ```jsonc { "fields": { - "gloriousArrayField": { + "library.books": { "elasticsearch": { "meta": { "subType": "array" @@ -240,7 +232,7 @@ Elastic doesn't have a concept of array fields, so we rely again on the `subType -which allows us to order highlighted array items based on the source array (as long as the source array is present in the response) +which allows us to order highlighted array items based on the source array
@@ -261,7 +253,10 @@ const hit = { // `fn` is just for illustration purposes const actual = fn(hit.highlight.names, hit._source.names) -const expected = [undefined, 'Smith', undefined, 'Austen'] +const expected = { + 1: 'Smith', + 3: 'Austen', +} assert.deepEqual(actual, expected) ``` @@ -270,7 +265,7 @@ assert.deepEqual(actual, expected) Ideally elastic's response would include enough information to deduce the array index for each highlighted fragment but unfortunately this is still [an open issue](https://github.com/elastic/elasticsearch/issues/7416). -Arrays of objects are equally ordered. Additionally, their structure is made to follow the source array's structure +Arrays of objects are equally ordered. Additionally, their structure follows the source array's structure
@@ -281,35 +276,36 @@ import assert from 'node:assert' const hit = { _source: { - people: [ - { name: 'John' }, - { name: 'Smith' }, - { name: 'Jane' }, - { name: 'Austen' }, + friends: [ + { name: 'John', age: 34 }, + { name: 'Smith', age: 21 }, + { name: 'Jane', age: 83 }, + { name: 'Austen', age: 3 }, ], }, highlight: { - 'people.name': ['Austen', 'Smith'], + 'friends.name': ['Austen', 'Smith'], }, } // `fn` is just for illustration purposes -const actual = fn(hit.highlight['people.name'], hit._source.people) +const actual = fn(hit.highlight['friends.name'], hit._source.friends) -const expected = [ - undefined, - { name: 'Smith' }, - undefined, - { name: 'Austen' }, -] +const expected = { + 1: { name: 'Smith' }, + 3: { name: 'Austen' }, +} assert.deepEqual(actual, expected) ```
-## 3. Source Merging - -Merging highlighted results into `_source` is done via a straightforward lodash's `merge`. Highlighted fields not present in `_source` still get merged onto it. +`nestedArrayIncludes` are also handled in this step. Assuming the example above and `nestedArrayIncludes = { friends: ["age"] }`, the highlighted results become -Arrays get special treatment when `filterSourceArrays` is set: non-highlighted items are discarded. +```javascript +{ + 1: { name: 'Smith', age: 21 }, + 3: { name: 'Austen', age: 3 }, +} +``` diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index 1156b2933..6363f5405 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -96,15 +96,13 @@ const getHighlightSubFieldsNames = (schema) => /* * Paths of all fields groups and their subfields that can be highlighted.
*/ -const getAllHighlightFields = _.memoize((schema) => { +export const getAllHighlightFields = _.memoize((schema) => { const subFieldsNames = getHighlightSubFieldsNames(schema) return F.reduceIndexed( (acc, field, path) => { @@ -157,7 +155,7 @@ export const getRequestHighlightFields = (schema, node) => { node._meta?.relevantFilters ) - const querystr = JSON.stringify(node._meta?.relevantFilters) + const queryStr = JSON.stringify(node._meta?.relevantFilters) const getHighlightQuery = (field, path) => { const pathsToReplace = _.intersection( @@ -166,7 +164,7 @@ export const getRequestHighlightFields = (schema, node) => { ) if (!_.isEmpty(pathsToReplace)) { const regexp = new RegExp(_.join('|', pathsToReplace), 'g') - return JSON.parse(_.replace(regexp, path, querystr)) + return JSON.parse(_.replace(regexp, path, queryStr)) } } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js index e9f5bf0c1..c092252d6 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js @@ -4,6 +4,14 @@ import { getRequestHighlightFields, } from './request.js' +describe('getHighlightFieldsGroupsPaths', () => { + it('todo', () => {}) +}) + +describe('getAllHighlightFields', () => { + it('todo', () => {}) +}) + describe('addPathsToRequestSource()', () => { describe('paths with no wildcards', () => { it('should not add path when source is empty', () => { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js index 5b48543b2..60bbd95f6 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js +++ 
b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js @@ -32,7 +32,7 @@ export const transformResponseHighlight = ( schema, hit, tags, - arrayIncludes = {} + nestedArrayIncludes = {} ) => { const arrayOfObjectsPaths = _.keys(getArrayOfObjectsPathsMap(schema)) @@ -47,7 +47,7 @@ export const transformResponseHighlight = ( mergeHighlights(tags, ...fragments), acc ) - for (const itemPath of arrayIncludes[arrayPath] ?? []) { + for (const itemPath of nestedArrayIncludes[arrayPath] ?? []) { F.updateOn( F.dotJoin([`${index}`, itemPath]), (highlight) => highlight ?? _.get(itemPath, item), @@ -89,25 +89,30 @@ export const transformResponseHighlight = ( )(hit.highlight) } +/** + * Remove each path in `paths` from `hit._source`. + */ export const removePathsFromSource = (schema, hit, paths) => { // Nothing to do if (_.isEmpty(paths)) return - // "aoo" stands for "array of objects", because I was tired of typing all of - // that out every time. + // "aoo" stands for "array of objects", because I was tired of typing it out + // over and over again. const aooMap = getArrayOfObjectsPathsMap(schema) - const aooPaths = _.keys(aooMap) - const getAooPath = (path) => findByPrefix(path, aooPaths) - const [arrayPaths, normalPaths] = _.partition(getAooPath, paths) + const allAooPaths = _.keys(aooMap) + const getAooPath = (path) => findByPrefix(path, allAooPaths) + const [aooPaths, otherPaths] = _.partition(getAooPath, paths) + const toRemove = { - ...F.arrayToObject(_.identity, _.constant(true), normalPaths), - ...F.mapValuesIndexed( - (paths, aooPath) => - areArraysEqual(paths, aooMap[aooPath]) || _.includes(aooPath, paths) - ? 
true - : _.map(removePrefix(`${aooPath}.`), paths), - _.groupBy(getAooPath, arrayPaths) - ), + ...F.arrayToObject(_.identity, _.constant(true), otherPaths), + ...F.mapValuesIndexed((paths, aooPath) => { + const removeEntireArray = + // All nested fields in array of objects should be removed + areArraysEqual(paths, aooMap[aooPath]) || + // Or... the path for the array of objects field should be removed + _.includes(aooPath, paths) + return removeEntireArray || _.map(removePrefix(`${aooPath}.`), paths) + }, _.groupBy(getAooPath, aooPaths)), } const removePathsFromArray = (paths) => (arr) => diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js index e4590382f..46f755be5 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js @@ -164,8 +164,8 @@ describe('transformResponseHighlight()', () => { ], }, } - const arrayIncludes = { 'library.books': ['cover.author'] } - transformResponseHighlight(schema, hit, tags, arrayIncludes) + const nestedArrayIncludes = { 'library.books': ['cover.author'] } + transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) expect(hit.highlight).toEqual({ 'library.books': { 0: { @@ -208,8 +208,8 @@ describe('transformResponseHighlight()', () => { ], }, } - const arrayIncludes = { 'library.books': ['cover.title'] } - transformResponseHighlight(schema, hit, tags, arrayIncludes) + const nestedArrayIncludes = { 'library.books': ['cover.title'] } + transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) expect(hit.highlight).toEqual({ 'library.books': { 0: { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js index 
c2872b00f..cd0ff5974 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js @@ -15,15 +15,14 @@ const tags = { post: '
', } -export const wrapSearch = (node, search, schema) => async (body) => { - body._source ??= {} - const addedPaths = addPathsToRequestSource( - schema, - body._source, - // Paths for all fields we'd like to retrieve no matter what. - // Currently only paths for fields in arrays of objects. - _.flatten(_.values(getArrayOfObjectsPathsMap(schema))) - ) +export const searchWithHighlights = (node, search, schema) => async (body) => { + // Paths for fields to always include regardless of whether the user included + // them. They will be removed from the response hits so there's no harm done. + const pathsToAdd = _.flatten(_.values(getArrayOfObjectsPathsMap(schema))) + + // body._source is mutated here + const addedPaths = addPathsToRequestSource(schema, body._source, pathsToAdd) + const response = await search({ ...body, highlight: { @@ -33,10 +32,13 @@ export const wrapSearch = (node, search, schema) => async (body) => { fields: getRequestHighlightFields(schema, node), }, }) + for (const hit of response.hits.hits) { - transformResponseHighlight(schema, hit, tags, node.highlight.arrayIncludes) + const nestedArrayIncludes = node.highlight.nestedArrayIncludes + transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) removePathsFromSource(schema, hit, addedPaths) mergeHighlightsOnSource(schema, hit) } + return response } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js index 1d3bcf27f..f3d7add65 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js @@ -18,8 +18,10 @@ export const isArrayOfScalarsField = (field) => export const isArrayOfObjectsField = (field) => isArrayField(field) && !isLeafField(field) -// Object keys are paths of arrays of objects fields in the schema and values -// are lists of paths for fields 
under each array field. +/** + * Object where keys are paths for fields that are arrays of objects and values + * are all the paths under them. + */ export const getArrayOfObjectsPathsMap = _.memoize((schema) => { const fieldsPaths = _.keys(schema.fields) return F.reduceIndexed( diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index a18a64344..6a00673ac 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -1,6 +1,6 @@ import F from 'futil' import { getField } from '../../utils/fields.js' -import { wrapSearch } from './highlighting/search.js' +import { searchWithHighlights } from './highlighting/search.js' export default { validContext: () => true, @@ -12,7 +12,9 @@ export default { ? getField(schema, node.sortField) : '_score' - search = node.highlight?.enable ? wrapSearch(node, search, schema) : search + search = node.highlight?.enable + ? searchWithHighlights(node, search, schema) + : search const response = await search( F.omitBlank({ diff --git a/packages/provider-elasticsearch/src/example-types/results/type.d.ts b/packages/provider-elasticsearch/src/example-types/results/type.d.ts new file mode 100644 index 000000000..40fe42db9 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/type.d.ts @@ -0,0 +1,35 @@ +/** + * Typings for the result example type. + */ + +type Path = string + +interface HighlightConfig { + /** + * Whether to send highlighting configuration to elastic and merge + * highlighting results onto source. Defaults to `false`. + */ + enable?: boolean + /** + * Nested paths in arrays of objects that should be copied from source into + * highlighted results. 
+ * + * For example `{ "library.books": ["cover.author"] }` will make it so + * `cover.author` is copied over from the source array to the highlighted + * results for the `library.books` array. The motivation being that sometimes + * arrays are large and it's expensive to include the whole thing in the + * hits source but some of the array items fields are needed to correctly + * display the array. + */ + nestedArrayIncludes?: Record> +} + +interface Node { + /** + * Custom configuration to control highlighting of results. Currently we don't + * pick up properties from elastic's + * [highlighting configuration](https://www.elastic.co/guide/en/elasticsearch/reference/current/highlighting.html#highlighting-settings) + * so including them here won't have any effect. + */ + highlight: HighlightConfig +} From f4c3492ccf77242c0c52a440e2885586133ca39a Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Fri, 22 Dec 2023 10:44:39 -0500 Subject: [PATCH 24/30] Add unit tests --- packages/export/src/nodes/results.test.js | 1 + .../src/example-types/results/README.md | 54 ++-- .../results/highlighting/request.js | 148 ++++----- .../results/highlighting/request.test.js | 300 ++++++++++++------ .../results/highlighting/response.js | 83 ++--- .../results/highlighting/response.test.js | 42 +-- .../results/highlighting/schema.test.js | 64 ---- .../results/highlighting/search.js | 22 +- .../results/highlighting/testSchema.js | 90 ++++++ .../results/highlighting/util.js | 49 ++- .../results/highlighting/util.test.js | 10 +- .../src/example-types/results/index.js | 18 +- .../src/example-types/results/type.d.ts | 4 +- packages/provider-elasticsearch/src/schema.js | 2 +- .../provider-elasticsearch/src/utils/futil.js | 6 +- .../src/utils/futil.test.js | 202 ++++++++++++ .../ResultTable/HighlightedColumn.js | 57 ++++ packages/react/src/utils/futil.js | 18 ++ 18 files changed, 800 insertions(+), 370 deletions(-) delete mode 100644 
packages/provider-elasticsearch/src/example-types/results/highlighting/schema.test.js create mode 100644 packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js create mode 100644 packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js diff --git a/packages/export/src/nodes/results.test.js b/packages/export/src/nodes/results.test.js index 1ace3c0ca..7eaf51514 100644 --- a/packages/export/src/nodes/results.test.js +++ b/packages/export/src/nodes/results.test.js @@ -46,6 +46,7 @@ describe('results', () => { page: 1, totalPages: 1, include, + highlight: { disable: true }, sortField: 'a', sortDir: 'desc', ...strategyParams, diff --git a/packages/provider-elasticsearch/src/example-types/results/README.md b/packages/provider-elasticsearch/src/example-types/results/README.md index 1c8fe5f83..ffd01cef1 100644 --- a/packages/provider-elasticsearch/src/example-types/results/README.md +++ b/packages/provider-elasticsearch/src/example-types/results/README.md @@ -1,12 +1,12 @@ # Highlighting -Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. See `./type.d.ts` for more details on the API. +Our approach to highlighting is designed to be as out of the box as possible, without too many configuration options. See [./type.d.ts](./type.d.ts) for more details on the API. ## 1. Request ### Fields included in `_source` -For the most part, we pass the `include` and `exclude` properties on the results node verbatim to elastic. We do however include paths for fields in arrays of objects as-needed (e.g. if not already included). This is strictly an implementation detail that allows us to correlate highlighted results for arrays of objects to the array items they belong to. Fields that were not originally included in the node are removed from the response since it would be surprising for users to get values for fields they did not request. 
+For the most part, we pass the results node `include` and `exclude` properties verbatim to elastic. We do however include paths for fields in arrays of objects as-needed (e.g. if not already included). This is strictly an implementation detail that allows us to correlate highlighted results for arrays of objects to the array items they belong to. Fields that were not originally included in the results node are removed from the response since it would be surprising for users to get values for fields they did not request. ### Fields sent for highlighting @@ -34,10 +34,12 @@ Whitelisted sub-fields are sent for highlighting, since they could be present in // `state` will be sent for highlighting. "state": { "elasticsearch": { - "fields": { - "keyword": {}, - // `state.subfield` will be sent for highlighting. - "subfield": {} + "mapping": { + "fields": { + "keyword": {}, + // `state.subfield` will be sent for highlighting. + "subfield": {} + } } } } @@ -65,7 +67,9 @@ Fields groups are not sent for highlighting because we assume users want to high // `state` will be sent for highlighting. "state": { "elasticsearch": { - "copy_to": ["address"] + "mapping": { + "copy_to": ["address"] + } } } } @@ -135,7 +139,7 @@ In order to fix this, we make use of elastic's [highlight_query](https://www.ela #### 3. Text blobs -In the spirit of keeping our API simple, we generate opinionated highlighting configuration for large text blobs to improve highlighting performance. More often than not, it makes sense to only display highlighted fragments instead of the whole blob for these types of fields. Since elastic does not have a "blob" or "large text" type, we've adopted the convention of specifying a field's "subType" using elastic's [meta property](https://www.elastic.co/guide/en/elasticsearch/reference/8.11/mapping-field-meta.html): +In the spirit of keeping our API simple, we generate opinionated highlighting configuration for large text blobs to improve highlighting performance. 
More often than not, it makes sense to only display highlighted fragments instead of the whole blob for these types of fields. Since elastic does not have a "blob" or "large text" type, we've adopted the convention of specifying a field's "subType" in the schema:
@@ -145,11 +149,7 @@ In the spirit of keeping our API simple, we generate opinionated highlighting co { "fields": { "donQuixoteText": { - "elasticsearch": { - "meta": { - "subType": "blob" - } - } + "subType": "blob" } } } @@ -210,7 +210,7 @@ will be transformed into #### 3. Array fields -Elastic doesn't have a concept of array fields, so we rely again on the `subType` convention used for text blobs to identify them +Elastic doesn't have a concept of array fields, so we rely on the `subType` convention used for text blobs to identify them
@@ -220,11 +220,7 @@ Elastic doesn't have a concept of array fields, so we rely again on the `subType { "fields": { "library.books": { - "elasticsearch": { - "meta": { - "subType": "array" - } - } + "subType": "array" } } } @@ -241,7 +237,7 @@ which allows us to order highlighted array items based on the source array ```javascript import assert from 'node:assert' -const hit = { +let hit = { _source: { names: ['John', 'Smith', 'Jane', 'Austen'], }, @@ -251,9 +247,9 @@ const hit = { } // `fn` is just for illustration purposes -const actual = fn(hit.highlight.names, hit._source.names) +let actual = fn(hit.highlight.names, hit._source.names) -const expected = { +let expected = { 1: 'Smith', 3: 'Austen', } @@ -274,7 +270,7 @@ Arrays of objects are equally ordered. Additionally, their structure follows the ```javascript import assert from 'node:assert' -const hit = { +let hit = { _source: { friends: [ { name: 'John', age: 34 }, @@ -289,9 +285,9 @@ const hit = { } // `fn` is just for illustration purposes -const actual = fn(hit.highlight['friends.name'], hit._source.friends) +let actual = fn(hit.highlight['friends.name'], hit._source.friends) -const expected = { +let expected = { 1: { name: 'Smith' }, 3: { name: 'Austen' }, } @@ -301,7 +297,13 @@ assert.deepEqual(actual, expected)
-`nestedArrayIncludes` are also handled in this step. Assumming the example above and `nestedArrayIncludes = { friends: ["age"] }`, the highlighted results become +`nestedArrayIncludes` are handled when ordering the array of objects. Assuming the example above and + +```javascript +let nestedArrayIncludes = { friends: ['age'] } +``` + +the highlighted results become ```javascript { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index 6363f5405..8467753be 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -2,7 +2,6 @@ import _ from 'lodash/fp.js' import F from 'futil' import { minimatch } from 'minimatch' import { CartesianProduct } from 'js-combinatorics' -import { areArraysEqual } from '../../../utils/futil.js' import { isLeafField, isBlobField, @@ -16,10 +15,10 @@ import { * example if `friends` is an array of people, it gets expanded into * `[friends.name, friends.height, ...]` */ -const expandGlobs = (schema, globs) => { - const fieldsNames = _.keys(schema.fields) +let expandGlobs = (schema, globs) => { + let fieldsNames = _.keys(schema.fields) - const expandGlob = (glob) => + let expandGlob = (glob) => isLeafField(schema.fields[glob]) ? [glob] : minimatch.match(fieldsNames, `${glob}*`) @@ -37,10 +36,10 @@ const expandGlobs = (schema, globs) => { * * Returns added paths. */ -export const addPathsToRequestSource = (schema, source, paths) => { +export let addPathsToRequestSource = (schema, source = {}, pathsToAdd = []) => { // There's nothing to add. - if (_.isEmpty(paths) || _.isEmpty(F.omitBlank(source))) { - return [] + if (_.isEmpty(pathsToAdd) || _.isEmpty(F.omitBlank(source))) { + return source } // A source array is just includes. 
@@ -48,90 +47,85 @@ export const addPathsToRequestSource = (schema, source, paths) => { source = { includes: source } } + let result = _.cloneDeep(source) + // With wildcards expanded. - const expanded = { + let expanded = { includes: expandGlobs(schema, source.includes), excludes: expandGlobs(schema, source.excludes), } - // To understand this, visualize a Venn diagram with three intersecting sets - // one for each of includes, excludes, and paths. - const pathsToAdd = _.union( - // Any path we "unexclude" is technically "added". - _.intersection(paths, expanded.excludes), - // Also added are paths that were not originally included. - _.isEmpty(source.includes) ? [] : _.difference(paths, expanded.includes) - ) - - // There's nothing to add. - if (_.isEmpty(pathsToAdd)) { - return [] + // Any path we "unexclude" is technically "added". + let excludedFromExcludes = _.intersection(pathsToAdd, expanded.excludes) + if (!_.isEmpty(excludedFromExcludes)) { + result.excludes = _.difference(expanded.excludes, excludedFromExcludes) } - const withAddedPaths = F.omitBlank({ - includes: _.union(pathsToAdd, expanded.includes), - excludes: _.difference(expanded.excludes, pathsToAdd), - }) - - const shouldAddPaths = (key) => - !areArraysEqual(expanded[key], withAddedPaths[key]) - - if (!_.isEmpty(source.includes) && shouldAddPaths('includes')) { - source.includes = withAddedPaths.includes + // Also added are paths that were not originally included. + let addedToIncludes = _.isEmpty(expanded.includes) + ? [] + : _.difference(pathsToAdd, expanded.includes) + if (!_.isEmpty(addedToIncludes)) { + result.includes = _.union(expanded.includes, addedToIncludes) } - if (shouldAddPaths('excludes')) { - source.excludes = withAddedPaths.excludes - } + let addedPaths = _.union(addedToIncludes, excludedFromExcludes) - return pathsToAdd + return F.omitBlank({ ...result, addedPaths }) } /* * Names of all subfields that can be highlighted. 
*/ -const getHighlightSubFieldsNames = (schema) => +let getHighlightSubFieldsNames = (schema) => _.keys(_.pickBy('highlight', schema.elasticsearch?.subFields)) /* * Paths of all fields groups and their subfields that can be highlighted. */ -export const getHighlightFieldsGroupsPaths = _.memoize((schema) => { - const subFieldsNames = getHighlightSubFieldsNames(schema) +export let getHighlightFieldsGroupsPaths = _.memoize((schema) => { + let subFieldsNames = getHighlightSubFieldsNames(schema) return _.flatMap((field) => { - const copy_to = field.elasticsearch?.copy_to - if (_.isEmpty(copy_to)) return copy_to ?? [] - const product = new CartesianProduct(copy_to, subFieldsNames) - return [...(copy_to ?? []), ..._.map(_.join('.'), [...product])] + let copy_to = field.elasticsearch?.mapping?.copy_to + if (_.isEmpty(copy_to)) return [] + let subFieldTuples = [...new CartesianProduct(copy_to, subFieldsNames)] + let product = [...copy_to, ..._.map(_.join('.'), subFieldTuples)] + return product }, schema.fields) }, _.get('elasticsearch.index')) -const isFieldsGroupPath = (schema, path) => - !!findByPrefix(path, getHighlightFieldsGroupsPaths(schema)) +let isFieldsGroupPath = _.curry((schema, path) => + _.find(_.eq(path), getHighlightFieldsGroupsPaths(schema)) +) /* * Object of all fields and their subfields that can be highlighted. 
*/ -export const getAllHighlightFields = _.memoize((schema) => { - const subFieldsNames = getHighlightSubFieldsNames(schema) +export let getAllHighlightFields = _.memoize((schema) => { + let subFieldsNames = getHighlightSubFieldsNames(schema) return F.reduceIndexed( (acc, field, path) => { if (!isLeafField(field) || isFieldsGroupPath(schema, path)) { return acc } acc[path] = field - const subFields = _.pick(subFieldsNames, field.elasticsearch.fields) - for (const name in subFields) { - acc[`${path}.${name}`] = { - elasticsearch: { - ...subFields[name], - meta: field.elasticsearch.meta, - copy_to: _.map( - (path) => `${path}.${name}`, - field.elasticsearch.copy_to - ), - }, - } + let subFields = _.pick( + subFieldsNames, + field.elasticsearch?.mapping?.fields + ) + for (let name in subFields) { + acc[`${path}.${name}`] = F.omitBlank({ + subType: field.subType, + elasticsearch: F.omitBlank({ + mapping: F.omitBlank({ + ...subFields[name], + copy_to: _.map( + (path) => `${path}.${name}`, + field.elasticsearch.mapping?.copy_to + ), + }), + }), + }) } return acc }, @@ -140,30 +134,39 @@ export const getAllHighlightFields = _.memoize((schema) => { ) }, _.get('elasticsearch.index')) +let collectKeysAndValues = (f, coll) => + F.reduceTree()( + (acc, val, key) => + f(val) ? F.push(val, acc) : f(key) ? F.push(key, acc) : acc, + [], + coll + ) + +let blobConfiguration = { + fragment_size: 250, + number_of_fragments: 3, +} + /* * Get configuration for highlight fields to send in the elastic request. */ -export const getRequestHighlightFields = (schema, node) => { - const fieldGroupsInQuery = F.reduceTree()( - (acc, val, key) => - isFieldsGroupPath(schema, val) - ? F.push(val, acc) - : isFieldsGroupPath(schema, key) - ? 
F.push(key, acc) - : acc, - [], +export let getRequestHighlightFields = (schema, node) => { + let fieldGroupsInQuery = collectKeysAndValues( + isFieldsGroupPath(schema), node._meta?.relevantFilters ) - const queryStr = JSON.stringify(node._meta?.relevantFilters) + // Stringifying once and then replacing paths and parsing the query again is + // more performant than walking the query. + let queryStr = JSON.stringify(node._meta?.relevantFilters) - const getHighlightQuery = (field, path) => { - const pathsToReplace = _.intersection( + let getHighlightQuery = (field, path) => { + let pathsToReplace = _.intersection( fieldGroupsInQuery, - field.elasticsearch?.copy_to + field.elasticsearch?.mapping?.copy_to ) if (!_.isEmpty(pathsToReplace)) { - const regexp = new RegExp(_.join('|', pathsToReplace), 'g') + let regexp = new RegExp(_.join('|', pathsToReplace), 'g') return JSON.parse(_.replace(regexp, path, queryStr)) } } @@ -171,8 +174,7 @@ export const getRequestHighlightFields = (schema, node) => { return F.mapValuesIndexed( (field, path) => F.omitBlank({ - fragment_size: isBlobField(field) ? 250 : null, - number_of_fragments: isBlobField(field) ? 
3 : null, + ...(isBlobField(field) && blobConfiguration), highlight_query: getHighlightQuery(field, path), }), getAllHighlightFields(schema) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js index c092252d6..d75dc8d66 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js @@ -1,185 +1,289 @@ -import { schema } from './schema.test.js' +import { schema } from './testSchema.js' import { addPathsToRequestSource, + getAllHighlightFields, + getHighlightFieldsGroupsPaths, getRequestHighlightFields, } from './request.js' describe('getHighlightFieldsGroupsPaths', () => { - it('todo', () => {}) + it('should return all combinations of fields groups and sub-fields', () => { + let schema = { + elasticsearch: { + subFields: { + keyword: { highlight: false }, + subfield1: { highlight: true }, + subfield2: { highlight: true }, + }, + }, + fields: { + fieldsGroup1: { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { keyword: {}, subfield1: {}, subfield2: {} }, + }, + }, + }, + fieldsGroup2: { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { keyword: {}, subfield1: {}, subfield2: {} }, + }, + }, + }, + state: { + elasticsearch: { + dataType: 'text', + mapping: { + copy_to: ['fieldsGroup1', 'fieldsGroup2'], + }, + }, + }, + }, + } + expect(getHighlightFieldsGroupsPaths(schema)).toEqual([ + 'fieldsGroup1', + 'fieldsGroup2', + 'fieldsGroup1.subfield1', + 'fieldsGroup2.subfield1', + 'fieldsGroup1.subfield2', + 'fieldsGroup2.subfield2', + ]) + }) }) describe('getAllHighlightFields', () => { - it('todo', () => {}) + it('should include subfields that can be highlighted', () => { + let schema = { + elasticsearch: { + subFields: { + keyword: { highlight: false }, + subfield: { highlight: 
true }, + }, + }, + fields: { + state: { + elasticsearch: { + dataType: 'text', + mapping: { fields: { keyword: {}, subfield: {} } }, + }, + }, + }, + } + let actual = getAllHighlightFields(schema) + expect(actual).toEqual({ + state: { + elasticsearch: { + dataType: 'text', + mapping: { fields: { keyword: {}, subfield: {} } }, + }, + }, + 'state.subfield': {}, + }) + }) + + it('should exclude groups fields', () => { + let schema = { + fields: { + all: { elasticsearch: { dataType: 'text' } }, + address: { elasticsearch: { dataType: 'text' } }, + state: { + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, + }, + 'city.street': { + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, + }, + }, + } + let actual = getAllHighlightFields(schema) + expect(actual).toEqual({ + state: { + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, + }, + 'city.street': { + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, + }, + }) + }) }) describe('addPathsToRequestSource()', () => { describe('paths with no wildcards', () => { it('should not add path when source is empty', () => { - const source = {} - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({}) - expect(added).toEqual([]) + let source = {} + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual(source) }) it('should not add path when there are no paths to add', () => { - const source = { + let source = { includes: ['library.name'], excludes: ['library.about'], } - const added = addPathsToRequestSource(schema, source) - expect(source).toEqual({ + let result = addPathsToRequestSource(schema, source) + expect(result).toEqual({ includes: ['library.name'], excludes: ['library.about'], }) - expect(added).toEqual([]) }) it('should not add path when includes is empty and path is not excluded', () => { - 
const source = { + let source = { excludes: ['library.about'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ excludes: ['library.about'], }) - expect(added).toEqual([]) }) it('should add path when includes is empty and path is excluded', () => { - const source = { + let source = { excludes: ['library.name'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({}) - expect(added).toEqual(['library.name']) + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ addedPaths: ['library.name'] }) }) it('should add path when includes is not empty and path is not excluded', () => { - const source = { + let source = { includes: ['library.categories'], excludes: ['library.about'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ - includes: ['library.categories', 'library.name'], + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ + includes: ['library.name', 'library.categories'], excludes: ['library.about'], + addedPaths: ['library.name'], }) - expect(added).toEqual(['library.name']) }) it('should add path when includes is not empty and path is excluded', () => { - const source = { + let source = { includes: ['library.categories'], excludes: ['library.name'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ - includes: ['library.categories', 'library.name'], + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ + includes: ['library.name', 'library.categories'], + addedPaths: ['library.name'], }) - expect(added).toEqual(['library.name']) }) it('should add path in array of objects and adjust excludes accordingly', () => { - const 
source = { + let source = { includes: ['library.about'], excludes: ['library.books'], } - const added = addPathsToRequestSource(schema, source, [ + let result = addPathsToRequestSource(schema, source, [ 'library.books.cover.title', ]) - expect(source).toEqual({ - includes: ['library.about', 'library.books.cover.title'], + expect(result).toEqual({ + includes: ['library.books.cover.title', 'library.about'], excludes: ['library.books.cover.author'], + addedPaths: ['library.books.cover.title'], }) - expect(added).toEqual(['library.books.cover.title']) }) }) describe('paths with wildcards', () => { it('should not add path when includes is empty and path is not excluded', () => { - const source = { + let source = { excludes: ['library.books.*'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ excludes: ['library.books.*'], }) - expect(added).toEqual([]) }) it('should add path when includes is empty and path is excluded', () => { - const source = { + let source = { excludes: ['library.*'], } - const added = addPathsToRequestSource(schema, source, ['library.about']) - expect(source).toEqual({ + let result = addPathsToRequestSource(schema, source, ['library.about']) + expect(result).toEqual({ excludes: [ 'library.name', 'library.categories', 'library.books.cover.title', 'library.books.cover.author', ], + addedPaths: ['library.about'], }) - expect(added).toEqual(['library.about']) }) it('should add path when includes is not empty and path is not excluded', () => { - const source = { + let source = { includes: ['library.about'], excludes: ['library.books.*'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ - includes: ['library.about', 'library.name'], + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ + includes: 
['library.name', 'library.about'], excludes: ['library.books.*'], + addedPaths: ['library.name'], }) - expect(added).toEqual(['library.name']) }) it('should add path when includes is not empty and path is excluded', () => { - const source = { + let source = { includes: ['library.*'], excludes: ['library.books.*'], } - const added = addPathsToRequestSource(schema, source, [ + let result = addPathsToRequestSource(schema, source, [ 'library.books.cover.title', ]) - expect(source).toEqual({ + expect(result).toEqual({ includes: ['library.*'], excludes: ['library.books.cover.author'], + addedPaths: ['library.books.cover.title'], }) - expect(added).toEqual(['library.books.cover.title']) }) it('should expand includes when adding to it', () => { - const source = { + let source = { includes: ['library.books.*'], excludes: ['library.name'], } - const added = addPathsToRequestSource(schema, source, ['library.name']) - expect(source).toEqual({ + let result = addPathsToRequestSource(schema, source, ['library.name']) + expect(result).toEqual({ includes: [ + 'library.name', 'library.books.cover.title', 'library.books.cover.author', - 'library.name', ], + addedPaths: ['library.name'], }) - expect(added).toEqual(['library.name']) }) }) }) describe('getRequestHighlightFields()', () => { it('should exclude fields without mappings', () => { - const schema = { + let schema = { fields: { other: {}, state: { elasticsearch: { dataType: 'text' } }, 'city.street': { elasticsearch: { dataType: 'text' } }, }, } - const node = {} - const actual = getRequestHighlightFields(schema, node) + let node = {} + let actual = getRequestHighlightFields(schema, node) expect(actual).toEqual({ state: {}, 'city.street': {}, @@ -187,20 +291,26 @@ describe('getRequestHighlightFields()', () => { }) it('should exclude group fields', () => { - const schema = { + let schema = { fields: { all: { elasticsearch: { dataType: 'text' } }, address: { elasticsearch: { dataType: 'text' } }, state: { - elasticsearch: { 
dataType: 'text', copy_to: ['all', 'address'] }, + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, }, 'city.street': { - elasticsearch: { dataType: 'text', copy_to: ['all', 'address'] }, + elasticsearch: { + dataType: 'text', + mapping: { copy_to: ['all', 'address'] }, + }, }, }, } - const node = {} - const actual = getRequestHighlightFields(schema, node) + let node = {} + let actual = getRequestHighlightFields(schema, node) expect(actual).toEqual({ state: {}, 'city.street': {}, @@ -208,7 +318,7 @@ describe('getRequestHighlightFields()', () => { }) it('should include whitelisted sub fields', () => { - const schema = { + let schema = { elasticsearch: { subFields: { keyword: { highlight: false }, @@ -219,19 +329,19 @@ describe('getRequestHighlightFields()', () => { state: { elasticsearch: { dataType: 'text', - fields: { keyword: {}, subfield: {} }, + mapping: { fields: { keyword: {}, subfield: {} } }, }, }, 'city.street': { elasticsearch: { dataType: 'text', - fields: { keyword: {}, subfield: {} }, + mapping: { fields: { keyword: {}, subfield: {} } }, }, }, }, } - const node = {} - const actual = getRequestHighlightFields(schema, node) + let node = {} + let actual = getRequestHighlightFields(schema, node) expect(actual).toEqual({ state: {}, 'state.subfield': {}, @@ -241,7 +351,7 @@ describe('getRequestHighlightFields()', () => { }) it('should generate configuration for blob text fields', () => { - const schema = { + let schema = { elasticsearch: { subFields: { subfield: { @@ -251,20 +361,22 @@ describe('getRequestHighlightFields()', () => { }, fields: { state: { + subType: 'blob', elasticsearch: { dataType: 'text', - meta: { subType: 'blob' }, - fields: { - subfield: { - dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, }, }, }, }, }, } - const node = {} - const actual = getRequestHighlightFields(schema, node) + let node = {} + let actual = getRequestHighlightFields(schema, node) 
expect(actual).toEqual({ state: { fragment_size: 250, @@ -278,7 +390,7 @@ describe('getRequestHighlightFields()', () => { }) it('should generate highlight_query with fields groups replaced', () => { - const schema = { + let schema = { fields: { address: { elasticsearch: { @@ -288,18 +400,18 @@ describe('getRequestHighlightFields()', () => { state: { elasticsearch: { dataType: 'text', - copy_to: ['address'], + mapping: { copy_to: ['address'] }, }, }, 'city.street': { elasticsearch: { dataType: 'text', - copy_to: ['address'], + mapping: { copy_to: ['address'] }, }, }, }, } - const query = (field) => ({ + let query = (field) => ({ bool: { must: [ { terms: { [field]: 'memphis' } }, @@ -307,12 +419,12 @@ describe('getRequestHighlightFields()', () => { ], }, }) - const node = { + let node = { _meta: { relevantFilters: query('address'), }, } - const actual = getRequestHighlightFields(schema, node) + let actual = getRequestHighlightFields(schema, node) expect(actual).toEqual({ state: { highlight_query: query('state'), @@ -324,7 +436,7 @@ describe('getRequestHighlightFields()', () => { }) it('should generate highlight_query with fields groups replaced for sub fields', () => { - const schema = { + let schema = { elasticsearch: { subFields: { subfield: { highlight: true }, @@ -339,24 +451,28 @@ describe('getRequestHighlightFields()', () => { state: { elasticsearch: { dataType: 'text', - copy_to: ['address'], - fields: { - subfield: { dataType: 'text' }, + mapping: { + copy_to: ['address'], + fields: { + subfield: { type: 'text' }, + }, }, }, }, 'city.street': { elasticsearch: { dataType: 'text', - copy_to: ['address'], - fields: { - subfield: { dataType: 'text' }, + mapping: { + copy_to: ['address'], + fields: { + subfield: { type: 'text' }, + }, }, }, }, }, } - const query = (field) => ({ + let query = (field) => ({ bool: { must: [ { terms: { [field]: 'memphis' } }, @@ -364,12 +480,12 @@ describe('getRequestHighlightFields()', () => { ], }, }) - const node = { + let node = { 
_meta: { relevantFilters: query('address.subfield'), }, } - const actual = getRequestHighlightFields(schema, node) + let actual = getRequestHighlightFields(schema, node) expect(actual).toEqual({ state: {}, 'state.subfield': { highlight_query: query('state.subfield') }, diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js index 60bbd95f6..ee3b60077 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js @@ -1,10 +1,6 @@ import _ from 'lodash/fp.js' import F from 'futil' -import { - removePrefix, - areArraysEqual, - groupByIndexed, -} from '../../../utils/futil.js' +import { isArraysEqual, groupByIndexed } from '../../../utils/futil.js' import { stripTags, mergeHighlights, @@ -15,39 +11,47 @@ import { getArrayOfObjectsPathsMap, } from './util.js' -const lastWordRegex = /\.(\w+)$/ +let lastWordRegex = /\.(\w+)$/ /* * Group highlight results by their multifield. For example `city` and * `city.subfield` will be grouped under `city`. */ -const groupByMultiField = _.curry((schema, highlight) => +let groupByMultiField = _.curry((schema, highlight) => groupByIndexed((v, path) => { - const [multi, sub] = path.split(lastWordRegex) - return schema.fields[multi]?.elasticsearch?.fields?.[sub] ? multi : path + let [multi, sub] = path.split(lastWordRegex) + return schema.fields[multi]?.elasticsearch?.mapping?.fields?.[sub] + ? multi + : path }, highlight) ) -export const transformResponseHighlight = ( +/** + * Mutate hit `highlight`: + * 1. Fragments for text fields get merged. + * 2. Fragments for large (blob) text fields get concatenated. + * 3. 
Fragments for arrays get ordered based on the source array + */ +export let transformResponseHighlight = ( schema, hit, tags, nestedArrayIncludes = {} ) => { - const arrayOfObjectsPaths = _.keys(getArrayOfObjectsPathsMap(schema)) + let arrayOfObjectsPaths = _.keys(getArrayOfObjectsPathsMap(schema)) - const getIndexedArrayObject = (arr, fragments, arrayPath, itemPath) => { - const fragmentsMap = _.groupBy(stripTags(tags), fragments) + let getIndexedArrayObject = (arr, fragments, arrayPath, itemPath) => { + let fragmentsMap = _.groupBy(stripTags(tags), fragments) return F.reduceIndexed( (acc, item, index) => { - const fragments = fragmentsMap[itemPath ? _.get(itemPath, item) : item] + let fragments = fragmentsMap[itemPath ? _.get(itemPath, item) : item] if (fragments) { F.setOn( F.dotJoin([`${index}`, itemPath]), mergeHighlights(tags, ...fragments), acc ) - for (const itemPath of nestedArrayIncludes[arrayPath] ?? []) { + for (let itemPath of nestedArrayIncludes[arrayPath] ?? []) { F.updateOn( F.dotJoin([`${index}`, itemPath]), (highlight) => highlight ?? _.get(itemPath, item), @@ -62,22 +66,22 @@ export const transformResponseHighlight = ( ) } - const getArrayPath = (path) => + let getArrayPath = (path) => isArrayOfScalarsField(schema.fields[path]) ? path : findByPrefix(path, arrayOfObjectsPaths) - hit.highlight = _.flow( + let highlight = _.flow( groupByMultiField(schema), _.mapValues(_.flatten), F.reduceIndexed((acc, fragments, path) => { - const arrayPath = getArrayPath(path) + let arrayPath = getArrayPath(path) if (arrayPath) { acc[arrayPath] = getIndexedArrayObject( acc[arrayPath] ?? {}, fragments, arrayPath, - removePrefix(`${arrayPath}.`, path) + path.slice(arrayPath.length + 1) ) } else if (isBlobField(schema.fields[path])) { acc[path] = fragments @@ -87,45 +91,50 @@ export const transformResponseHighlight = ( return acc }, {}) )(hit.highlight) + + if (!_.isEmpty(highlight)) hit.highlight = highlight } /** * Remove each path in `paths` from `hit._source`. 
*/ -export const removePathsFromSource = (schema, hit, paths) => { +export let removePathsFromSource = (schema, hit, paths) => { // Nothing to do if (_.isEmpty(paths)) return // "aoo" stands for "array of objects", because I was tired of typing it out // over and over again. - const aooMap = getArrayOfObjectsPathsMap(schema) - const allAooPaths = _.keys(aooMap) - const getAooPath = (path) => findByPrefix(path, allAooPaths) - const [aooPaths, otherPaths] = _.partition(getAooPath, paths) + let aooMap = getArrayOfObjectsPathsMap(schema) + let allAooPaths = _.keys(aooMap) + let getAooPath = (path) => findByPrefix(path, allAooPaths) + let [aooPaths, otherPaths] = _.partition(getAooPath, paths) - const toRemove = { + let toRemove = { ...F.arrayToObject(_.identity, _.constant(true), otherPaths), ...F.mapValuesIndexed((paths, aooPath) => { - const removeEntireArray = + let removeEntireArray = // All nested fields in array of objects should be removed - areArraysEqual(paths, aooMap[aooPath]) || + isArraysEqual(paths, aooMap[aooPath]) || // Or... the path for the array of objects field should be removed _.includes(aooPath, paths) - return removeEntireArray || _.map(removePrefix(`${aooPath}.`), paths) + return ( + removeEntireArray || + _.map((path) => path.slice(aooPath.length + 1), paths) + ) }, _.groupBy(getAooPath, aooPaths)), } - const removePathsFromArray = (paths) => (arr) => + let removePathsFromArray = (paths) => (arr) => _.reduce( (acc, item) => { - for (const path of paths) F.unsetOn(path, item) + for (let path of paths) F.unsetOn(path, item) return _.isEmpty(item) ? acc : F.push(item, acc) }, [], arr ) - for (const [path, value] of _.toPairs(toRemove)) { + for (let [path, value] of _.toPairs(toRemove)) { if (value === true) { F.unsetOn(path, hit._source) } else { @@ -146,10 +155,10 @@ export const removePathsFromSource = (schema, hit, paths) => { * `_.merge`. Query 100 records with arrays of thousands of elements each and * convince yourself. 
*/ -export const mergeHighlightsOnSource = (schema, hit) => { - for (const path in hit.highlight) { - const fragments = hit.highlight[path] - const field = schema.fields[path] +export let mergeHighlightsOnSource = (schema, hit) => { + for (let path in hit.highlight) { + let fragments = hit.highlight[path] + let field = schema.fields[path] // Set highlight fragments on source. if (!isArrayField(field)) { @@ -160,7 +169,7 @@ export const mergeHighlightsOnSource = (schema, hit) => { // Array fragments get transformed into an object where keys are array // indexes from the source array so this function can stay performant. hit.highlight[path] = _.values(fragments) - const sourceArray = _.get(path, hit._source) + let sourceArray = _.get(path, hit._source) // There is no source array so just set highlight fragments on source. if (!sourceArray) { @@ -169,7 +178,7 @@ export const mergeHighlightsOnSource = (schema, hit) => { } // Set each fragment on the correct index in the source array. - for (const index in fragments) { + for (let index in fragments) { if (isArrayOfScalarsField(field)) { sourceArray[index] = fragments[index] } else { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js index 46f755be5..a03bc6d11 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js @@ -1,17 +1,17 @@ import _ from 'lodash/fp.js' -import { schema } from './schema.test.js' +import { schema } from './testSchema.js' import { mergeHighlightsOnSource, removePathsFromSource, transformResponseHighlight, } from './response.js' -const tags = { pre: '', post: '' } +let tags = { pre: '', post: '' } describe('transformResponseHighlight()', () => { describe('text fields', () => { it('should merge fragments', () => { - const hit 
= { + let hit = { highlight: { 'library.name': [ 'Imperial College London Abdus Salam Library', @@ -31,7 +31,7 @@ describe('transformResponseHighlight()', () => { describe('blob text fields', () => { it('should not merge fragments', () => { - const hit = { + let hit = { highlight: { 'library.about': [ 'The Abdus Salam Library is', @@ -57,7 +57,7 @@ describe('transformResponseHighlight()', () => { describe('arrays of strings', () => { it('should resolve highlights indexes and merge fragments', () => { - const hit = { + let hit = { _source: { library: { categories: [ @@ -90,7 +90,7 @@ describe('transformResponseHighlight()', () => { describe('arrays of objects', () => { it('should resolve highlights indexes and merge fragments', () => { - const hit = { + let hit = { _source: { library: { // prettier-ignore @@ -145,7 +145,7 @@ describe('transformResponseHighlight()', () => { }) it('should copy source fields', () => { - const hit = { + let hit = { _source: { library: { // prettier-ignore @@ -164,7 +164,7 @@ describe('transformResponseHighlight()', () => { ], }, } - const nestedArrayIncludes = { 'library.books': ['cover.author'] } + let nestedArrayIncludes = { 'library.books': ['cover.author'] } transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) expect(hit.highlight).toEqual({ 'library.books': { @@ -185,7 +185,7 @@ describe('transformResponseHighlight()', () => { }) it('should not overwrite highlights when copying source fields', () => { - const hit = { + let hit = { _source: { library: { // prettier-ignore @@ -208,7 +208,7 @@ describe('transformResponseHighlight()', () => { ], }, } - const nestedArrayIncludes = { 'library.books': ['cover.title'] } + let nestedArrayIncludes = { 'library.books': ['cover.title'] } transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) expect(hit.highlight).toEqual({ 'library.books': { @@ -236,7 +236,7 @@ describe('transformResponseHighlight()', () => { }) describe('removePathsFromSource()', () => { - const hit 
= { + let hit = { _source: { library: { categories: [ @@ -255,13 +255,13 @@ describe('removePathsFromSource()', () => { } it('should not remove fields from source if additional includes is empty', () => { - const cloned = _.cloneDeep(hit) + let cloned = _.cloneDeep(hit) removePathsFromSource(schema, cloned) expect(cloned).toEqual(hit) }) it('should remove array of scalars', () => { - const cloned = _.cloneDeep(hit) + let cloned = _.cloneDeep(hit) removePathsFromSource(schema, cloned, ['library.categories']) expect(cloned).toEqual({ _source: { @@ -278,7 +278,7 @@ describe('removePathsFromSource()', () => { }) it('should remove array of objects', () => { - const cloned = _.cloneDeep(hit) + let cloned = _.cloneDeep(hit) removePathsFromSource(schema, cloned, ['library.books']) expect(cloned).toEqual({ _source: { @@ -294,7 +294,7 @@ describe('removePathsFromSource()', () => { }) it('should remove nested field in array of objects', () => { - const cloned = _.cloneDeep(hit) + let cloned = _.cloneDeep(hit) removePathsFromSource(schema, cloned, ['library.books.author']) expect(cloned).toEqual({ _source: { @@ -316,7 +316,7 @@ describe('removePathsFromSource()', () => { }) it('should remove array of objects when all its nested fields are removed', () => { - const cloned = _.cloneDeep(hit) + let cloned = _.cloneDeep(hit) removePathsFromSource(schema, cloned, [ 'library.books.title', 'library.books.author', @@ -337,7 +337,7 @@ describe('removePathsFromSource()', () => { describe('mergeHighlightsOnSource()', () => { it('should merge onto source', () => { - const hit = { + let hit = { _source: { library: { name: 'Imperial College London Abdus Salam Library', @@ -376,7 +376,7 @@ describe('mergeHighlightsOnSource()', () => { describe('arrays of strings', () => { it('should merge onto source', () => { - const hit = { + let hit = { _source: { library: { categories: [ @@ -414,7 +414,7 @@ describe('mergeHighlightsOnSource()', () => { }) it('should merge onto source when source array is 
missing', () => { - const hit = { + let hit = { _source: {}, highlight: { 'library.categories': { @@ -445,7 +445,7 @@ describe('mergeHighlightsOnSource()', () => { describe('arrays of objects', () => { it('should merge onto source', () => { - const hit = { + let hit = { _source: { library: { // prettier-ignore @@ -534,7 +534,7 @@ describe('mergeHighlightsOnSource()', () => { }) it('should merge onto source when source array is missing', () => { - const hit = { + let hit = { _source: {}, highlight: { 'library.books': { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/schema.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/schema.test.js deleted file mode 100644 index 013f54c1a..000000000 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/schema.test.js +++ /dev/null @@ -1,64 +0,0 @@ -export const schema = { - fields: { - 'library.name': { - elasticsearch: { - dataType: 'text', - fields: { - subfield: { - dataType: 'text', - }, - }, - }, - }, - 'library.about': { - elasticsearch: { - dataType: 'text', - meta: { subType: 'blob' }, - fields: { - subfield: { - dataType: 'text', - }, - }, - }, - }, - 'library.categories': { - elasticsearch: { - dataType: 'text', - meta: { subType: 'array' }, - fields: { - subfield: { - dataType: 'text', - }, - }, - }, - }, - 'library.books': { - elasticsearch: { - meta: { subType: 'array' }, - }, - }, - 'library.books.cover.title': { - elasticsearch: { - dataType: 'text', - fields: { - subfield: { - dataType: 'text', - }, - }, - }, - }, - 'library.books.cover.author': { - elasticsearch: { - dataType: 'text', - fields: { - subfield: { - dataType: 'text', - }, - }, - }, - }, - }, -} - -// Bogus test -it.skip('', () => {}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js index cd0ff5974..fe45cd6de 100644 --- 
a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js @@ -1,4 +1,5 @@ import _ from 'lodash/fp.js' +import F from 'futil' import { getArrayOfObjectsPathsMap } from './util.js' import { addPathsToRequestSource, @@ -10,21 +11,24 @@ import { transformResponseHighlight, } from './response.js' -const tags = { +let tags = { pre: '', post: '', } -export const searchWithHighlights = (node, search, schema) => async (body) => { +export let searchWithHighlights = (node, search, schema) => async (body) => { // Paths for fields to always include regardless of whether the user included // them. They will be removed from the response hits so there's no harm done. - const pathsToAdd = _.flatten(_.values(getArrayOfObjectsPathsMap(schema))) + let pathsToAdd = _.flatten(_.values(getArrayOfObjectsPathsMap(schema))) + let { addedPaths, ...source } = addPathsToRequestSource( + schema, + body._source, + pathsToAdd + ) - // body._source is mutated here - const addedPaths = addPathsToRequestSource(schema, body._source, pathsToAdd) - - const response = await search({ + let response = await search({ ...body, + _source: F.omitBlank(source), highlight: { pre_tags: [tags.pre], post_tags: [tags.post], @@ -33,8 +37,8 @@ export const searchWithHighlights = (node, search, schema) => async (body) => { }, }) - for (const hit of response.hits.hits) { - const nestedArrayIncludes = node.highlight.nestedArrayIncludes + for (let hit of response.hits.hits) { + let nestedArrayIncludes = node.highlight?.nestedArrayIncludes transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) removePathsFromSource(schema, hit, addedPaths) mergeHighlightsOnSource(schema, hit) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js new file mode 100644 index 
000000000..a1d4faec2 --- /dev/null +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js @@ -0,0 +1,90 @@ +export let schema = { + elasticsearch: { + subFields: { + keyword: { highlight: false }, + subfield: { highlight: true }, + }, + }, + fields: { + fieldsGroup: { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, + }, + }, + }, + }, + 'library.name': { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, + }, + }, + }, + }, + 'library.about': { + subType: 'blob', + elasticsearch: { + dataType: 'text', + mapping: { + copy_to: ['fieldsGroup'], + fields: { + subfield: { + type: 'text', + copy_to: ['fieldsGroup.subfield'], + }, + }, + }, + }, + }, + 'library.categories': { + subType: 'array', + elasticsearch: { + dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, + }, + }, + }, + }, + 'library.books': { + subType: 'array', + elasticsearch: {}, + }, + 'library.books.cover.title': { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, + }, + }, + }, + }, + 'library.books.cover.author': { + elasticsearch: { + dataType: 'text', + mapping: { + fields: { + subfield: { + type: 'text', + }, + }, + }, + }, + }, + }, +} diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js index f3d7add65..6fee3e6d7 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js @@ -1,29 +1,28 @@ import _ from 'lodash/fp.js' import F from 'futil' -export const findByPrefix = (str, arr) => - _.find((k) => _.startsWith(k, str), arr) +export let findByPrefix = (str, arr) => _.find((k) => _.startsWith(k, str), arr) -export const isLeafField = (field) => 
!!field?.elasticsearch?.dataType +export let isLeafField = (field) => + !!field?.elasticsearch?.dataType || !!field?.elasticsearch?.mapping?.type -export const isBlobField = (field) => - field?.elasticsearch?.meta?.subType === 'blob' && isLeafField(field) +export let isBlobField = (field) => + field?.subType === 'blob' && isLeafField(field) -export const isArrayField = (field) => - field?.elasticsearch?.meta?.subType === 'array' +export let isArrayField = (field) => field?.subType === 'array' -export const isArrayOfScalarsField = (field) => +export let isArrayOfScalarsField = (field) => isArrayField(field) && isLeafField(field) -export const isArrayOfObjectsField = (field) => +export let isArrayOfObjectsField = (field) => isArrayField(field) && !isLeafField(field) /** * Object where keys are paths for fields that are arrays of objects and values * are all the paths under them. */ -export const getArrayOfObjectsPathsMap = _.memoize((schema) => { - const fieldsPaths = _.keys(schema.fields) +export let getArrayOfObjectsPathsMap = _.memoize((schema) => { + let fieldsPaths = _.keys(schema.fields) return F.reduceIndexed( (acc, field, arrayPath) => { if (isArrayOfObjectsField(field)) { @@ -36,11 +35,11 @@ export const getArrayOfObjectsPathsMap = _.memoize((schema) => { ) }, _.get('elasticsearch.index')) -export const stripTags = _.curry((tags, str) => +export let stripTags = _.curry((tags, str) => str.replaceAll(tags.pre, '').replaceAll(tags.post, '') ) -const getRangesRegexp = _.memoize( +let getRangesRegexp = _.memoize( (tags) => new RegExp(`${tags.pre}(?.*?)${tags.post}`, 'g') ) @@ -52,12 +51,12 @@ const getRangesRegexp = _.memoize( * * `A red car` */ -const getHighlightRanges = _.curry((tags, str) => { +let getHighlightRanges = _.curry((tags, str) => { let runningTagsLength = 0 - const ranges = [] - for (const match of str.matchAll(getRangesRegexp(tags))) { - const start = match.index - runningTagsLength - const end = start + match.groups.capture.length + let ranges = 
[] + for (let match of str.matchAll(getRangesRegexp(tags))) { + let start = match.index - runningTagsLength + let end = start + match.groups.capture.length ranges.push([start, end]) runningTagsLength += match[0].length - match[1].length } @@ -71,14 +70,14 @@ const getHighlightRanges = _.curry((tags, str) => { * example: * * ```javascript - * const braceHighlight = F.highlight("{", "}") + * let braceHighlight = F.highlight("{", "}") * braceHighlight([[2, 4], [9, 10]], "hello world") // -> "he{llo} wor{ld}" * ```` */ -const highlightFromRanges = (pre, post, ranges, str) => { - const starts = _.fromPairs(_.map((x) => [x[0]], ranges)) - const ends = _.fromPairs(_.map((x) => [x[1]], ranges)) - const highlighted = str.replace(/./g, (match, index) => { +let highlightFromRanges = (pre, post, ranges, str) => { + let starts = _.fromPairs(_.map((x) => [x[0]], ranges)) + let ends = _.fromPairs(_.map((x) => [x[1]], ranges)) + let highlighted = str.replace(/./g, (match, index) => { if (index in starts) return `${pre}${match}` if (index in ends) return `${post}${match}` return match @@ -90,11 +89,11 @@ const highlightFromRanges = (pre, post, ranges, str) => { : highlighted } -export const mergeHighlights = (tags, ...strs) => { +export let mergeHighlights = (tags, ...strs) => { // This may look unnecessary but merging highlights is not cheap and many // times is not even needed if (strs.length <= 1) return _.head(strs) - const ranges = F.mergeRanges(_.flatMap(getHighlightRanges(tags), strs)) + let ranges = F.mergeRanges(_.flatMap(getHighlightRanges(tags), strs)) return highlightFromRanges( tags.pre, tags.post, diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js index 1ea6bf76c..ee80dedd6 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js +++ 
b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js @@ -1,10 +1,10 @@ import { mergeHighlights } from './util.js' -const tags = { pre: '', post: '' } +let tags = { pre: '', post: '' } describe('mergeHighlights()', () => { it('should merge highlights that do not overlap', () => { - const actual = mergeHighlights( + let actual = mergeHighlights( tags, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' @@ -15,7 +15,7 @@ describe('mergeHighlights()', () => { }) it('should merge highlights that overlap', () => { - const actual = mergeHighlights( + let actual = mergeHighlights( tags, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' @@ -26,7 +26,7 @@ describe('mergeHighlights()', () => { }) it('should merge highlights that are contained within another', () => { - const actual = mergeHighlights( + let actual = mergeHighlights( tags, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' @@ -37,7 +37,7 @@ describe('mergeHighlights()', () => { }) it('should merge highlights at the end of the string', () => { - const actual = mergeHighlights( + let actual = mergeHighlights( tags, 'The quick brown fox jumps over the lazy dog', 'The quick brown fox jumps over the lazy dog' diff --git a/packages/provider-elasticsearch/src/example-types/results/index.js b/packages/provider-elasticsearch/src/example-types/results/index.js index 6a00673ac..e3eccc68b 100644 --- a/packages/provider-elasticsearch/src/example-types/results/index.js +++ b/packages/provider-elasticsearch/src/example-types/results/index.js @@ -5,18 +5,16 @@ import { searchWithHighlights } from './highlighting/search.js' export default { validContext: () => true, async result(node, search, schema) { - const page = (node.page || 1) - 1 - const pageSize = node.pageSize || 10 - const startRecord = page * pageSize - const sortField = node.sortField - ? 
getField(schema, node.sortField) - : '_score' + let page = (node.page || 1) - 1 + let pageSize = node.pageSize || 10 + let startRecord = page * pageSize + let sortField = node.sortField ? getField(schema, node.sortField) : '_score' - search = node.highlight?.enable - ? searchWithHighlights(node, search, schema) - : search + search = node.highlight?.disable + ? search + : searchWithHighlights(node, search, schema) - const response = await search( + let response = await search( F.omitBlank({ from: startRecord, size: pageSize, diff --git a/packages/provider-elasticsearch/src/example-types/results/type.d.ts b/packages/provider-elasticsearch/src/example-types/results/type.d.ts index 40fe42db9..a2a936c01 100644 --- a/packages/provider-elasticsearch/src/example-types/results/type.d.ts +++ b/packages/provider-elasticsearch/src/example-types/results/type.d.ts @@ -6,10 +6,10 @@ type Path = string interface HighlightConfig { /** - * Whether to send highlighting configuration to elastic and merge + * Whether to not send highlighting configuration to elastic and merge * highlighting results onto source. Defaults to `false`. */ - enable?: boolean + disable?: boolean /** * Nested paths in arrays of objects that should be copied from source into * highlighted results. 
diff --git a/packages/provider-elasticsearch/src/schema.js b/packages/provider-elasticsearch/src/schema.js index 566e2f339..690250638 100644 --- a/packages/provider-elasticsearch/src/schema.js +++ b/packages/provider-elasticsearch/src/schema.js @@ -37,7 +37,7 @@ let fromEsIndexMapping = (mapping) => { field, label: _.startCase(field), elasticsearch: F.compactObject({ - ...mapping, + mapping, dataType: mapping.type, // Find the child notAnalyzedField to set up facet autocomplete vs word notAnalyzedField: _.findKey( diff --git a/packages/provider-elasticsearch/src/utils/futil.js b/packages/provider-elasticsearch/src/utils/futil.js index 6797b72cf..9e6fe63f7 100644 --- a/packages/provider-elasticsearch/src/utils/futil.js +++ b/packages/provider-elasticsearch/src/utils/futil.js @@ -1,11 +1,7 @@ import _ from 'lodash/fp.js' import F from 'futil' -export const areArraysEqual = _.flow(_.xor, _.isEmpty) - -export const removePrefix = _.curry((prefix, str) => - str && prefix ? str.slice(prefix.length) : '' -) +export const isArraysEqual = _.flow(_.xor, _.isEmpty) export let unsetOnTree = _.curry((prop, tree) => _.compact(F.treeToArrayBy()(F.popProperty(prop), tree)) diff --git a/packages/provider-elasticsearch/src/utils/futil.test.js b/packages/provider-elasticsearch/src/utils/futil.test.js index 275ef6377..ff25ac9ee 100644 --- a/packages/provider-elasticsearch/src/utils/futil.test.js +++ b/packages/provider-elasticsearch/src/utils/futil.test.js @@ -5,6 +5,7 @@ import { writeTreeNode, transmuteTree, virtualConcat, + unsetOnTree, } from './futil.js' import { simplifyBucket } from './elasticDSL.js' @@ -275,4 +276,205 @@ describe('futil candidates', () => { ], }) }) + + describe('unsetOnTree()', () => { + it('Should hoist from tree based on demarcation for hoisting from aggs', () => { + let input = { + aggs: { + groups: { + date_histogram: { + field: 'PO.IssuedDate.fiscal', + interval: 'year', + min_doc_count: 0, + __hoistProps: { + runtime_mappings: { + 
'PO.IssuedDate.fiscal': { + script: { + params: { monthOffset: 3 }, + source: `if(doc['PO.IssuedDate'].size()!=0){${''}emit(doc['PO.IssuedDate']${''}.value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}${''}`, + }, + type: 'date', + }, + }, + }, + }, + aggs: { + min: { min: { field: 'LineItem.TotalPrice' } }, + max: { max: { field: 'LineItem.TotalPrice' } }, + avg: { avg: { field: 'LineItem.TotalPrice' } }, + sum: { + sum: { + field: 'LineItem.TotalPrice', + __hoistProps: { + runtime_mappings: { + 'PO.OtherDate.fiscal': { + script: { + params: { monthOffset: 3 }, + source: `if(doc['PO.OtherDate'].size()!=0){${''}emit(doc['PO.OtherDate']${''}.value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}${''}`, + }, + type: 'date', + }, + }, + }, + }, + }, + }, + }, + }, + } + + let output = { + result: { + aggs: { + groups: { + date_histogram: { + field: 'PO.IssuedDate.fiscal', + interval: 'year', + min_doc_count: 0, + }, + aggs: { + min: { min: { field: 'LineItem.TotalPrice' } }, + max: { max: { field: 'LineItem.TotalPrice' } }, + avg: { avg: { field: 'LineItem.TotalPrice' } }, + sum: { sum: { field: 'LineItem.TotalPrice' } }, + }, + }, + }, + }, + removed: [ + { + runtime_mappings: { + 'PO.IssuedDate.fiscal': { + script: { + params: { monthOffset: 3 }, + source: `if(doc['PO.IssuedDate'].size()!=0){${''}emit(doc['PO.IssuedDate']${''}.value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}${''}`, + }, + type: 'date', + }, + }, + }, + { + runtime_mappings: { + 'PO.OtherDate.fiscal': { + script: { + params: { monthOffset: 3 }, + source: `if(doc['PO.OtherDate'].size()!=0){${''}emit(doc['PO.OtherDate']${''}.value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}${''}`, + }, + type: 'date', + }, + }, + }, + ], + } + let result = { + result: input, + removed: unsetOnTree('__hoistProps', input), + } + expect(result).toEqual(output) + }) + + it('Should hoist from tree based on demarcation for hoisting from filters', () => { + 
let input = { + index: 'sp-data-lit', + body: { + query: { + constant_score: { + filter: { + bool: { + should: [ + { + bool: { + must: [ + { + __hoistProps: { + runtime_mappings: { + 'FederalDoc.relevantContractDates.signedDate.fiscal': + { + type: 'date', + script: { + source: + "if(doc['FederalDoc.relevantContractDates.signedDate'].size()!=0){emit(doc['FederalDoc.relevantContractDates.signedDate'].value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}", + params: { + monthOffset: 3, + }, + }, + }, + }, + }, + range: { + 'FederalDoc.relevantContractDates.signedDate.fiscal': + { + gte: '2015-04-01T00:00:00.000Z', + lte: '2015-06-30T23:59:59Z', + }, + }, + }, + ], + }, + }, + ], + minimum_should_match: 1, + }, + }, + }, + }, + }, + } + let output = { + result: { + index: 'sp-data-lit', + body: { + query: { + constant_score: { + filter: { + bool: { + should: [ + { + bool: { + must: [ + { + range: { + 'FederalDoc.relevantContractDates.signedDate.fiscal': + { + gte: '2015-04-01T00:00:00.000Z', + lte: '2015-06-30T23:59:59Z', + }, + }, + }, + ], + }, + }, + ], + minimum_should_match: 1, + }, + }, + }, + }, + }, + }, + removed: [ + { + runtime_mappings: { + 'FederalDoc.relevantContractDates.signedDate.fiscal': { + type: 'date', + script: { + source: + "if(doc['FederalDoc.relevantContractDates.signedDate'].size()!=0){emit(doc['FederalDoc.relevantContractDates.signedDate'].value.plusMonths(params['monthOffset']).toInstant().toEpochMilli())}", + params: { + monthOffset: 3, + }, + }, + }, + }, + }, + ], + } + + let result = { + result: input, + removed: unsetOnTree('__hoistProps', input), + } + expect(result).toEqual(output) + }) + }) }) diff --git a/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js b/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js new file mode 100644 index 000000000..d317e985f --- /dev/null +++ b/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js @@ -0,0 +1,57 @@ +import React from 'react' +import 
_ from 'lodash/fp.js' +import F from 'futil' +import { flattenObjectsNotArrays } from '../../utils/futil.js' +import { withTheme } from '../../utils/theme.js' + +let labelForField = (schema, field) => + _.getOr(field, 'label', _.find({ field }, schema)) + +// Get fields that were highlighted but not included +export let getAdditionalHighlightedFields = ({ schema, record, node }) => { + let fieldNames = _.difference( + _.keys(flattenObjectsNotArrays(record._highlight)), + [...node.include] + ) + return _.pick(fieldNames, schema.fields) +} + +let HighlightedColumn = ({ schema, node, record, theme: { Modal, Table } }) => { + let viewModal = React.useState(false) + let additionalFields = getAdditionalHighlightedFields({ + schema, + record, + node, + }) + return _.isEmpty(additionalFields) ? null : ( + <> + +

Other Matching Fields

+ + + {_.map( + ({ label, value }) => ( + + + + ), + additionalFields + )} + +
{labelForField(schema, label)} +
+
+ + + ) +} + +export default withTheme(HighlightedColumn) diff --git a/packages/react/src/utils/futil.js b/packages/react/src/utils/futil.js index e20416f8b..b23c3455d 100644 --- a/packages/react/src/utils/futil.js +++ b/packages/react/src/utils/futil.js @@ -24,3 +24,21 @@ export let aspectWrapper = F.aspect({ after: (result) => console.info('"after" aspect fired!', result), onError: (e) => console.error('"onError" aspect fired!', e), }) + +export const flattenObjectWith = _.curryN(2, (fn, input, paths) => + F.isFlatObject(input) + ? input + : F.reduceIndexed( + (output, value, key) => + _.merge(output, fn(value, F.dotJoinWith(F.isNotNil)([paths, key]))), + {}, + input + ) +) + +// { a: [{ b: { c: 1 } }] } => { a: [{ 'b.c': 1 }] } +export const flattenObjectsNotArrays = flattenObjectWith((value, path) => { + if (_.isPlainObject(value)) return flattenObjectsNotArrays(value, path) + if (_.isArray(value)) return { [path]: _.map(flattenObjectsNotArrays, value) } + return { [path]: value } +}) From 243b024f99857c3060d780b70b4ebecb45c1f993 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 10:01:45 -0500 Subject: [PATCH 25/30] Simplify code and address feedback --- .../src/example-types/results/README.md | 6 +- .../results/highlighting/request.js | 16 +- .../results/highlighting/request.test.js | 30 +-- .../results/highlighting/response.js | 220 ++++++++++-------- .../results/highlighting/response.test.js | 134 +++++++++-- .../results/highlighting/search.js | 14 +- .../results/highlighting/testSchema.js | 6 +- .../results/highlighting/util.js | 47 ++-- .../results/highlighting/util.test.js | 29 ++- .../provider-elasticsearch/src/utils/futil.js | 2 +- 10 files changed, 322 insertions(+), 182 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/README.md b/packages/provider-elasticsearch/src/example-types/results/README.md index ffd01cef1..d9914ce41 100644 --- 
a/packages/provider-elasticsearch/src/example-types/results/README.md +++ b/packages/provider-elasticsearch/src/example-types/results/README.md @@ -49,9 +49,9 @@ Whitelisted sub-fields are sent for highlighting, since they could be present in
-#### 2. Fields groups +#### 2. Group fields -Fields groups are not sent for highlighting because we assume users want to highlight fields that were copied over instead of the fields groups themselves: +Group fields are not sent for highlighting because we assume users want to highlight fields that were copied over instead of the group fields themselves:
@@ -60,7 +60,7 @@ Fields groups are not sent for highlighting because we assume users want to high ```jsonc { "fields": { - // `address` won't be sent for highlighting since it's a field group. + // `address` won't be sent for highlighting since it's a group field. "address": { "elasticsearch": {} }, diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js index 5b88085ee..87b938fe3 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.js @@ -76,9 +76,9 @@ let getHighlightSubFieldsNames = (schema) => _.keys(_.pickBy('highlight', schema.elasticsearch?.subFields)) /* - * Paths of all fields groups and their subfields that can be highlighted. + * Paths of all group fields and their subfields that can be highlighted. */ -export let getHighlightFieldsGroupsPaths = _.memoize((schema) => { +export let getHighlightGroupFieldsPaths = _.memoize((schema) => { let subFieldsNames = getHighlightSubFieldsNames(schema) return _.flatMap((field) => { let copy_to = field.elasticsearch?.mapping?.copy_to @@ -89,8 +89,8 @@ export let getHighlightFieldsGroupsPaths = _.memoize((schema) => { }, schema.fields) }, _.get('elasticsearch.index')) -let isFieldsGroupPath = _.curry((schema, path) => - _.find(_.eq(path), getHighlightFieldsGroupsPaths(schema)) +let isGroupFieldPath = _.curry((schema, path) => + _.find(_.eq(path), getHighlightGroupFieldsPaths(schema)) ) /* @@ -100,7 +100,7 @@ export let getAllHighlightFields = _.memoize((schema) => { let subFieldsNames = getHighlightSubFieldsNames(schema) return F.reduceIndexed( (acc, field, path) => { - if (!isLeafField(field) || isFieldsGroupPath(schema, path)) { + if (!isLeafField(field) || isGroupFieldPath(schema, path)) { return acc } acc[path] = field @@ -146,8 +146,8 @@ let blobConfiguration = { * 
Get configuration for highlight fields to send in the elastic request. */ export let getRequestHighlightFields = (schema, node) => { - let fieldGroupsInQuery = collectKeysAndValues( - isFieldsGroupPath(schema), + let groupFieldsInQuery = collectKeysAndValues( + isGroupFieldPath(schema), node._meta?.relevantFilters ) @@ -157,7 +157,7 @@ export let getRequestHighlightFields = (schema, node) => { let getHighlightQuery = (field, path) => { let pathsToReplace = _.intersection( - fieldGroupsInQuery, + groupFieldsInQuery, field.elasticsearch?.mapping?.copy_to ) if (!_.isEmpty(pathsToReplace)) { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js index d75dc8d66..fb336e4b7 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/request.test.js @@ -2,12 +2,12 @@ import { schema } from './testSchema.js' import { addPathsToRequestSource, getAllHighlightFields, - getHighlightFieldsGroupsPaths, + getHighlightGroupFieldsPaths, getRequestHighlightFields, } from './request.js' -describe('getHighlightFieldsGroupsPaths', () => { - it('should return all combinations of fields groups and sub-fields', () => { +describe('getHighlightGroupFieldsPaths', () => { + it('should return all combinations of group fields and sub-fields', () => { let schema = { elasticsearch: { subFields: { @@ -17,7 +17,7 @@ describe('getHighlightFieldsGroupsPaths', () => { }, }, fields: { - fieldsGroup1: { + groupField1: { elasticsearch: { dataType: 'text', mapping: { @@ -25,7 +25,7 @@ describe('getHighlightFieldsGroupsPaths', () => { }, }, }, - fieldsGroup2: { + groupField2: { elasticsearch: { dataType: 'text', mapping: { @@ -37,19 +37,19 @@ describe('getHighlightFieldsGroupsPaths', () => { elasticsearch: { dataType: 'text', mapping: { - copy_to: 
['fieldsGroup1', 'fieldsGroup2'], + copy_to: ['groupField1', 'groupField2'], }, }, }, }, } - expect(getHighlightFieldsGroupsPaths(schema)).toEqual([ - 'fieldsGroup1', - 'fieldsGroup2', - 'fieldsGroup1.subfield1', - 'fieldsGroup2.subfield1', - 'fieldsGroup1.subfield2', - 'fieldsGroup2.subfield2', + expect(getHighlightGroupFieldsPaths(schema)).toEqual([ + 'groupField1', + 'groupField2', + 'groupField1.subfield1', + 'groupField2.subfield1', + 'groupField1.subfield2', + 'groupField2.subfield2', ]) }) }) @@ -389,7 +389,7 @@ describe('getRequestHighlightFields()', () => { }) }) - it('should generate highlight_query with fields groups replaced', () => { + it('should generate highlight_query with group fields replaced', () => { let schema = { fields: { address: { @@ -435,7 +435,7 @@ describe('getRequestHighlightFields()', () => { }) }) - it('should generate highlight_query with fields groups replaced for sub fields', () => { + it('should generate highlight_query with group fields replaced for sub fields', () => { let schema = { elasticsearch: { subFields: { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js index ee3b60077..c6f98e016 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.js @@ -1,13 +1,16 @@ import _ from 'lodash/fp.js' import F from 'futil' -import { isArraysEqual, groupByIndexed } from '../../../utils/futil.js' +import { groupByIndexed } from '../../../utils/futil.js' import { stripTags, mergeHighlights, isBlobField, isArrayField, isArrayOfScalarsField, - findByPrefix, + findByPrefixIn, + isArrayOfObjectsField, + getNestedPathsMap, + stripParentPath, getArrayOfObjectsPathsMap, } from './util.js' @@ -27,121 +30,139 @@ let groupByMultiField = _.curry((schema, highlight) => ) /** - * Mutate hit 
`highlight`: - * 1. Fragments for text fields get merged. - * 2. Fragments for large (blob) text fields get concatenated. - * 3. Fragments for arrays get ordered based on the source array + * Group nested fields under their parent array of objects path. */ -export let transformResponseHighlight = ( - schema, - hit, - tags, - nestedArrayIncludes = {} -) => { +export let groupByArrayOfObjectsFields = _.curry((schema, highlight) => { let arrayOfObjectsPaths = _.keys(getArrayOfObjectsPathsMap(schema)) + return F.reduceIndexed( + (acc, fragments, path) => { + let arrayPath = findByPrefixIn(arrayOfObjectsPaths, path) + if (arrayPath) { + let nestedPath = stripParentPath(arrayPath, path) + return _.update([arrayPath], _.set([nestedPath], fragments), acc) + } + return _.set([path], fragments, acc) + }, + {}, + highlight + ) +}) - let getIndexedArrayObject = (arr, fragments, arrayPath, itemPath) => { - let fragmentsMap = _.groupBy(stripTags(tags), fragments) - return F.reduceIndexed( - (acc, item, index) => { - let fragments = fragmentsMap[itemPath ? _.get(itemPath, item) : item] - if (fragments) { - F.setOn( - F.dotJoin([`${index}`, itemPath]), - mergeHighlights(tags, ...fragments), - acc - ) - for (let itemPath of nestedArrayIncludes[arrayPath] ?? []) { - F.updateOn( - F.dotJoin([`${index}`, itemPath]), - (highlight) => highlight ?? _.get(itemPath, item), - acc - ) - } - } - return acc - }, - arr, - _.get(arrayPath, hit._source) - ) - } +/** + * Convert an array of fragments to an object where keys are corresponding + * indexes in source array and values are fragments. + */ +export let getIndexedFragments = (tags, source, fragments, nestedPath) => { + let fragmentsMap = _.groupBy(stripTags(tags), fragments) + return F.reduceIndexed( + (acc, item, index) => { + let value = F.getOrReturn(nestedPath, item) + return _.has(value, fragmentsMap) + ? 
F.setOn(index, fragmentsMap[value], acc) + : acc + }, + {}, + source + ) +} - let getArrayPath = (path) => - isArrayOfScalarsField(schema.fields[path]) - ? path - : findByPrefix(path, arrayOfObjectsPaths) +let getIndexedAndMergedFragments = (tags, source, fragments, nestedPath) => + _.mapValues( + mergeHighlights(tags), + getIndexedFragments(tags, source, fragments, nestedPath) + ) - let highlight = _.flow( +export let getArrayOfScalarsFragments = getIndexedAndMergedFragments + +/** + * Ex: `{ "cover.title": [...] }` -> `{ 0: { cover: { title: [...] } } }` + * + * See tests for more details. + */ +export let getArrayOfObjectsFragments = (tags, source, fragmentsMap) => + _.mergeAll( + F.mapIndexed( + (fragments, nestedPath) => + _.mapValues( + (merged) => _.set(nestedPath, merged, {}), + getIndexedAndMergedFragments(tags, source, fragments, nestedPath) + ), + fragmentsMap + ) + ) + +/** + * Get hit `highlight`: + * 1. Fragments for text fields get merged. + * 2. Fragments for large (blob) text fields get concatenated. + * 3. Fragments for arrays get ordered based on the source array + */ +export let getResponseHighlight = (schema, hit, tags, copySourcePaths) => { + let pathsMap = getNestedPathsMap(schema, copySourcePaths) + return _.flow( groupByMultiField(schema), _.mapValues(_.flatten), - F.reduceIndexed((acc, fragments, path) => { - let arrayPath = getArrayPath(path) - if (arrayPath) { - acc[arrayPath] = getIndexedArrayObject( - acc[arrayPath] ?? 
{}, - fragments, - arrayPath, - path.slice(arrayPath.length + 1) + groupByArrayOfObjectsFields(schema), + F.mapValuesIndexed((fragments, path) => { + let field = schema.fields[path] + + if (isBlobField(field)) { + return fragments + } + + if (isArrayOfScalarsField(field)) { + let sourceArray = _.get(path, hit._source) + return getArrayOfScalarsFragments(tags, sourceArray, fragments) + } + + if (isArrayOfObjectsField(field)) { + let sourceArray = _.get(path, hit._source) + let result = getArrayOfObjectsFragments(tags, sourceArray, fragments) + let copyPaths = pathsMap[path] + if (_.isEmpty(copyPaths)) return result + return F.mapValuesIndexed( + (to, index) => _.merge(_.pick(copyPaths, sourceArray[index]), to), + result ) - } else if (isBlobField(schema.fields[path])) { - acc[path] = fragments - } else { - acc[path] = mergeHighlights(tags, ...fragments) } - return acc - }, {}) - )(hit.highlight) - if (!_.isEmpty(highlight)) hit.highlight = highlight + return mergeHighlights(tags, fragments) + }) + )(hit.highlight) } /** * Remove each path in `paths` from `hit._source`. + * + * This function is more complicated than a simple `filterTree` because it + * needs to be performant since it runs on every hit and may potentially have to + * recurse into large source values. */ export let removePathsFromSource = (schema, hit, paths) => { - // Nothing to do if (_.isEmpty(paths)) return - // "aoo" stands for "array of objects", because I was tired of typing it out - // over and over again. - let aooMap = getArrayOfObjectsPathsMap(schema) - let allAooPaths = _.keys(aooMap) - let getAooPath = (path) => findByPrefix(path, allAooPaths) - let [aooPaths, otherPaths] = _.partition(getAooPath, paths) - - let toRemove = { - ...F.arrayToObject(_.identity, _.constant(true), otherPaths), - ...F.mapValuesIndexed((paths, aooPath) => { - let removeEntireArray = - // All nested fields in array of objects should be removed - isArraysEqual(paths, aooMap[aooPath]) || - // Or... 
the path for the array of objects field should be removed - _.includes(aooPath, paths) - return ( - removeEntireArray || - _.map((path) => path.slice(aooPath.length + 1), paths) - ) - }, _.groupBy(getAooPath, aooPaths)), - } + let unsetAllPaths = _.curry((paths, obj) => { + for (let path of paths) F.unsetOn(path, obj) + return obj + }) - let removePathsFromArray = (paths) => (arr) => - _.reduce( - (acc, item) => { - for (let path of paths) F.unsetOn(path, item) - return _.isEmpty(item) ? acc : F.push(item, acc) - }, - [], - arr - ) + let allNestedPaths = getArrayOfObjectsPathsMap(schema) - for (let [path, value] of _.toPairs(toRemove)) { - if (value === true) { + for (let [path, nested] of _.toPairs(getNestedPathsMap(schema, paths))) { + let shouldRemovePath = + _.isEmpty(nested) || _.isEqual(nested, allNestedPaths[path]) + + if (!shouldRemovePath) { + // Remove paths from each item in the array. + F.updateOn(path, _.map(unsetAllPaths(nested)), hit._source) + // Remove empty array items. + F.updateOn(path, _.remove(_.isEmpty), hit._source) + // If the array itself is empty, remove it. + shouldRemovePath = _.isEmpty(_.get(path, hit._source)) + } + + if (shouldRemovePath) { F.unsetOn(path, hit._source) - } else { - F.updateOn(path, removePathsFromArray(value), hit._source) - if (_.isEmpty(_.get(path, hit._source))) { - F.unsetOn(path, hit._source) - } } } } @@ -149,11 +170,8 @@ export let removePathsFromSource = (schema, hit, paths) => { /* * Merge elastic hit highlights onto hit source. * - * As a clever developer, you will notice that the following function is a dirty - * and unholy version `_.merge`. So before you refactor it to use exactly that, - * consider that this implementation is about 100x faster than (immutable) - * `_.merge`. Query 100 records with arrays of thousands of elements each and - * convince yourself. + * On 100 hits each with an array of about 10,000 items this implementation is + * ~100x faster than a mutating lodash `_.merge`. 
*/ export let mergeHighlightsOnSource = (schema, hit) => { for (let path in hit.highlight) { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js index a03bc6d11..4e7072b7e 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/response.test.js @@ -3,12 +3,120 @@ import { schema } from './testSchema.js' import { mergeHighlightsOnSource, removePathsFromSource, - transformResponseHighlight, + getResponseHighlight, + groupByArrayOfObjectsFields, + getArrayOfScalarsFragments, + getArrayOfObjectsFragments, } from './response.js' let tags = { pre: '', post: '' } -describe('transformResponseHighlight()', () => { +describe('groupByArrayOfObjectsFields', () => { + it('should group array of objects fields but not array of scalars field', () => { + let highlight = { + 'library.categories': [ + 'Alternative Medicine', + 'Ethnic & Cultural', + ], + 'library.books.cover.title': [ + 'Nineteen Eighty-Four', + 'The Great Gatsby', + ], + 'library.books.cover.author': [ + 'George Orwell', + 'James Joyce', + ], + } + expect(groupByArrayOfObjectsFields(schema, highlight)).toEqual({ + 'library.categories': [ + 'Alternative Medicine', + 'Ethnic & Cultural', + ], + 'library.books': { + 'cover.title': [ + 'Nineteen Eighty-Four', + 'The Great Gatsby', + ], + 'cover.author': ['George Orwell', 'James Joyce'], + }, + }) + }) +}) + +describe('getIndexedAndMergedFragments', () => { + it('should index and merge arrays of strings fragments', () => { + let source = [ + 'Ethnic & Cultural', + 'Computer Science', + 'Alternative Medicine', + ] + let fragments = [ + 'Alternative Medicine', + 'Ethnic & Cultural', + 'Alternative Medicine', + 'Ethnic & Cultural', + ] + expect(getArrayOfScalarsFragments(tags, source, fragments)).toEqual({ + 0: 
'Ethnic & Cultural', + 2: 'Alternative Medicine', + }) + }) + + it('should index and merge arrays of objects fragments', () => { + let source = [ + { cover: { title: 'The Great Gatsby', author: 'F. Scott Fitzgerald' } }, + { cover: { title: 'The Grapes of Wrath', author: 'John Steinbeck' } }, + { cover: { title: 'Nineteen Eighty-Four', author: 'George Orwell' } }, + { cover: { title: 'Ulysses', author: 'James Joyce' } }, + ] + let fragments = [ + 'Nineteen Eighty-Four', + 'The Great Gatsby', + 'Nineteen Eighty-Four', + 'The Great Gatsby', + ] + expect( + getArrayOfScalarsFragments(tags, source, fragments, 'cover.title') + ).toEqual({ + 0: 'The Great Gatsby', + 2: 'Nineteen Eighty-Four', + }) + }) +}) + +describe('getArrayOfObjectsFragments()', () => { + it('should return indexed and merged fragments', () => { + let source = [ + { cover: { title: 'The Great Gatsby', author: 'F. Scott Fitzgerald' } }, + { cover: { title: 'The Grapes of Wrath', author: 'John Steinbeck' } }, + { cover: { title: 'Nineteen Eighty-Four', author: 'George Orwell' } }, + { cover: { title: 'Ulysses', author: 'James Joyce' } }, + ] + let fragments = { + 'cover.title': [ + 'Nineteen Eighty-Four', + 'The Great Gatsby', + ], + 'cover.author': ['George Orwell', 'James Joyce'], + } + expect(getArrayOfObjectsFragments(tags, source, fragments)).toEqual({ + 0: { + cover: { title: 'The Great Gatsby' }, + }, + 2: { + cover: { + title: 'Nineteen Eighty-Four', + author: 'George Orwell', + }, + }, + 3: { + cover: { author: 'James Joyce' }, + }, + }) + }) +}) + +describe('getResponseHighlight()', () => { describe('text fields', () => { it('should merge fragments', () => { let hit = { @@ -21,8 +129,7 @@ describe('transformResponseHighlight()', () => { ], }, } - transformResponseHighlight(schema, hit, tags) - expect(hit.highlight).toEqual({ + expect(getResponseHighlight(schema, hit, tags)).toEqual({ 'library.name': 'Imperial College London Abdus Salam Library', }) @@ -43,8 +150,7 @@ 
describe('transformResponseHighlight()', () => { ], }, } - transformResponseHighlight(schema, hit, tags) - expect(hit.highlight).toEqual({ + expect(getResponseHighlight(schema, hit, tags)).toEqual({ 'library.about': [ 'The Abdus Salam Library is', 'is the largest of', @@ -78,8 +184,7 @@ describe('transformResponseHighlight()', () => { ], }, } - transformResponseHighlight(schema, hit, tags) - expect(hit.highlight).toEqual({ + expect(getResponseHighlight(schema, hit, tags)).toEqual({ 'library.categories': { 0: 'Ethnic & Cultural', 2: 'Alternative Medicine', @@ -121,8 +226,7 @@ describe('transformResponseHighlight()', () => { ], }, } - transformResponseHighlight(schema, hit, tags) - expect(hit.highlight).toEqual({ + expect(getResponseHighlight(schema, hit, tags)).toEqual({ 'library.books': { 0: { cover: { @@ -164,9 +268,8 @@ describe('transformResponseHighlight()', () => { ], }, } - let nestedArrayIncludes = { 'library.books': ['cover.author'] } - transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) - expect(hit.highlight).toEqual({ + let copySourcePaths = ['library.books.cover.author'] + expect(getResponseHighlight(schema, hit, tags, copySourcePaths)).toEqual({ 'library.books': { 0: { cover: { @@ -208,9 +311,8 @@ describe('transformResponseHighlight()', () => { ], }, } - let nestedArrayIncludes = { 'library.books': ['cover.title'] } - transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) - expect(hit.highlight).toEqual({ + let copySourcePaths = ['library.books.cover.title'] + expect(getResponseHighlight(schema, hit, tags, copySourcePaths)).toEqual({ 'library.books': { 0: { cover: { diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js index fe45cd6de..f33bc23c5 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js +++ 
b/packages/provider-elasticsearch/src/example-types/results/highlighting/search.js @@ -8,7 +8,7 @@ import { import { mergeHighlightsOnSource, removePathsFromSource, - transformResponseHighlight, + getResponseHighlight, } from './response.js' let tags = { @@ -19,7 +19,13 @@ let tags = { export let searchWithHighlights = (node, search, schema) => async (body) => { // Paths for fields to always include regardless of whether the user included // them. They will be removed from the response hits so there's no harm done. - let pathsToAdd = _.flatten(_.values(getArrayOfObjectsPathsMap(schema))) + let pathsToAdd = _.flatten( + F.mapIndexed( + (paths, arrayPath) => _.map((path) => `${arrayPath}.${path}`, paths), + getArrayOfObjectsPathsMap(schema) + ) + ) + let { addedPaths, ...source } = addPathsToRequestSource( schema, body._source, @@ -38,8 +44,8 @@ export let searchWithHighlights = (node, search, schema) => async (body) => { }) for (let hit of response.hits.hits) { - let nestedArrayIncludes = node.highlight?.nestedArrayIncludes - transformResponseHighlight(schema, hit, tags, nestedArrayIncludes) + let copySourcePaths = node.highlight?.copySourcePaths + hit.highlight = getResponseHighlight(schema, hit, tags, copySourcePaths) removePathsFromSource(schema, hit, addedPaths) mergeHighlightsOnSource(schema, hit) } diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js index a1d4faec2..2eb939729 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/testSchema.js @@ -6,7 +6,7 @@ export let schema = { }, }, fields: { - fieldsGroup: { + groupField: { elasticsearch: { dataType: 'text', mapping: { @@ -35,11 +35,11 @@ export let schema = { elasticsearch: { dataType: 'text', mapping: { - copy_to: ['fieldsGroup'], + copy_to: 
['groupField'], fields: { subfield: { type: 'text', - copy_to: ['fieldsGroup.subfield'], + copy_to: ['groupField.subfield'], }, }, }, diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js index 6fee3e6d7..f127ed7f1 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.js @@ -1,8 +1,6 @@ import _ from 'lodash/fp.js' import F from 'futil' -export let findByPrefix = (str, arr) => _.find((k) => _.startsWith(k, str), arr) - export let isLeafField = (field) => !!field?.elasticsearch?.dataType || !!field?.elasticsearch?.mapping?.type @@ -17,24 +15,43 @@ export let isArrayOfScalarsField = (field) => export let isArrayOfObjectsField = (field) => isArrayField(field) && !isLeafField(field) +export let stripParentPath = _.curry((parentPath, path) => + _.startsWith(`${parentPath}.`, path) + ? path.slice(parentPath.length + 1) + : undefined +) + +export let findByPrefixIn = _.curry((arr, str) => + _.find((k) => _.startsWith(k, str), arr) +) + /** * Object where keys are paths for fields that are arrays of objects and values * are all the paths under them. */ export let getArrayOfObjectsPathsMap = _.memoize((schema) => { - let fieldsPaths = _.keys(schema.fields) - return F.reduceIndexed( - (acc, field, arrayPath) => { - if (isArrayOfObjectsField(field)) { - acc[arrayPath] = _.filter(_.startsWith(`${arrayPath}.`), fieldsPaths) - } - return acc - }, - {}, - schema.fields - ) + let paths = _.keys(schema.fields) + return _.flow( + _.pickBy(isArrayOfObjectsField), + F.mapValuesIndexed((_field, arrayPath) => + F.compactMap(stripParentPath(arrayPath), paths) + ) + )(schema.fields) }, _.get('elasticsearch.index')) +/** + * Group nested paths under their parent array of objects path. 
+ */ +export let getNestedPathsMap = (schema, paths) => { + let allPaths = _.keys(getArrayOfObjectsPathsMap(schema)) + return _.flow( + _.groupBy((path) => findByPrefixIn(allPaths, path) ?? path), + F.mapValuesIndexed((nested, path) => + F.compactMap(stripParentPath(path), nested) + ) + )(paths) +} + export let stripTags = _.curry((tags, str) => str.replaceAll(tags.pre, '').replaceAll(tags.post, '') ) @@ -89,7 +106,7 @@ let highlightFromRanges = (pre, post, ranges, str) => { : highlighted } -export let mergeHighlights = (tags, ...strs) => { +export let mergeHighlights = _.curry((tags, strs) => { // This may look unnecessary but merging highlights is not cheap and many // times is not even needed if (strs.length <= 1) return _.head(strs) @@ -100,4 +117,4 @@ export let mergeHighlights = (tags, ...strs) => { ranges, stripTags(tags, _.head(strs)) ) -} +}) diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js index ee80dedd6..428008721 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js @@ -1,47 +1,44 @@ +import { schema } from './testSchema.js' import { mergeHighlights } from './util.js' let tags = { pre: '', post: '' } describe('mergeHighlights()', () => { it('should merge highlights that do not overlap', () => { - let actual = mergeHighlights( - tags, + let actual = mergeHighlights(tags, [ 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) + 'The quick brown fox jumps over the lazy dog', + ]) expect(actual).toEqual( 'The quick brown fox jumps over the lazy dog' ) }) it('should merge highlights that overlap', () => { - let actual = mergeHighlights( - tags, + let actual = mergeHighlights(tags, [ 'The quick brown fox jumps over the lazy dog', - 'The quick brown 
fox jumps over the lazy dog' - ) + 'The quick brown fox jumps over the lazy dog', + ]) expect(actual).toEqual( 'The quick brown fox jumps over the lazy dog' ) }) it('should merge highlights that are contained within another', () => { - let actual = mergeHighlights( - tags, + let actual = mergeHighlights(tags, [ 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) + 'The quick brown fox jumps over the lazy dog', + ]) expect(actual).toEqual( 'The quick brown fox jumps over the lazy dog' ) }) it('should merge highlights at the end of the string', () => { - let actual = mergeHighlights( - tags, + let actual = mergeHighlights(tags, [ 'The quick brown fox jumps over the lazy dog', - 'The quick brown fox jumps over the lazy dog' - ) + 'The quick brown fox jumps over the lazy dog', + ]) expect(actual).toEqual( 'The quick brown fox jumps over the lazy dog' ) diff --git a/packages/provider-elasticsearch/src/utils/futil.js b/packages/provider-elasticsearch/src/utils/futil.js index 9e6fe63f7..d8c932431 100644 --- a/packages/provider-elasticsearch/src/utils/futil.js +++ b/packages/provider-elasticsearch/src/utils/futil.js @@ -1,7 +1,7 @@ import _ from 'lodash/fp.js' import F from 'futil' -export const isArraysEqual = _.flow(_.xor, _.isEmpty) +export const areArraysEqual = _.flow(_.xor, _.isEmpty) export let unsetOnTree = _.curry((prop, tree) => _.compact(F.treeToArrayBy()(F.popProperty(prop), tree)) From c00d5f9b62fb0b41eb33fce344b5ee2183ae04f6 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 12:37:51 -0500 Subject: [PATCH 26/30] Fix tests --- .../src/schema-data/schema-with-types.js | 340 ++++++++++-------- .../src/schema-data/schema-without-types.js | 48 +-- 2 files changed, 224 insertions(+), 164 deletions(-) diff --git a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js index f8b8186b2..1773b02ba 100644 --- 
a/packages/provider-elasticsearch/src/schema-data/schema-with-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-with-types.js @@ -11,11 +11,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -28,11 +30,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -45,11 +49,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -62,11 +68,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -79,11 +87,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -96,11 +106,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -112,7 +124,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'float', - 
type: 'float', + mapping: { type: 'float' }, }, field: 'imdbRating', label: 'Imdb Rating', @@ -122,7 +134,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'imdbVotes', label: 'Imdb Votes', @@ -133,11 +145,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -149,7 +163,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'metaScore', label: 'Meta Score', @@ -160,11 +174,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -177,11 +193,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -194,11 +212,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -210,7 +230,7 @@ export default { typeOptions: ['date', 'exists'], elasticsearch: { dataType: 'date', - type: 'date', + mapping: { type: 'date' }, }, field: 'released', label: 'Released', @@ -220,7 +240,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, 
field: 'runtimeMinutes', label: 'Runtime Minutes', @@ -231,11 +251,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -248,11 +270,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -265,11 +289,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -281,7 +307,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'year', label: 'Year', @@ -291,7 +317,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'yearEnded', label: 'Year Ended', @@ -311,11 +337,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -328,11 +356,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -345,11 +375,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - 
type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -362,11 +394,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -379,11 +413,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -396,11 +432,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -412,7 +450,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'float', - type: 'float', + mapping: { type: 'float' }, }, field: 'imdbRating', label: 'Imdb Rating', @@ -422,7 +460,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'imdbVotes', label: 'Imdb Votes', @@ -433,11 +471,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -449,7 +489,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'metaScore', label: 'Meta Score', @@ -460,11 +500,13 @@ export default { 
elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -477,11 +519,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -494,11 +538,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -510,7 +556,7 @@ export default { typeOptions: ['date', 'exists'], elasticsearch: { dataType: 'date', - type: 'date', + mapping: { type: 'date' }, }, field: 'released', label: 'Released', @@ -520,7 +566,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'runtimeMinutes', label: 'Runtime Minutes', @@ -531,11 +577,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -548,11 +596,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -565,11 +615,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 
256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -581,7 +633,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'year', label: 'Year', @@ -591,7 +643,7 @@ export default { typeOptions: ['number', 'exists'], elasticsearch: { dataType: 'long', - type: 'long', + mapping: { type: 'long' }, }, field: 'yearEnded', label: 'Year Ended', diff --git a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js index a2005067d..d39c2fe6c 100644 --- a/packages/provider-elasticsearch/src/schema-data/schema-without-types.js +++ b/packages/provider-elasticsearch/src/schema-data/schema-without-types.js @@ -10,11 +10,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -27,11 +29,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -53,11 +57,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, }, @@ -70,11 +76,13 @@ export default { elasticsearch: { dataType: 'text', notAnalyzedField: 'keyword', - type: 'text', - fields: { - keyword: { - type: 'keyword', - ignore_above: 256, + mapping: { + type: 'text', + fields: { + keyword: { + type: 'keyword', + ignore_above: 256, + }, }, }, 
}, From 40b41c0f4eb249ff31dfca316a5bbc967c310f89 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 13:04:57 -0500 Subject: [PATCH 27/30] Fix snapshots --- .../nodes/__snapshots__/results.test.js.snap | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/packages/export/src/nodes/__snapshots__/results.test.js.snap b/packages/export/src/nodes/__snapshots__/results.test.js.snap index 7dddbc696..4e5bcbe11 100644 --- a/packages/export/src/nodes/__snapshots__/results.test.js.snap +++ b/packages/export/src/nodes/__snapshots__/results.test.js.snap @@ -45,6 +45,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -107,6 +110,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -170,6 +176,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -236,6 +245,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -299,6 +311,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -363,6 +378,9 @@ exports[`results with contexts not wrapped in \`response\` retrieves records 1` "totalRecords": 3, }, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -431,6 +449,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -491,6 +512,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 
1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -552,6 +576,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -616,6 +643,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -677,6 +707,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", @@ -739,6 +772,9 @@ exports[`results with contexts wrapped in \`response\` retrieves records 1`] = ], "totalRecords": 3, }, + "highlight": { + "disable": true, + }, "include": [ "a", "b", From 22182ccaba01842cfc39a175dc6e96161a7d9bb1 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 13:15:42 -0500 Subject: [PATCH 28/30] Reword nestedArrayIncludes to copySourcePaths --- .../src/example-types/results/README.md | 4 ++-- .../src/example-types/results/type.d.ts | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/packages/provider-elasticsearch/src/example-types/results/README.md b/packages/provider-elasticsearch/src/example-types/results/README.md index d9914ce41..79b0b4ce5 100644 --- a/packages/provider-elasticsearch/src/example-types/results/README.md +++ b/packages/provider-elasticsearch/src/example-types/results/README.md @@ -297,10 +297,10 @@ assert.deepEqual(actual, expected)
-`nestedArrayIncludes` are handled when ordering the array of objects. Assumming the example above and +The paths specified in `copySourcePaths` are handled when ordering the array of objects. Assuming the example above and ```javascript -let nestedArrayIncludes = { friends: ['age'] } +let copySourcePaths = ['friends.age'] ``` the highlighted results become diff --git a/packages/provider-elasticsearch/src/example-types/results/type.d.ts b/packages/provider-elasticsearch/src/example-types/results/type.d.ts index a2a936c01..1684aa108 100644 --- a/packages/provider-elasticsearch/src/example-types/results/type.d.ts +++ b/packages/provider-elasticsearch/src/example-types/results/type.d.ts @@ -11,17 +11,16 @@ interface HighlightConfig { */ disable?: boolean /** - * Nested paths in arrays of objects that should be copied from source into - * highlighted results. + * Paths that should be copied from source into the highlighted results. * - * For example `{ "library.books": ["cover.author"] }` will make it so - * `cover.author` is copied over from the source array to the highlighted - * results for the `library.books` array. The motivation being that sometimes - * arrays are large and it's expensive to include the whole thing in the - * hits source but some of the array items fields are needed to correctly - * display the array. + * In the case of arrays of objects, nested paths get copied to every + * highlighted item in the array. For example, assuming `library.books` to + * be an array of objects and `cover.author` a nested path inside it, + * setting `copySourcePaths` to `["library.books.cover.author"]` will copy + * `cover.author` from the source array to every item in the highlighted + * results for `library.books`.
*/ - nestedArrayIncludes?: Record> + copySourcePaths?: Record> } interface Node { From adb5ab2cd13cbdf51cdb2c5962a30e9ec937d1d2 Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 14:12:06 -0500 Subject: [PATCH 29/30] Fix lint error --- .../results/highlighting/util.test.js | 1 - .../ResultTable/HighlightedColumn.js | 57 ------------------- 2 files changed, 58 deletions(-) delete mode 100644 packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js diff --git a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js index 428008721..dbad8ba0f 100644 --- a/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js +++ b/packages/provider-elasticsearch/src/example-types/results/highlighting/util.test.js @@ -1,4 +1,3 @@ -import { schema } from './testSchema.js' import { mergeHighlights } from './util.js' let tags = { pre: '', post: '' } diff --git a/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js b/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js deleted file mode 100644 index d317e985f..000000000 --- a/packages/react/src/exampleTypes/ResultTable/HighlightedColumn.js +++ /dev/null @@ -1,57 +0,0 @@ -import React from 'react' -import _ from 'lodash/fp.js' -import F from 'futil' -import { flattenObjectsNotArrays } from '../../utils/futil.js' -import { withTheme } from '../../utils/theme.js' - -let labelForField = (schema, field) => - _.getOr(field, 'label', _.find({ field }, schema)) - -// Get fields that were highlighted but not included -export let getAdditionalHighlightedFields = ({ schema, record, node }) => { - let fieldNames = _.difference( - _.keys(flattenObjectsNotArrays(record._highlight)), - [...node.include] - ) - return _.pick(fieldNames, schema.fields) -} - -let HighlightedColumn = ({ schema, node, record, theme: { Modal, Table } }) => { - let viewModal = 
React.useState(false) - let additionalFields = getAdditionalHighlightedFields({ - schema, - record, - node, - }) - return _.isEmpty(additionalFields) ? null : ( - <> - -

Other Matching Fields

- - - {_.map( - ({ label, value }) => ( - - - - ), - additionalFields - )} - -
{labelForField(schema, label)} -
-
- - - ) -} - -export default withTheme(HighlightedColumn) From e004eda2f80ef23e39ab822ccb8650139100bf7d Mon Sep 17 00:00:00 2001 From: Alejandro Hernandez Date: Tue, 23 Jan 2024 14:45:21 -0500 Subject: [PATCH 30/30] Add changeset and reset package versions --- .changeset/green-windows-melt.md | 6 ++++++ packages/provider-elasticsearch/package.json | 2 +- packages/react/package.json | 3 ++- yarn.lock | 18 +----------------- 4 files changed, 10 insertions(+), 19 deletions(-) create mode 100644 .changeset/green-windows-melt.md diff --git a/.changeset/green-windows-melt.md b/.changeset/green-windows-melt.md new file mode 100644 index 000000000..d5f568410 --- /dev/null +++ b/.changeset/green-windows-melt.md @@ -0,0 +1,6 @@ +--- +'contexture-elasticsearch': minor +'contexture-react': minor +--- + +Revamp elasticsearch highlighting API and implementation diff --git a/packages/provider-elasticsearch/package.json b/packages/provider-elasticsearch/package.json index 2506ffc8d..1fc00f417 100644 --- a/packages/provider-elasticsearch/package.json +++ b/packages/provider-elasticsearch/package.json @@ -1,6 +1,6 @@ { "name": "contexture-elasticsearch", - "version": "1.26.0-alpha.3", + "version": "1.25.6", "description": "ElasticSearch Provider for Contexture", "type": "module", "exports": { diff --git a/packages/react/package.json b/packages/react/package.json index 8405f15ec..7df6dbc28 100644 --- a/packages/react/package.json +++ b/packages/react/package.json @@ -1,6 +1,6 @@ { "name": "contexture-react", - "version": "2.59.0-alpha.2", + "version": "2.58.12", "description": "React components for building contexture interfaces", "type": "module", "exports": { @@ -68,6 +68,7 @@ "mobx": "^4.3.1", "mobx-react": "^6.3.0", "mobx-utils": "^5.0.0", + "moment": "^2.24.0", "react": "^16.8.0", "react-dom": "^16.8.0", "react-select": "^2.0.0", diff --git a/yarn.lock b/yarn.lock index 19a5d4628..d397df7d2 100644 --- a/yarn.lock +++ b/yarn.lock @@ -8820,23 +8820,7 @@ __metadata: languageName: 
unknown linkType: soft -"contexture-elasticsearch@npm:^1.22.3": - version: 1.25.5 - resolution: "contexture-elasticsearch@npm:1.25.5" - dependencies: - "@elastic/datemath": ^2.3.0 - debug: ^4.3.1 - futil: ^1.76.0 - js-combinatorics: ^2.1.1 - lodash: ^4.17.4 - moment: ^2.18.1 - moment-timezone: ^0.5.28 - unidecode: ^0.1.8 - checksum: 9f599cafe0cdb21fcff255fb43b32735852ec1a2ef486c0dc205814e17708756ae202536d67bb3cf82d39a86a244592a7be92c5634fbf82129b333b1c0403703 - languageName: node - linkType: hard - -"contexture-elasticsearch@workspace:packages/provider-elasticsearch": +"contexture-elasticsearch@^1.22.3, contexture-elasticsearch@workspace:packages/provider-elasticsearch": version: 0.0.0-use.local resolution: "contexture-elasticsearch@workspace:packages/provider-elasticsearch" dependencies: