From 24d3306a7b6cd650b020ac1080e46cff04daf23a Mon Sep 17 00:00:00 2001 From: Michael Kirk Date: Fri, 19 Apr 2024 04:13:42 -0700 Subject: [PATCH] feat(query) remove `cutoff_frequency` (#1657) * Remove cutoff_frequency, deprecated in es7.3.0, forbidden in es8 From https://www.elastic.co/guide/en/elasticsearch/reference/8.8/migrating-8.0.html: "The cutoff_frequency parameter has been removed from the match and multi_match query. Details: The cutoff_frequency parameter, deprecated in 7.x, has been removed in 8.0 from match and multi_match queries. The same functionality can be achieved without any configuration provided that the total number of hits is not tracked. Impact: Discontinue use of the cutoff_frequency parameter. Search requests containing this parameter in a match or multi_match query will return an error." Note the caveat in the above: "...provided that the total number of hits is not tracked". `track_total_hits` does not appear in the pelias codebases, so we shouldn't have any issues there. * update to pelias-query w/o cutoff_frequency * fixup! 
Remove cutoff_frequency, deprecated in es7.3.0, forbidden in es8 --- package.json | 2 +- query/autocomplete_defaults.js | 23 ------------------- query/search_defaults.js | 15 ------------ .../search_pelias_parser_full_address.js | 3 --- .../search_pelias_parser_regions_address.js | 2 -- 5 files changed, 1 insertion(+), 44 deletions(-) diff --git a/package.json b/package.json index 27c606453..d966c6ae1 100644 --- a/package.json +++ b/package.json @@ -55,7 +55,7 @@ "pelias-microservice-wrapper": "^1.10.0", "pelias-model": "^9.0.0", "pelias-parser": "2.2.0", - "pelias-query": "^11.0.0", + "pelias-query": "^11.2.0", "pelias-sorting": "^1.7.0", "predicates": "^2.0.0", "regenerate": "^1.4.0", diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js index ad206a6ce..bcf417986 100644 --- a/query/autocomplete_defaults.js +++ b/query/autocomplete_defaults.js @@ -20,13 +20,11 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'ngram:analyzer': 'peliasQuery', 'ngram:field': 'name.default', 'ngram:boost': 100, - 'ngram:cutoff_frequency': 0.01, 'phrase:analyzer': 'peliasQuery', 'phrase:field': 'phrase.default', 'phrase:boost': 1, 'phrase:slop': 3, - 'phrase:cutoff_frequency': 0.01, 'focus:function': 'exp', 'focus:offset': '0km', @@ -40,22 +38,18 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'address:housenumber:analyzer': 'peliasHousenumber', 'address:housenumber:field': 'address_parts.number', 'address:housenumber:boost': 2, - 'address:housenumber:cutoff_frequency': 0.01, 'address:street:analyzer': 'peliasQuery', 'address:street:field': 'address_parts.street', 'address:street:boost': 1, - 'address:street:cutoff_frequency': 0.01, 'address:cross_street:analyzer': 'peliasQuery', 'address:cross_street:field': 'address_parts.cross_street', 'address:cross_street:boost': 5, - 'address:cross_street:cutoff_frequency': 0.01, 'address:postcode:analyzer': 'peliasZip', 'address:postcode:field': 'address_parts.zip', 'address:postcode:boost': 2000, - 
'address:postcode:cutoff_frequency': 0.01, // generic multi_match config 'multi_match:type': 'cross_fields', @@ -63,14 +57,9 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'multi_match:first_tokens_only:type': 'phrase', 'multi_match:boost_exact_matches:type': 'phrase', - // setting 'cutoff_frequency' will result in very common - // terms such as country not scoring at all - // 'multi_match:cutoff_frequency': 0.01, - 'admin:country_a:analyzer': 'standard', 'admin:country_a:field': 'parent.country_a.ngram', 'admin:country_a:boost': 1, - 'admin:country_a:cutoff_frequency': 0.01, // these options affect the `boundary.country` hard filter 'multi_match:boundary_country:analyzer': 'standard', @@ -79,62 +68,50 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'admin:country:analyzer': 'peliasAdmin', 'admin:country:field': 'parent.country.ngram', 'admin:country:boost': 1, - 'admin:country:cutoff_frequency': 0.01, 'admin:dependency:analyzer': 'peliasAdmin', 'admin:dependency:field': 'parent.dependency.ngram', 'admin:dependency:boost': 1, - 'admin:dependency:cutoff_frequency': 0.01, 'admin:region:analyzer': 'peliasAdmin', 'admin:region:field': 'parent.region.ngram', 'admin:region:boost': 1, - 'admin:region:cutoff_frequency': 0.01, 'admin:region_a:analyzer': 'peliasAdmin', 'admin:region_a:field': 'parent.region_a.ngram', 'admin:region_a:boost': 1, - 'admin:region_a:cutoff_frequency': 0.01, 'admin:macroregion:analyzer': 'peliasAdmin', 'admin:macroregion:field': 'parent.macroregion.ngram', 'admin:macroregion:boost': 1, - 'admin:macroregion:cutoff_frequency': 0.01, 'admin:county:analyzer': 'peliasAdmin', 'admin:county:field': 'parent.county.ngram', 'admin:county:boost': 1, - 'admin:county:cutoff_frequency': 0.01, 'admin:macrocounty:analyzer': 'peliasAdmin', 'admin:macrocounty:field': 'parent.macrocounty.ngram', 'admin:macrocounty:boost': 1, - 'admin:macrocounty:cutoff_frequency': 0.01, 'admin:localadmin:analyzer': 'peliasAdmin', 'admin:localadmin:field': 
'parent.localadmin.ngram', 'admin:localadmin:boost': 1, - 'admin:localadmin:cutoff_frequency': 0.01, 'admin:locality:analyzer': 'peliasAdmin', 'admin:locality:field': 'parent.locality.ngram', 'admin:locality:boost': 1, - 'admin:locality:cutoff_frequency': 0.01, 'admin:locality_a:analyzer': 'peliasAdmin', 'admin:locality_a:field': 'parent.locality_a.ngram', 'admin:locality_a:boost': 1, - 'admin:locality_a:cutoff_frequency': 0.01, 'admin:neighbourhood:analyzer': 'peliasAdmin', 'admin:neighbourhood:field': 'parent.neighbourhood.ngram', 'admin:neighbourhood:boost': 1, - 'admin:neighbourhood:cutoff_frequency': 0.01, 'admin:borough:analyzer': 'peliasAdmin', 'admin:borough:field': 'parent.borough.ngram', 'admin:borough:boost': 1, - 'admin:borough:cutoff_frequency': 0.01, // an additional 'name' field to add to admin multi-match queries. // this is used to improve venue matching in cases where the we diff --git a/query/search_defaults.js b/query/search_defaults.js index 59955d89b..11d31ed38 100644 --- a/query/search_defaults.js +++ b/query/search_defaults.js @@ -19,7 +19,6 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'ngram:analyzer': 'peliasQuery', 'ngram:field': 'name.default', 'ngram:boost': 1, - 'ngram:cutoff_frequency': 0.01, 'ngram:minimum_should_match': '1<-1 3<-25%', 'match:main:analyzer': 'peliasQuery', @@ -43,28 +42,22 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'address:housenumber:analyzer': 'peliasHousenumber', 'address:housenumber:field': 'address_parts.number', 'address:housenumber:boost': 2, - 'address:housenumber:cutoff_frequency': 0.01, 'address:street:analyzer': 'peliasQuery', 'address:street:field': 'address_parts.street', 'address:street:boost': 5, 'address:street:slop': 4, - 'address:street:cutoff_frequency': 0.01, 'address:postcode:analyzer': 'peliasZip', 'address:postcode:field': 'address_parts.zip', 'address:postcode:boost': 20, - 'address:postcode:cutoff_frequency': 0.01, // multi match query views require 'type' to be 
specified 'multi_match:type': 'best_fields', - // generic multi_match cutoff_frequency - 'multi_match:cutoff_frequency': 0.01, 'admin:country_a:analyzer': 'standard', 'admin:country_a:field': 'parent.country_a', 'admin:country_a:boost': 1, - 'admin:country_a:cutoff_frequency': 0.01, // these config variables are used for the 'boundary.country' hard filter 'multi_match:boundary_country:analyzer': 'standard', @@ -73,42 +66,34 @@ module.exports = _.merge({}, peliasQuery.defaults, { 'admin:country:analyzer': 'peliasAdmin', 'admin:country:field': 'parent.country', 'admin:country:boost': 1, - 'admin:country:cutoff_frequency': 0.01, 'admin:region:analyzer': 'peliasAdmin', 'admin:region:field': 'parent.region', 'admin:region:boost': 1, - 'admin:region:cutoff_frequency': 0.01, 'admin:region_a:analyzer': 'peliasAdmin', 'admin:region_a:field': 'parent.region_a', 'admin:region_a:boost': 1, - 'admin:region_a:cutoff_frequency': 0.01, 'admin:county:analyzer': 'peliasAdmin', 'admin:county:field': 'parent.county', 'admin:county:boost': 1, - 'admin:county:cutoff_frequency': 0.01, 'admin:localadmin:analyzer': 'peliasAdmin', 'admin:localadmin:field': 'parent.localadmin', 'admin:localadmin:boost': 1, - 'admin:localadmin:cutoff_frequency': 0.01, 'admin:locality:analyzer': 'peliasAdmin', 'admin:locality:field': 'parent.locality', 'admin:locality:boost': 1, - 'admin:locality:cutoff_frequency': 0.01, 'admin:borough:analyzer': 'peliasAdmin', 'admin:borough:field': 'parent.borough', 'admin:borough:boost': 1, - 'admin:borough:cutoff_frequency': 0.01, 'admin:neighbourhood:analyzer': 'peliasAdmin', 'admin:neighbourhood:field': 'parent.neighbourhood', 'admin:neighbourhood:boost': 1, - 'admin:neighbourhood:cutoff_frequency': 0.01, 'popularity:field': 'popularity', 'popularity:modifier': 'log1p', diff --git a/test/unit/fixture/search_pelias_parser_full_address.js b/test/unit/fixture/search_pelias_parser_full_address.js index c7711bc11..e1daedbff 100644 --- 
a/test/unit/fixture/search_pelias_parser_full_address.js +++ b/test/unit/fixture/search_pelias_parser_full_address.js @@ -60,7 +60,6 @@ module.exports = { 'match': { 'address_parts.number': { 'query': '123', - 'cutoff_frequency': 0.01, 'boost': vs['address:housenumber:boost'], 'analyzer': vs['address:housenumber:analyzer'] } @@ -69,7 +68,6 @@ module.exports = { 'match': { 'address_parts.street': { 'query': 'main st', - 'cutoff_frequency': 0.01, 'boost': vs['address:street:boost'], 'analyzer': vs['address:street:analyzer'] } @@ -78,7 +76,6 @@ module.exports = { 'match': { 'address_parts.zip': { 'query': '10010', - 'cutoff_frequency': 0.01, 'boost': vs['address:postcode:boost'], 'analyzer': vs['address:postcode:analyzer'] } diff --git a/test/unit/fixture/search_pelias_parser_regions_address.js b/test/unit/fixture/search_pelias_parser_regions_address.js index 0228a59ed..4d8202bb4 100644 --- a/test/unit/fixture/search_pelias_parser_regions_address.js +++ b/test/unit/fixture/search_pelias_parser_regions_address.js @@ -59,7 +59,6 @@ module.exports = { 'match': { 'address_parts.number': { 'query': '1', - 'cutoff_frequency': 0.01, 'boost': vs['address:housenumber:boost'], 'analyzer': vs['address:housenumber:analyzer'] } @@ -68,7 +67,6 @@ module.exports = { 'match': { 'address_parts.street': { 'query': 'water st', - 'cutoff_frequency': 0.01, 'boost': vs['address:street:boost'], 'analyzer': vs['address:street:analyzer'] }