From c0127fb264b5111b152b602dade61c2fde171a42 Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Thu, 26 Sep 2024 10:37:00 -0400 Subject: [PATCH] Add has parent query (#8365) * Add has parent query Signed-off-by: Fanit Kolchina * Apply suggestions from code review Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --------- Signed-off-by: Fanit Kolchina Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Co-authored-by: Nathan Bower --- _field-types/supported-field-types/join.md | 5 + _field-types/supported-field-types/nested.md | 5 + _query-dsl/joining/has-child.md | 141 +++++++- _query-dsl/joining/has-parent.md | 358 +++++++++++++++++++ _query-dsl/joining/index.md | 4 +- _search-plugins/searching-data/inner-hits.md | 6 +- 6 files changed, 516 insertions(+), 3 deletions(-) create mode 100644 _query-dsl/joining/has-parent.md diff --git a/_field-types/supported-field-types/join.md b/_field-types/supported-field-types/join.md index 1c5b0d1322..009471a784 100644 --- a/_field-types/supported-field-types/join.md +++ b/_field-types/supported-field-types/join.md @@ -327,3 +327,8 @@ PUT testindex1 - Multiple parents are not supported. - You can add a child document to an existing document only if the existing document is already marked as a parent. - You can add a new relation to an existing join field. + +## Next steps + +- Learn about [joining queries]({{site.url}}{{site.baseurl}}/query-dsl/joining/) on join fields. +- Learn more about [retrieving inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). 
\ No newline at end of file diff --git a/_field-types/supported-field-types/nested.md b/_field-types/supported-field-types/nested.md index f8dfca2ff8..4db270c1dc 100644 --- a/_field-types/supported-field-types/nested.md +++ b/_field-types/supported-field-types/nested.md @@ -314,3 +314,8 @@ Parameter | Description `include_in_parent` | A Boolean value that specifies whether all fields in the child nested object should also be added to the parent document in flattened form. Default is `false`. `include_in_root` | A Boolean value that specifies whether all fields in the child nested object should also be added to the root document in flattened form. Default is `false`. `properties` | Fields of this object, which can be of any supported type. New properties can be dynamically added to this object if `dynamic` is set to `true`. + +## Next steps + +- Learn about [joining queries]({{site.url}}{{site.baseurl}}/query-dsl/joining/) on nested fields. +- Learn about [retrieving inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). 
\ No newline at end of file diff --git a/_query-dsl/joining/has-child.md b/_query-dsl/joining/has-child.md index c1cc7a5423..a6b67ea8ca 100644 --- a/_query-dsl/joining/has-child.md +++ b/_query-dsl/joining/has-child.md @@ -176,6 +176,140 @@ The response returns both brands: } ``` +## Retrieving inner hits + +To return child documents that matched the query, provide the `inner_hits` parameter: + +```json +GET testindex1/_search +{ + "query" : { + "has_child": { + "type":"product", + "query": { + "match" : { + "name": "watch" + } + }, + "inner_hits": {} + } + } +} +``` +{% include copy-curl.html %} + +The response contains child documents in the `inner_hits` field: + +```json +{ + "took": 52, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "testindex1", + "_id": "1", + "_score": 1, + "_source": { + "name": "Luxury brand", + "product_to_brand": "brand" + }, + "inner_hits": { + "product": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.53899646, + "hits": [ + { + "_index": "testindex1", + "_id": "3", + "_score": 0.53899646, + "_routing": "1", + "_source": { + "name": "Mechanical watch", + "sales_count": 150, + "product_to_brand": { + "name": "product", + "parent": "1" + } + } + } + ] + } + } + } + }, + { + "_index": "testindex1", + "_id": "2", + "_score": 1, + "_source": { + "name": "Economy brand", + "product_to_brand": "brand" + }, + "inner_hits": { + "product": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 0.53899646, + "hits": [ + { + "_index": "testindex1", + "_id": "4", + "_score": 0.53899646, + "_routing": "2", + "_source": { + "name": "Electronic watch", + "sales_count": 300, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + }, + { + "_index": "testindex1", + "_id": "5", + "_score": 0.53899646, + "_routing": 
"2", + "_source": { + "name": "Digital watch", + "sales_count": 100, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + } + ] + } + } + } + } + ] + } +} +``` + +For more information about retrieving inner hits, see [Inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). + ## Parameters The following table lists all top-level parameters supported by `has_child` queries. @@ -188,6 +322,7 @@ The following table lists all top-level parameters supported by `has_child` quer | `max_children` | Optional | The maximum number of matching child documents for a parent document. If exceeded, the parent document is excluded from the search results. | | `min_children` | Optional | The minimum number of matching child documents required for a parent document to be included in the results. If not met, the parent is excluded. Default is `1`.| | `score_mode` | Optional | Defines how scores of matching child documents influence the parent document's score. Valid values are:
- `none`: Ignores the relevance scores of child documents and assigns a score of `0` to the parent document.
- `avg`: Uses the average relevance score of all matching child documents.
- `max`: Assigns the highest relevance score from the matching child documents to the parent.
- `min`: Assigns the lowest relevance score from the matching child documents to the parent.
- `sum`: Sums the relevance scores of all matching child documents.
Default is `none`. | +| `inner_hits` | Optional | If provided, returns the underlying hits (child documents) that matched the query. | ## Sorting limitations @@ -256,4 +391,8 @@ The response contains the brands sorted by the highest child `sales_count`: ] } } -``` \ No newline at end of file +``` + +## Next steps + +- Learn more about [retrieving inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). \ No newline at end of file diff --git a/_query-dsl/joining/has-parent.md b/_query-dsl/joining/has-parent.md new file mode 100644 index 0000000000..2232009fe7 --- /dev/null +++ b/_query-dsl/joining/has-parent.md @@ -0,0 +1,358 @@ +--- +layout: default +title: Has parent +parent: Joining queries +nav_order: 20 +--- + +# Has parent query + +The `has_parent` query returns child documents whose parent documents match a specific query. You can establish parent-child relationships between documents in the same index by using a [join]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/join/) field type. + +The `has_parent` query is slower than other queries because of the join operation it performs. Performance decreases as the number of matching parent documents increases. Each `has_parent` query in your search may significantly impact query performance. If you prioritize speed, avoid using this query or limit its usage as much as possible. +{: .warning} + +## Example + +Before you can run a `has_parent` query, your index must contain a [join]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/join/) field in order to establish parent-child relationships. 
The index mapping request uses the following format: + +```json +PUT /example_index +{ + "mappings": { + "properties": { + "relationship_field": { + "type": "join", + "relations": { + "parent_doc": "child_doc" + } + } + } + } +} +``` +{% include copy-curl.html %} + +For this example, first configure an index that contains documents representing products and their brands as described in the [`has_child` query example]({{site.url}}{{site.baseurl}}/query-dsl/joining/has-child/). + +To search for the child of a parent, use a `has_parent` query. The following query returns child documents (products) made by the brand matching the query `economy`: + +```json +GET testindex1/_search +{ + "query" : { + "has_parent": { + "parent_type":"brand", + "query": { + "match" : { + "name": "economy" + } + } + } + } +} +``` +{% include copy-curl.html %} + +The response returns all products made by the brand: + +```json +{ + "took": 11, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "testindex1", + "_id": "4", + "_score": 1, + "_routing": "2", + "_source": { + "name": "Electronic watch", + "sales_count": 300, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + }, + { + "_index": "testindex1", + "_id": "5", + "_score": 1, + "_routing": "2", + "_source": { + "name": "Digital watch", + "sales_count": 100, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + } + ] + } +} +``` + +## Retrieving inner hits + +To return parent documents that matched the query, provide the `inner_hits` parameter: + +```json +GET testindex1/_search +{ + "query" : { + "has_parent": { + "parent_type":"brand", + "query": { + "match" : { + "name": "economy" + } + }, + "inner_hits": {} + } + } +} +``` +{% include copy-curl.html %} + +The response contains parent documents in the `inner_hits` field: + +```json +{ + 
"took": 11, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1, + "hits": [ + { + "_index": "testindex1", + "_id": "4", + "_score": 1, + "_routing": "2", + "_source": { + "name": "Electronic watch", + "sales_count": 300, + "product_to_brand": { + "name": "product", + "parent": "2" + } + }, + "inner_hits": { + "brand": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.3862942, + "hits": [ + { + "_index": "testindex1", + "_id": "2", + "_score": 1.3862942, + "_source": { + "name": "Economy brand", + "product_to_brand": "brand" + } + } + ] + } + } + } + }, + { + "_index": "testindex1", + "_id": "5", + "_score": 1, + "_routing": "2", + "_source": { + "name": "Digital watch", + "sales_count": 100, + "product_to_brand": { + "name": "product", + "parent": "2" + } + }, + "inner_hits": { + "brand": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.3862942, + "hits": [ + { + "_index": "testindex1", + "_id": "2", + "_score": 1.3862942, + "_source": { + "name": "Economy brand", + "product_to_brand": "brand" + } + } + ] + } + } + } + } + ] + } +} +``` + +For more information about retrieving inner hits, see [Inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). + +## Parameters + +The following table lists all top-level parameters supported by `has_parent` queries. + +| Parameter | Required/Optional | Description | +|:---|:---|:---| +| `parent_type` | Required | Specifies the name of the parent relationship as defined in the `join` field mapping. | +| `query` | Required | The query to run on parent documents. If a parent document matches the query, the child document is returned. | +| `ignore_unmapped` | Optional | Indicates whether to ignore unmapped `parent_type` fields and not return documents instead of throwing an error. 
You can provide this parameter when querying multiple indexes, some of which may not have the specified `parent_type` mapped. Default is `false`. | +| `score` | Optional | Indicates whether the relevance score of a matching parent document is aggregated into its child documents. If `false`, then the relevance score of the parent document is ignored, and each child document is assigned a relevance score equal to the query's `boost`, which defaults to `1`. If `true`, then the relevance score of the matching parent document is aggregated into the relevance scores of its child documents. Default is `false`. | +| `inner_hits` | Optional | If provided, returns the underlying hits (parent documents) that matched the query. | + + +## Sorting limitations + +The `has_parent` query does not support [sorting results]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/sort/) using standard sorting options. If you need to sort child documents by fields in their parent documents, you can use a [`function_score` query]({{site.url}}{{site.baseurl}}/query-dsl/compound/function-score/) and sort by the child document's score. + +For the preceding example, first add a `customer_satisfaction` field to the parent (brand) documents. You'll use this field to sort their child documents: + +```json +PUT testindex1/_doc/1 +{ + "name": "Luxury watch brand", + "product_to_brand" : "brand", + "customer_satisfaction": 4.5 +} +``` +{% include copy-curl.html %} + +```json +PUT testindex1/_doc/2 +{ + "name": "Economy watch brand", + "product_to_brand" : "brand", + "customer_satisfaction": 3.9 +} +``` +{% include copy-curl.html %} + +Now you can sort child documents (products) based on the `customer_satisfaction` field of their parent brands. 
The following query multiplies each parent document's relevance score by its `customer_satisfaction` value. Because `score` is set to `true`, the resulting parent score is aggregated into the relevance scores of its child documents: + +```json +GET testindex1/_search +{ + "query": { + "has_parent": { + "parent_type": "brand", + "score": true, + "query": { + "function_score": { + "script_score": { + "script": "_score * doc['customer_satisfaction'].value" + } + } + } + } + } +} +``` +{% include copy-curl.html %} + +The response contains the products, sorted in descending order of their parent brand's `customer_satisfaction`: + +```json +{ + "took": 11, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 4.5, + "hits": [ + { + "_index": "testindex1", + "_id": "3", + "_score": 4.5, + "_routing": "1", + "_source": { + "name": "Mechanical watch", + "sales_count": 150, + "product_to_brand": { + "name": "product", + "parent": "1" + } + } + }, + { + "_index": "testindex1", + "_id": "4", + "_score": 3.9, + "_routing": "2", + "_source": { + "name": "Electronic watch", + "sales_count": 300, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + }, + { + "_index": "testindex1", + "_id": "5", + "_score": 3.9, + "_routing": "2", + "_source": { + "name": "Digital watch", + "sales_count": 100, + "product_to_brand": { + "name": "product", + "parent": "2" + } + } + } + ] + } +} +``` + +## Next steps + +- Learn more about [retrieving inner hits]({{site.url}}{{site.baseurl}}/search-plugins/searching-data/inner-hits/). 
\ No newline at end of file diff --git a/_query-dsl/joining/index.md b/_query-dsl/joining/index.md index 4ed46b3e17..74ad7f1ea1 100644 --- a/_query-dsl/joining/index.md +++ b/_query-dsl/joining/index.md @@ -4,6 +4,8 @@ title: Joining queries has_children: true nav_order: 55 has_toc: false +redirect_from: + - /query-dsl/joining/ --- # Joining queries @@ -12,7 +14,7 @@ OpenSearch is a distributed system in which data is spread across multiple nodes - `nested` queries: Act as wrappers for other queries to search [nested]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/nested/) fields. The nested field objects are searched as though they were indexed as separate documents. - [`has_child`]({{site.url}}{{site.baseurl}}/query-dsl/joining/has-child/) queries: Search for parent documents whose child documents match the query. -- `has_parent` queries: Search for child documents whose parent documents match the query. +- [`has_parent`]({{site.url}}{{site.baseurl}}/query-dsl/joining/has-parent/) queries: Search for child documents whose parent documents match the query. - `parent_id` queries: A [join]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/join/) field type establishes a parent/child relationship between documents in the same index. `parent_id` queries search for child documents that are joined to a specific parent document. If [`search.allow_expensive_queries`]({{site.url}}{{site.baseurl}}/query-dsl/index/#expensive-queries) is set to `false`, then joining queries are not executed. diff --git a/_search-plugins/searching-data/inner-hits.md b/_search-plugins/searching-data/inner-hits.md index 395e9e748a..5eda9498b5 100644 --- a/_search-plugins/searching-data/inner-hits.md +++ b/_search-plugins/searching-data/inner-hits.md @@ -806,4 +806,8 @@ The following is the expected result: Using `inner_hits` provides contextual relevance by showing exactly which nested or child documents match the query criteria. 
This is crucial for applications in which the relevance of results depends on a specific part of the document that matches the query. - Example use case: In a customer support system, you have tickets as parent documents and comments or updates as nested or child documents. You can determine which specific comment matches the search in order to better understand the context of the ticket search. \ No newline at end of file + Example use case: In a customer support system, you have tickets as parent documents and comments or updates as nested or child documents. You can determine which specific comment matches the search in order to better understand the context of the ticket search. + +## Next steps + +- Learn about [joining queries]({{site.url}}{{site.baseurl}}/query-dsl/joining/) on [nested]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/nested/) or [join]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/join/) fields. \ No newline at end of file