Skip to content

Commit

Permalink
use elastic cross fields search by default (superdesk#527)
Browse files Browse the repository at this point in the history
* use elastic cross fields search by default

CPCN-316
  • Loading branch information
petrjasek authored Sep 5, 2023
1 parent 62c0c6a commit 9602964
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 18 deletions.
45 changes: 27 additions & 18 deletions newsroom/search/service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import List, Optional, Union, Dict, Any, TypedDict
from typing import List, Literal, Optional, Union, Dict, Any, TypedDict
from copy import deepcopy

from flask import current_app as app, json, abort
Expand Down Expand Up @@ -41,15 +41,22 @@ def strtobool(val):
return _strtobool(val)


def query_string(query, default_operator="AND", fields=["*"]):
def query_string(
    query: str,
    default_operator: Literal["AND", "OR"] = "AND",
    fields: Optional[List[str]] = None,
    multimatch_type: Literal["cross_fields", "best_fields"] = "cross_fields",
    analyze_wildcard: bool = False,
) -> Dict[str, Any]:
    """Build an Elasticsearch ``query_string`` clause.

    :param query: query text, stored under the clause's ``query`` key.
    :param default_operator: operator stored under ``default_operator``.
    :param fields: fields to search; ``["*"]`` is used when omitted.
    :param multimatch_type: value stored under the clause's ``type`` key.
    :param analyze_wildcard: set to ``True`` to enable wildcard analysis for
        this query even when the ``analyze_wildcard`` entry of the
        ``query_string`` settings in ``ELASTICSEARCH_SETTINGS`` is falsy.
    :return: a dict with a single ``query_string`` key holding the clause.
    """
    query_string_settings = app.config["ELASTICSEARCH_SETTINGS"]["settings"]["query_string"]
    return {
        "query_string": {
            "query": query,
            "default_operator": default_operator,
            # The configured value wins when truthy; otherwise the per-call flag applies.
            "analyze_wildcard": query_string_settings["analyze_wildcard"] or analyze_wildcard,
            "lenient": True,
            # Create the default list per call: the list ends up inside the
            # returned dict, so a shared default could be mutated by callers.
            "fields": ["*"] if fields is None else fields,
            "type": multimatch_type,
        }
    }

Expand Down Expand Up @@ -772,28 +779,30 @@ def apply_request_advanced_search(self, search: SearchQuery):
if not fields:
return

def gen_advanced_query(keywords: str, operator: str, multi_match_type: str):
return {
"query_string": {
"query": keywords,
"fields": fields,
"default_operator": operator,
"type": multi_match_type,
"lenient": True,
"analyze_wildcard": True,
},
}

if search.advanced.get("all"):
search.query["bool"].setdefault("must", []).append(
gen_advanced_query(search.advanced["all"], "AND", "cross_fields")
query_string(
search.advanced["all"], "AND", fields=fields, multimatch_type="cross_fields", analyze_wildcard=True
)
)

if search.advanced.get("any"):
search.query["bool"].setdefault("must", []).append(
gen_advanced_query(search.advanced["any"], "OR", "best_fields")
query_string(
search.advanced["any"], "OR", fields=fields, multimatch_type="best_fields", analyze_wildcard=True
)
)

if search.advanced.get("exclude"):
search.query["bool"]["must_not"].append(gen_advanced_query(search.advanced["exclude"], "OR", "best_fields"))
search.query["bool"]["must_not"].append(
query_string(
search.advanced["exclude"],
"OR",
fields=fields,
multimatch_type="best_fields",
analyze_wildcard=True,
)
)

def apply_embargoed_filters(self, search):
"""Generate filters for embargoed params"""
Expand Down
20 changes: 20 additions & 0 deletions tests/search/test_search_fields.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from flask import json
from urllib.parse import quote
from tests.utils import get_json


Expand All @@ -16,6 +18,24 @@ def test_wire_search_fields(client, app):
assert 0 == len(data["_items"])


def test_wire_search_cross_fields(client, app):
    """Check that "foo bar" finds an item holding each term in a different field.

    The single inserted item has "foo" in its headline and "bar" in its body.
    It must be returned by the plain ``q`` search and by the advanced search
    in both its "all" and "any" modes.
    """
    item = {"headline": "foo", "body_html": "bar", "type": "text"}
    app.data.insert("items", [item])

    response = get_json(client, "/wire/search?q=foo+bar")
    assert 1 == len(response["_items"])

    for mode in ("all", "any"):
        advanced = quote(json.dumps({mode: "foo bar"}))
        response = get_json(client, f"/wire/search?advanced={advanced}")
        assert 1 == len(response["_items"])


def test_agenda_search_fields(client, app):
app.data.insert(
"agenda",
Expand Down
5 changes: 5 additions & 0 deletions tests/search/test_search_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def test_apply_section_filter(client, app):
"analyze_wildcard": query_string_settings["analyze_wildcard"],
"lenient": True,
"fields": ["*"],
"type": "cross_fields",
}
} in search.query["bool"]["filter"]

Expand All @@ -68,6 +69,7 @@ def test_apply_section_filter(client, app):
"analyze_wildcard": query_string_settings["analyze_wildcard"],
"lenient": True,
"fields": ["*"],
"type": "cross_fields",
}
} in search.query["bool"]["filter"]

Expand Down Expand Up @@ -147,6 +149,7 @@ def assert_products_query(user_id, args=None, products=None):
"analyze_wildcard": query_string_settings["analyze_wildcard"],
"lenient": True,
"fields": app.config["WIRE_SEARCH_FIELDS"],
"type": "cross_fields",
}
} in search.query["bool"]["should"]

Expand Down Expand Up @@ -182,6 +185,7 @@ def test_apply_request_filter__query_string(client, app):
"analyze_wildcard": query_string_settings["analyze_wildcard"],
"lenient": True,
"fields": app.config["WIRE_SEARCH_FIELDS"],
"type": "cross_fields",
}
} in search.query["bool"]["must"]

Expand All @@ -194,6 +198,7 @@ def test_apply_request_filter__query_string(client, app):
"analyze_wildcard": query_string_settings["analyze_wildcard"],
"lenient": True,
"fields": app.config["WIRE_SEARCH_FIELDS"],
"type": "cross_fields",
}
} in search.query["bool"]["must"]

Expand Down

0 comments on commit 9602964

Please sign in to comment.