diff --git a/cl/alerts/management/commands/cl_send_alerts.py b/cl/alerts/management/commands/cl_send_alerts.py
index 897b077fea..4792e78fd2 100644
--- a/cl/alerts/management/commands/cl_send_alerts.py
+++ b/cl/alerts/management/commands/cl_send_alerts.py
@@ -13,17 +13,25 @@
 from django.template import loader
 from django.urls import reverse
 from django.utils.timezone import now
+from elasticsearch_dsl import MultiSearch
 from elasticsearch_dsl import Q as ES_Q
+from elasticsearch_dsl.response import Response

 from cl.alerts.models import Alert, RealTimeQueue
 from cl.alerts.utils import InvalidDateError
-from cl.api.models import WebhookEventType
+from cl.api.models import WebhookEventType, WebhookVersions
 from cl.api.webhooks import send_search_alert_webhook
 from cl.lib import search_utils
 from cl.lib.command_utils import VerboseCommand, logger
-from cl.lib.elasticsearch_utils import do_es_api_query
+from cl.lib.elasticsearch_utils import (
+    do_es_api_query,
+    limit_inner_hits,
+    set_child_docs_and_score,
+    set_results_highlights,
+)
 from cl.lib.scorched_utils import ExtraSolrInterface
 from cl.lib.search_utils import regroup_snippets
+from cl.lib.types import CleanData
 from cl.search.constants import ALERTS_HL_TAG, SEARCH_ALERTS_OPINION_HL_FIELDS
 from cl.search.documents import OpinionDocument
 from cl.search.forms import SearchForm
@@ -106,6 +114,59 @@ def send_alert(user_profile, hits):
     msg.send(fail_silently=False)


+def query_alerts_es(
+    cd: CleanData, v1_webhook: bool = False
+) -> tuple[Response, Response | None]:
+    """Query ES for opinion alerts, optionally handling a V1 webhook query.
+
+    :param cd: A CleanData object containing the query parameters.
+    :param v1_webhook: A boolean indicating whether to include a V1 webhook query.
+    :return: A tuple containing the main search response and an optional V1
+    query response.
+    """
+
+    v1_results = None
+    search_query = OpinionDocument.search()
+    cd["highlight"] = True
+    main_query, _ = do_es_api_query(
+        search_query,
+        cd,
+        SEARCH_ALERTS_OPINION_HL_FIELDS,
+        ALERTS_HL_TAG,
+        "v4",
+    )
+    main_query = main_query.extra(
+        from_=0,
+        size=settings.SCHEDULED_ALERT_HITS_LIMIT,
+    )
+    multi_search = MultiSearch()
+    multi_search = multi_search.add(main_query)
+
+    if v1_webhook:
+        search_query = OpinionDocument.search()
+        v1_query, _ = do_es_api_query(
+            search_query,
+            cd,
+            SEARCH_ALERTS_OPINION_HL_FIELDS,
+            ALERTS_HL_TAG,
+            "v3",
+        )
+        v1_query = v1_query.extra(
+            from_=0,
+            size=settings.SCHEDULED_ALERT_HITS_LIMIT,
+        )
+        multi_search = multi_search.add(v1_query)
+
+    responses = multi_search.execute()
+    results = responses[0]
+    limit_inner_hits({}, results, cd["type"])
+    set_results_highlights(results, cd["type"])
+    set_child_docs_and_score(results)
+    if v1_webhook:
+        v1_results = responses[1]
+    return results, v1_results
+
+
 class Command(VerboseCommand):
     help = (
         "Sends the alert emails on a real time, daily, weekly or monthly "
@@ -152,10 +213,9 @@ def handle(self, *args, **options):
         if options["rate"] == Alert.REAL_TIME:
             self.clean_rt_queue()

-    def run_query(self, alert, rate):
+    def run_query(self, alert, rate, v1_webhook=False):
         results = []
-        cd = {}
-        main_params = {}
+        v1_results = None
         logger.info(f"Now running the query: {alert.query}\n")

         # Make a dict from the query string.
@@ -175,7 +235,7 @@ def run_query(self, alert, rate):
             if waffle.switch_is_active("oa-es-alerts-active"):
                 # Return empty results for OA alerts. They are now handled
                 # by Elasticsearch.
-                return query_type, results
+                return query_type, results, v1_results

         logger.info(f"Data sent to SearchForm is: {qd}\n")
         search_form = SearchForm(qd, is_es_form=self.o_es_alerts)
@@ -187,7 +247,7 @@ def run_query(self, alert, rate):
                 and len(self.valid_ids[query_type]) == 0
             ):
                 # Bail out. No results will be found if no valid_ids.
-                return query_type, results
+                return query_type, results, v1_results

             main_params = search_utils.build_main_query(
                 cd,
@@ -220,19 +280,7 @@ def run_query(self, alert, rate):
             )

         if self.o_es_alerts:
-            search_query = OpinionDocument.search()
-            s, _ = do_es_api_query(
-                search_query,
-                cd,
-                SEARCH_ALERTS_OPINION_HL_FIELDS,
-                ALERTS_HL_TAG,
-                "v3",
-            )
-            s = s.extra(
-                from_=0,
-                size=settings.SCHEDULED_ALERT_HITS_LIMIT,
-            )
-            results = s.execute()
+            results, v1_results = query_alerts_es(cd, v1_webhook)
         else:
             # Ignore warnings from this bit of code. Otherwise, it complains
             # about the query URL being too long and having to POST it instead
@@ -248,7 +296,7 @@ def run_query(self, alert, rate):
             regroup_snippets(results)

         logger.info(f"There were {len(results)} results.")
-        return qd, results
+        return qd, results, v1_results

     def send_emails_and_webhooks(self, rate):
         """Send out an email and webhook events to every user whose alert has a
@@ -261,6 +309,13 @@ def send_emails_and_webhooks(self, rate):
             alerts = user.alerts.filter(rate=rate)
             logger.info(f"Running alerts for user '{user}': {alerts}")

+            # Query user's webhooks.
+            user_webhooks = user.webhooks.filter(
+                event_type=WebhookEventType.SEARCH_ALERT, enabled=True
+            )
+            v1_webhook = WebhookVersions.v1 in {
+                webhook.version for webhook in user_webhooks
+            }
             if rate == Alert.REAL_TIME:
                 if not user.profile.is_member:
                     continue
@@ -268,7 +323,9 @@ def send_emails_and_webhooks(self, rate):
             hits = []
             for alert in alerts:
                 try:
-                    qd, results = self.run_query(alert, rate)
+                    qd, results, v1_results = self.run_query(
+                        alert, rate, v1_webhook
+                    )
                 except:
                     traceback.print_exc()
                     logger.info(
@@ -293,10 +350,13 @@ def send_emails_and_webhooks(self, rate):

                     # Send webhook event if the user has a SEARCH_ALERT
                     # endpoint enabled.
-                    user_webhooks = user.webhooks.filter(
-                        event_type=WebhookEventType.SEARCH_ALERT, enabled=True
-                    )
                     for user_webhook in user_webhooks:
+                        results = (
+                            v1_results
+                            if alert.alert_type == SEARCH_TYPES.OPINION
+                            and user_webhook.version == WebhookVersions.v1
+                            else results
+                        )
                         send_search_alert_webhook(
                             self.sis[search_type], results, user_webhook, alert
                         )
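
query_alerts_es above relies on elasticsearch_dsl's MultiSearch to send the main v4-shaped query and the optional v1-shaped query in a single request; the responses come back in the order the searches were added, so responses[0] is always the main result set. A minimal sketch of that pattern, with an illustrative index name and queries, assuming a configured elasticsearch-dsl connection:

    from elasticsearch_dsl import MultiSearch, Search

    ms = MultiSearch(index="opinion_index")  # illustrative index name
    ms = ms.add(Search().query("match", caseName="lorem"))  # main (v4) query
    ms = ms.add(Search().query("match", caseName="lorem"))  # optional v1 query

    responses = ms.execute()  # one round trip; responses in insertion order
    main_results, v1_results = responses[0], responses[1]
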
diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html
index dc2f797268..7ef71cfcfe 100644
--- a/cl/alerts/templates/alert_email_es.html
+++ b/cl/alerts/templates/alert_email_es.html
@@ -75,17 +75,56 @@
+
+ View original:
+
+ {% if doc.download_url %}
+
+ From the court
+
+ |
+ {% endif %}
+ {% if doc.local_path %}
+ {# Provide link to S3. #}
+
+ Our backup
+
+ {% endif %}
+
+ {% endif %} +
+
+ View original:
+
+ {% if result.download_url %}
+
+ From the court
+
+ |
+ {% endif %}
{% if result.local_path %}
{# Provide link to S3. #}
@@ -93,8 +132,6 @@
+
{% if result|get_highlight:"text" %}
…{{ result|get_highlight:"text"|safe|underscore_to_space }}…
{% endif %}
diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt
index 2b7ec3b569..8c0324e8f1 100644
--- a/cl/alerts/templates/alert_email_es.txt
+++ b/cl/alerts/templates/alert_email_es.txt
@@ -16,8 +16,14 @@ View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.que
Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %}
{{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %})
-{% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %}
-{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %}
+{% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}
+{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}
+{% endif %}
+{% if type == 'o' %}{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:True %}{% if result.child_docs|length > 1 or doc.type != 'combined-opinion' %}{% if doc.text %}{{ doc.type_text }}{% endif %}{% endif %}
+ {% if doc.text %}...{{ doc.text|render_string_or_list|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}
+ {% if doc.download_url %} - Download original from the court: {{doc.download_url}}{% endif %}
+ {% if doc.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ doc.local_path }}{% endif %}
+{% endwith %}{% endfor %}{% endif %}
{% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %}
{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:scheduled_alert %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %}
{% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %}
@@ -27,9 +33,8 @@ Disable this Alert (one click): https://www.courtlistener.com{% url "disable_ale
{% if result.child_docs and result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %}
{% endif %}~~~~~
- View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %}
-{% if result.download_url %} - Download original from the court: {{result.download_url}}
-{% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endfor %}
-
+{% if type == 'oa' %}{% if result.download_url %} - Download original from the court: {{result.download_url}}
+{% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endif %}{% endfor %}
{% endfor %}
************************
This alert brought to you by the 501(c)(3) non-profit Free Law Project
diff --git a/cl/alerts/tests/tests.py b/cl/alerts/tests/tests.py
index 82b5f3c952..990658c6a3 100644
--- a/cl/alerts/tests/tests.py
+++ b/cl/alerts/tests/tests.py
@@ -48,6 +48,7 @@
Webhook,
WebhookEvent,
WebhookEventType,
+ WebhookVersions,
)
from cl.api.utils import get_webhook_deprecation_date
from cl.audio.factories import AudioWithParentsFactory
@@ -74,10 +75,14 @@
Opinion,
RECAPDocument,
)
-from cl.search.tasks import add_items_to_solr
from cl.stats.models import Stat
from cl.tests.base import SELENIUM_TIMEOUT, BaseSeleniumTest
-from cl.tests.cases import APITestCase, ESIndexTestCase, TestCase
+from cl.tests.cases import (
+ APITestCase,
+ ESIndexTestCase,
+ SearchAlertsAssertions,
+ TestCase,
+)
from cl.tests.utils import MockResponse, make_client
from cl.users.factories import UserFactory, UserProfileWithParentsFactory
from cl.users.models import EmailSent
@@ -565,7 +570,9 @@ async def test_alert_update(self) -> None:
@override_switch("o-es-alerts-active", active=True)
@mock.patch("cl.search.tasks.percolator_alerts_models_supported", new=[Audio])
-class SearchAlertsWebhooksTest(ESIndexTestCase, TestCase):
+class SearchAlertsWebhooksTest(
+ ESIndexTestCase, TestCase, SearchAlertsAssertions
+):
"""Test Search Alerts Webhooks"""
@classmethod
@@ -582,6 +589,7 @@ def setUpTestData(cls):
event_type=WebhookEventType.SEARCH_ALERT,
url="https://example.com/",
enabled=True,
+ version=2,
)
cls.webhook_enabled_1 = WebhookFactory(
user=cls.user_profile_1.user,
@@ -648,6 +656,7 @@ def setUpTestData(cls):
event_type=WebhookEventType.SEARCH_ALERT,
url="https://example.com/",
enabled=True,
+ version=1,
)
cls.search_alert_3 = AlertFactory(
user=cls.user_profile_3.user,
@@ -782,7 +791,7 @@ def test_send_search_alert_webhooks(self):
len(mail.outbox), 4, msg="Outgoing emails don't match."
)
- # Opinion email alert assertions
+ # First Opinion email alert assertions (search_alert)
self.assertEqual(mail.outbox[0].to[0], self.user_profile.user.email)
# Plain text assertions
opinion_alert_content = mail.outbox[0].body
@@ -794,18 +803,36 @@ def test_send_search_alert_webhooks(self):
opinion_alert_content,
)
self.assertIn("California vs Lorem", opinion_alert_content)
- self.assertIn("california sit amet", opinion_alert_content)
+ self.assertIn(
+ "california sit amet",
+ opinion_alert_content,
+ msg="Alert content didn't match",
+ )
self.assertIn(self.dly_opinion_2.download_url, opinion_alert_content)
self.assertIn(
str(self.dly_opinion_2.local_path), opinion_alert_content
)
- html_content = None
- for content, content_type in mail.outbox[0].alternatives:
- if content_type == "text/html":
- html_content = content
- break
+ html_content = self.get_html_content_from_email(mail.outbox[0])
# HTML assertions
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ self.search_alert.name,
+ 1,
+ self.dly_opinion.cluster.case_name,
+ 2,
+ )
+
+ self._assert_child_hits_content(
+ html_content,
+ self.search_alert.name,
+ self.dly_opinion.cluster.case_name,
+ [
+ self.dly_opinion.get_type_display(),
+ self.dly_opinion_2.get_type_display(),
+ ],
+ )
+
self.assertIn("had 1 hit", html_content)
self.assertIn(
self.dly_opinion_2.cluster.docket.court.citation_string.replace(
@@ -831,7 +858,27 @@ def test_send_search_alert_webhooks(self):
mail.outbox[0].extra_headers["List-Unsubscribe"],
)
- # Second Opinion alert
+ # Second Opinion alert (search_alert_2)
+ html_content = self.get_html_content_from_email(mail.outbox[1])
+ # HTML assertions
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ self.search_alert.name,
+ 1,
+ self.dly_opinion.cluster.case_name,
+ 2,
+ )
+
+ self._assert_child_hits_content(
+ html_content,
+ self.search_alert.name,
+ self.dly_opinion.cluster.case_name,
+ [
+ self.dly_opinion.get_type_display(),
+ self.dly_opinion_2.get_type_display(),
+ ],
+ )
+
self.assertEqual(mail.outbox[1].to[0], self.user_profile_2.user.email)
self.assertIn("daily opinion alert", mail.outbox[1].body)
self.assertEqual(
@@ -846,6 +893,24 @@ def test_send_search_alert_webhooks(self):
mail.outbox[1].extra_headers["List-Unsubscribe"],
)
+ # Third Opinion alert (search_alert_3)
+ html_content = self.get_html_content_from_email(mail.outbox[2])
+ # HTML assertions
+ self._count_alert_hits_and_child_hits(
+ html_content,
+ self.search_alert_3.name,
+ 1,
+ self.dly_opinion.cluster.case_name,
+ 1,
+ )
+
+ self._assert_child_hits_content(
+ html_content,
+ self.search_alert_3.name,
+ self.dly_opinion.cluster.case_name,
+ [self.dly_opinion.get_type_display()],
+ )
+
# Oral Argument Alert
self.assertEqual(mail.outbox[3].to[0], self.user_profile.user.email)
self.assertIn("daily oral argument alert ", mail.outbox[3].body)
@@ -861,8 +926,11 @@ def test_send_search_alert_webhooks(self):
mail.outbox[3].extra_headers["List-Unsubscribe"],
)
- # Two webhook events should be sent, both of them to user_profile user
- webhook_events = WebhookEvent.objects.all()
+ # 3 webhook events should be sent: 2 to the user_profile user and 1 to user_profile_3
+ webhook_events = WebhookEvent.objects.filter().values_list(
+ "content", flat=True
+ )
+
self.assertEqual(
len(webhook_events), 3, msg="Webhook events don't match."
)
@@ -885,7 +953,46 @@ def test_send_search_alert_webhooks(self):
},
}
- for webhook_sent in webhook_events:
+ # Assert V2 Opinion Search Alerts Webhook
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ self.search_alert.name,
+ 1,
+ self.dly_opinion.cluster.case_name,
+ 2,
+ "opinions",
+ )
+
+ # Assert HL content in V2 webhooks.
+ self._assert_webhook_hit_hl(
+ webhook_events,
+ self.search_alert.name,
+ "caseName",
+ "California vs Lorem",
+ child_field=False,
+ nested_field="opinions",
+ )
+ self._assert_webhook_hit_hl(
+ webhook_events,
+ self.search_alert.name,
+ "snippet",
+ "Lorem dolor california sit amet, consectetur adipiscing elit.",
+ child_field=True,
+ nested_field="opinions",
+ )
+
+ # Assert V1 Opinion Search Alerts Webhook
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ self.search_alert_3.name,
+ 1,
+ self.dly_opinion.cluster.case_name,
+ 0,
+ None,
+ )
+
+ webhook_events_instances = WebhookEvent.objects.all()
+ for webhook_sent in webhook_events_instances:
with self.subTest(webhook_sent=webhook_sent):
self.assertEqual(
webhook_sent.event_status,
@@ -893,6 +1000,7 @@ def test_send_search_alert_webhooks(self):
msg="The event status doesn't match.",
)
content = webhook_sent.content
+
alert_data_compare = alert_data[
content["payload"]["alert"]["id"]
]
@@ -926,14 +1034,13 @@ def test_send_search_alert_webhooks(self):
if (
content["payload"]["alert"]["alert_type"]
== SEARCH_TYPES.OPINION
- ):
+ ) and webhook_sent.webhook.version == WebhookVersions.v1:
# Assert the number of keys in the Opinions Search Webhook
# payload
keys_count = len(content["payload"]["results"][0])
self.assertEqual(
keys_count, len(opinion_v3_search_api_keys)
)
-
# Iterate through all the opinion fields and compare them.
for (
field,
@@ -951,7 +1058,10 @@ def test_send_search_alert_webhooks(self):
expected_value,
f"Field '{field}' does not match.",
)
- else:
+ elif (
+ content["payload"]["alert"]["alert_type"]
+ == SEARCH_TYPES.ORAL_ARGUMENT
+ ):
# Assertions for OA webhook payload.
self.assertEqual(
content["payload"]["results"][0]["caseName"],
diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py
index c40aa3b349..e4610cffd9 100644
--- a/cl/alerts/tests/tests_recap_alerts.py
+++ b/cl/alerts/tests/tests_recap_alerts.py
@@ -51,7 +51,7 @@
from cl.search.models import Docket
from cl.search.tasks import index_docket_parties_in_es
from cl.stats.models import Stat
-from cl.tests.cases import ESIndexTestCase, RECAPAlertsAssertions, TestCase
+from cl.tests.cases import ESIndexTestCase, SearchAlertsAssertions, TestCase
from cl.tests.utils import MockResponse
from cl.users.factories import UserProfileWithParentsFactory
@@ -61,7 +61,7 @@
return_value="alert_hits_sweep",
)
class RECAPAlertsSweepIndexTest(
- RECAPSearchTestCase, ESIndexTestCase, TestCase, RECAPAlertsAssertions
+ RECAPSearchTestCase, ESIndexTestCase, TestCase, SearchAlertsAssertions
):
"""
RECAP Alerts Sweep Index Tests
@@ -627,6 +627,18 @@ def test_filter_out_alerts_to_send_by_query_and_hits(
alert_de.docket.case_name,
[rd_2.description],
)
+ webhook_events = WebhookEvent.objects.all().values_list(
+ "content", flat=True
+ )
+ # Assert webhook event child hits.
+ self._count_webhook_hits_and_child_hits(
+ list(webhook_events),
+ cross_object_alert.name,
+ 1,
+ alert_de.docket.case_name,
+ 1,
+ )
+
# Assert email text version:
txt_email = mail.outbox[4].body
self.assertIn(cross_object_alert.name, txt_email)
@@ -1712,7 +1724,7 @@ def test_percolator_plus_sweep_alerts_integration(
return_value="alert_hits_percolator",
)
class RECAPAlertsPercolatorTest(
- RECAPSearchTestCase, ESIndexTestCase, TestCase, RECAPAlertsAssertions
+ RECAPSearchTestCase, ESIndexTestCase, TestCase, SearchAlertsAssertions
):
"""
RECAP Alerts Percolator Tests
diff --git a/cl/api/tasks.py b/cl/api/tasks.py
index ec1c5971ac..39c5fe7533 100644
--- a/cl/api/tasks.py
+++ b/cl/api/tasks.py
@@ -1,5 +1,4 @@
import json
-from collections import defaultdict
from typing import Any
from elasticsearch_dsl.response import Hit
@@ -12,13 +11,14 @@
from cl.api.webhooks import send_webhook_event
from cl.celery_init import app
from cl.corpus_importer.api_serializers import DocketEntrySerializer
-from cl.lib.elasticsearch_utils import merge_highlights_into_result
+from cl.lib.elasticsearch_utils import set_child_docs_and_score
from cl.search.api_serializers import (
RECAPESWebhookResultSerializer,
V3OAESResultSerializer,
)
from cl.search.api_utils import ResultObject
from cl.search.models import SEARCH_TYPES, DocketEntry
+from cl.search.types import ESDictDocument
@app.task()
@@ -127,7 +127,7 @@ def send_es_search_alert_webhook(
@app.task()
def send_search_alert_webhook_es(
- results: list[dict[str, Any]] | list[Hit],
+ results: list[ESDictDocument] | list[Hit],
webhook_pk: int,
alert_pk: int,
) -> None:
@@ -152,34 +152,7 @@ def send_search_alert_webhook_es(
es_results, many=True
).data
case SEARCH_TYPES.RECAP:
- for result in results:
- child_result_objects = []
- child_docs = None
- if isinstance(result, dict):
- child_docs = result.get("child_docs")
- elif hasattr(result, "child_docs"):
- child_docs = result.child_docs
-
- if child_docs:
- for child_doc in child_docs:
- if isinstance(result, dict):
- child_result_objects.append(child_doc)
- else:
- child_result_objects.append(
- defaultdict(
- lambda: None,
- child_doc["_source"].to_dict(),
- )
- )
-
- result["child_docs"] = child_result_objects
- # Merge HL into the parent document from percolator response.
- if isinstance(result, dict):
- meta_hl = result.get("meta", {}).get("highlight", {})
- merge_highlights_into_result(
- meta_hl,
- result,
- )
+ set_child_docs_and_score(results, merge_highlights=True)
serialized_results = RECAPESWebhookResultSerializer(
results, many=True
).data
diff --git a/cl/api/webhooks.py b/cl/api/webhooks.py
index f6ca97d9e3..15f1d3cabf 100644
--- a/cl/api/webhooks.py
+++ b/cl/api/webhooks.py
@@ -13,7 +13,12 @@
)
from cl.alerts.models import Alert
from cl.alerts.utils import OldAlertReport
-from cl.api.models import Webhook, WebhookEvent, WebhookEventType
+from cl.api.models import (
+ Webhook,
+ WebhookEvent,
+ WebhookEventType,
+ WebhookVersions,
+)
from cl.api.utils import (
generate_webhook_key_content,
update_webhook_event_after_request,
@@ -23,6 +28,7 @@
from cl.recap.api_serializers import PacerFetchQueueSerializer
from cl.recap.models import PROCESSING_STATUS, PacerFetchQueue
from cl.search.api_serializers import (
+ OpinionClusterWebhookResultSerializer,
SearchResultSerializer,
V3OpinionESResultSerializer,
)
@@ -192,10 +198,17 @@ def send_search_alert_webhook(
).data
else:
# ES results serialization
- serialized_results = V3OpinionESResultSerializer(
- results,
- many=True,
- ).data
+ match webhook.version:
+ case WebhookVersions.v1:
+ serialized_results = V3OpinionESResultSerializer(
+ results,
+ many=True,
+ ).data
+ case WebhookVersions.v2:
+ serialized_results = OpinionClusterWebhookResultSerializer(
+ results,
+ many=True,
+ ).data
post_content = {
"webhook": generate_webhook_key_content(webhook),
diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index 92acd4ca61..1b9366f713 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -4,6 +4,7 @@
import re
import time
import traceback
+from collections import defaultdict
from copy import deepcopy
from dataclasses import fields
from functools import reduce, wraps
@@ -1281,6 +1282,7 @@ def build_es_base_query(
mlt_query,
child_highlighting=child_highlighting,
api_version=api_version,
+ alerts=alerts,
)
)
@@ -2964,9 +2966,10 @@ def do_es_api_query(
child documents.
"""
+ alerts = True if hl_tag == ALERTS_HL_TAG else False
try:
es_queries = build_es_base_query(
- search_query, cd, cd["highlight"], api_version
+ search_query, cd, cd["highlight"], api_version, alerts=alerts
)
s = es_queries.search_query
child_docs_query = es_queries.child_query
@@ -3047,7 +3050,7 @@ def do_es_api_query(
# parameters as in the frontend. Only switch highlighting according
# to the user request.
main_query = add_es_highlighting(
- s, cd, highlighting=cd["highlight"]
+ s, cd, alerts=alerts, highlighting=cd["highlight"]
)
return main_query, child_docs_query
@@ -3311,3 +3314,46 @@ def simplify_estimated_count(search_count: int) -> int:
zeroes = (len(search_count_str) - 2) * "0"
return int(first_two + zeroes)
return search_count
+
+
+def set_child_docs_and_score(
+ results: list[Hit] | list[dict[str, Any]] | Response,
+ merge_highlights: bool = False,
+ merge_score: bool = False,
+) -> None:
+ """Process and attach child documents to the main search results.
+
+ :param results: A list of search results, which can be ES Hit objects
+ or a list of dicts.
+ :param merge_highlights: A boolean indicating whether to merge
+ highlight data into the results.
+ :param merge_score: A boolean indicating whether to merge
+ the BM25 score into the results.
+ :return: None. Results are modified in place.
+ """
+
+ for result in results:
+ result_is_dict = isinstance(result, dict)
+ if result_is_dict:
+ # If the result is a dict, keep child_docs as is, or default it
+ # to [] when it's not present.
+ child_docs = result.get("child_docs", [])
+ result["child_docs"] = child_docs
+ else:
+ # Process child hits if the result is an ES AttrDict instance,
+ # so they can be properly serialized.
+ child_docs = getattr(result, "child_docs", [])
+ result["child_docs"] = [
+ defaultdict(lambda: None, doc["_source"].to_dict())
+ for doc in child_docs
+ ]
+
+ # Optionally merges highlights. Used for integrating percolator
+ # highlights into the percolated document.
+ if merge_highlights and result_is_dict:
+ meta_hl = result.get("meta", {}).get("highlight", {})
+ merge_highlights_into_result(meta_hl, result)
+
+ # Optionally merges the BM25 score for display in the API.
+ if merge_score and isinstance(result, AttrDict):
+ result["bm25_score"] = result.meta.score
diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py
index f27053e95d..31752a79af 100644
--- a/cl/search/api_serializers.py
+++ b/cl/search/api_serializers.py
@@ -619,7 +619,7 @@ class Meta:
)
-class OpinionClusterESResultSerializer(MainMetaMixin, DocumentSerializer):
+class OpinionClusterBaseESResultSerializer(DocumentSerializer):
"""The serializer for OpinionCluster Search results."""
opinions = OpinionDocumentESResultSerializer(
@@ -649,6 +649,20 @@ class Meta:
)
+class OpinionClusterESResultSerializer(
+ OpinionClusterBaseESResultSerializer, MainMetaMixin
+):
+ """The serializer for OpinionCluster Search results."""
+
+
+class OpinionClusterWebhookResultSerializer(
+ OpinionClusterBaseESResultSerializer
+):
+ """The serializer class for OpinionCluster search Webhooks results."""
+
+ meta = BaseMetaDataSerializer(source="*", read_only=True)
+
+
class PositionESResultSerializer(ChildMetaMixin, DocumentSerializer):
"""The serializer for Positions Search results."""
diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py
index 09afbd2653..f5c22e388a 100644
--- a/cl/search/api_utils.py
+++ b/cl/search/api_utils.py
@@ -19,6 +19,7 @@
do_es_api_query,
limit_inner_hits,
merge_unavailable_fields_on_parent_document,
+ set_child_docs_and_score,
set_results_highlights,
)
from cl.lib.scorched_utils import ExtraSolrInterface
@@ -474,18 +475,7 @@ def process_results(self, results: Response) -> None:
"v4",
self.clean_data["highlight"],
)
- for result in results:
- child_result_objects = []
- if hasattr(result, "child_docs"):
- for child_doc in result.child_docs:
- child_result_objects.append(
- defaultdict(
- lambda: None, child_doc["_source"].to_dict()
- )
- )
- result["child_docs"] = child_result_objects
- # Include the ES main document score as bm25_score.
- result["bm25_score"] = result.meta.score
+ set_child_docs_and_score(results, merge_score=True)
if self.reverse:
# If doing backward pagination, reverse the results of the current
diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py
index 6c59b01cf6..eb82285286 100644
--- a/cl/search/tests/tests_es_person.py
+++ b/cl/search/tests/tests_es_person.py
@@ -616,6 +616,7 @@ async def test_results_api_fields(self) -> None:
search_params = {
"type": SEARCH_TYPES.PEOPLE,
"q": f"id:{self.person_2.pk} AND nomination_process:(U.S. Senate)",
+ "order_by": "score desc",
}
# API
r = await self._test_api_results_count(search_params, 1, "API fields")
@@ -662,6 +663,7 @@ def test_results_api_empty_fields(self) -> None:
search_params = {
"type": SEARCH_TYPES.PEOPLE,
"q": f"id:{person.pk}",
+ "order_by": "score desc",
}
# API
r = async_to_sync(self._test_api_results_count)(
@@ -869,6 +871,7 @@ async def test_results_api_highlighted_fields(self) -> None:
"q": f"id:{self.person_2.pk} name:Sheindlin dob_city:Brookyln nomination_process:(U.S. Senate) political_affiliation:Democratic",
"school": "New York Law School",
"dob_state": "NY",
+ "order_by": "score desc",
}
# Judged Search type HL disabled.
diff --git a/cl/tests/cases.py b/cl/tests/cases.py
index 2f0db20e88..8b23dea418 100644
--- a/cl/tests/cases.py
+++ b/cl/tests/cases.py
@@ -270,6 +270,11 @@ async def _compare_field(
set(meta_expected_value.keys()),
f"The keys in field '{meta_field}' do not match.",
)
+ for score_value in meta_value.values():
+ self.assertIsNotNone(
+ score_value, f"The score value can't be None."
+ )
+
else:
self.assertEqual(
meta_value,
@@ -412,7 +417,7 @@ def _test_page_variables(
return next_page, previous_page, current_page
-class RECAPAlertsAssertions:
+class SearchAlertsAssertions:
@staticmethod
def get_html_content_from_email(email_content):
@@ -506,7 +511,9 @@ def _count_alert_hits_and_child_hits(
case_text_cleaned = self.clean_case_title(case_text)
if case_title == case_text_cleaned:
child_hit_count = len(
- case.xpath("following-sibling::ul[1]/li/a")
+ case.xpath(
+ "following-sibling::ul[1]/li/a | following-sibling::ul[1]/li/strong"
+ )
)
self.assertEqual(
child_hit_count,
@@ -535,8 +542,8 @@ def extract_child_descriptions(case_item):
child_documents = case_item.xpath("./following-sibling::ul[1]/li")
results = []
for li in child_documents:
- a_tag = li.xpath(".//a")[0]
- full_text = a_tag.text_content()
+ child_tag = li.xpath(".//a | .//strong")[0]
+ full_text = child_tag.text_content()
first_part = full_text.split("\u2014")[0].strip()
results.append(first_part)
@@ -563,6 +570,7 @@ def _count_webhook_hits_and_child_hits(
expected_hits,
case_title,
expected_child_hits,
+ nested_field="recap_documents",
):
"""Confirm the following assertions for the search alert webhook:
- An specific alert webhook was triggered.
@@ -570,6 +578,8 @@ def _count_webhook_hits_and_child_hits(
- The specified case contains the expected number of child hits.
"""
+ matched_alert_name = None
+ matched_case_title = None
for webhook in webhooks:
if webhook["payload"]["alert"]["name"] == alert_title:
webhook_cases = webhook["payload"]["results"]
@@ -579,14 +589,21 @@ def _count_webhook_hits_and_child_hits(
msg=f"Did not get the right number of hits for the alert %s. "
% alert_title,
)
+ matched_alert_name = True
for case in webhook["payload"]["results"]:
if case_title == strip_tags(case["caseName"]):
+ matched_case_title = True
+ if nested_field is None:
+ self.assertTrue(nested_field not in case)
+ continue
self.assertEqual(
- len(case["recap_documents"]),
+ len(case[nested_field]),
expected_child_hits,
msg=f"Did not get the right number of child documents for the case %s. "
% case_title,
)
+ self.assertTrue(matched_alert_name, msg="Alert name didn't match")
+ self.assertTrue(matched_case_title, msg="Case title didn't match")
def _count_percolator_webhook_hits_and_child_hits(
self,
@@ -651,6 +668,7 @@ def _assert_webhook_hit_hl(
field_name,
hl_expected,
child_field,
+ nested_field="recap_documents",
):
"""Assert Hl in webhook fields."""
for webhook in webhooks:
@@ -659,10 +677,10 @@ def _assert_webhook_hit_hl(
if child_field:
self.assertNotIn(
"score",
- hit["recap_documents"][0]["meta"],
+ hit[nested_field][0]["meta"],
msg="score shouldn't be present on webhook nested documents",
)
- child_field_content = hit["recap_documents"][0][field_name]
+ child_field_content = hit[nested_field][0][field_name]
self.assertIn(
hl_expected,
child_field_content,
Date Argued:
{% if result.dateArgued %}
@@ -116,9 +153,7 @@
- {% endif %}
- {% if type == 'o' or type == 'oa' %}
-