diff --git a/cl/alerts/management/commands/cl_send_alerts.py b/cl/alerts/management/commands/cl_send_alerts.py index 897b077fea..4792e78fd2 100644 --- a/cl/alerts/management/commands/cl_send_alerts.py +++ b/cl/alerts/management/commands/cl_send_alerts.py @@ -13,17 +13,25 @@ from django.template import loader from django.urls import reverse from django.utils.timezone import now +from elasticsearch_dsl import MultiSearch from elasticsearch_dsl import Q as ES_Q +from elasticsearch_dsl.response import Response from cl.alerts.models import Alert, RealTimeQueue from cl.alerts.utils import InvalidDateError -from cl.api.models import WebhookEventType +from cl.api.models import WebhookEventType, WebhookVersions from cl.api.webhooks import send_search_alert_webhook from cl.lib import search_utils from cl.lib.command_utils import VerboseCommand, logger -from cl.lib.elasticsearch_utils import do_es_api_query +from cl.lib.elasticsearch_utils import ( + do_es_api_query, + limit_inner_hits, + set_child_docs_and_score, + set_results_highlights, +) from cl.lib.scorched_utils import ExtraSolrInterface from cl.lib.search_utils import regroup_snippets +from cl.lib.types import CleanData from cl.search.constants import ALERTS_HL_TAG, SEARCH_ALERTS_OPINION_HL_FIELDS from cl.search.documents import OpinionDocument from cl.search.forms import SearchForm @@ -106,6 +114,59 @@ def send_alert(user_profile, hits): msg.send(fail_silently=False) +def query_alerts_es( + cd: CleanData, v1_webhook: bool = False +) -> tuple[Response, Response | None]: + """Query ES for opinion alerts, optionally handling a V1 webhook query. + + :param cd: A CleanData object containing the query parameters. + :param v1_webhook: A boolean indicating whether to include a V1 webhook query. + :return: A tuple containing the main search response and an optional V1 + query response. + """ + + v1_results = None + search_query = OpinionDocument.search() + cd["highlight"] = True + main_query, _ = do_es_api_query( + search_query, + cd, + SEARCH_ALERTS_OPINION_HL_FIELDS, + ALERTS_HL_TAG, + "v4", + ) + main_query = main_query.extra( + from_=0, + size=settings.SCHEDULED_ALERT_HITS_LIMIT, + ) + multi_search = MultiSearch() + multi_search = multi_search.add(main_query) + + if v1_webhook: + search_query = OpinionDocument.search() + v1_query, _ = do_es_api_query( + search_query, + cd, + SEARCH_ALERTS_OPINION_HL_FIELDS, + ALERTS_HL_TAG, + "v3", + ) + v1_query = v1_query.extra( + from_=0, + size=settings.SCHEDULED_ALERT_HITS_LIMIT, + ) + multi_search = multi_search.add(v1_query) + + responses = multi_search.execute() + results = responses[0] + limit_inner_hits({}, results, cd["type"]) + set_results_highlights(results, cd["type"]) + set_child_docs_and_score(results) + if v1_webhook: + v1_results = responses[1] + return results, v1_results + + class Command(VerboseCommand): help = ( "Sends the alert emails on a real time, daily, weekly or monthly " @@ -152,10 +213,9 @@ def handle(self, *args, **options): if options["rate"] == Alert.REAL_TIME: self.clean_rt_queue() - def run_query(self, alert, rate): + def run_query(self, alert, rate, v1_webhook=False): results = [] - cd = {} - main_params = {} + v1_results = None logger.info(f"Now running the query: {alert.query}\n") # Make a dict from the query string. @@ -175,7 +235,7 @@ def run_query(self, alert, rate): if waffle.switch_is_active("oa-es-alerts-active"): # Return empty results for OA alerts. They are now handled # by Elasticsearch. 
- return query_type, results + return query_type, results, v1_results logger.info(f"Data sent to SearchForm is: {qd}\n") search_form = SearchForm(qd, is_es_form=self.o_es_alerts) @@ -187,7 +247,7 @@ and len(self.valid_ids[query_type]) == 0 ): # Bail out. No results will be found if no valid_ids. - return query_type, results + return query_type, results, v1_results main_params = search_utils.build_main_query( cd, @@ -220,19 +280,7 @@ ) if self.o_es_alerts: - search_query = OpinionDocument.search() - s, _ = do_es_api_query( - search_query, - cd, - SEARCH_ALERTS_OPINION_HL_FIELDS, - ALERTS_HL_TAG, - "v3", - ) - s = s.extra( - from_=0, - size=settings.SCHEDULED_ALERT_HITS_LIMIT, - ) - results = s.execute() + results, v1_results = query_alerts_es(cd, v1_webhook) else: # Ignore warnings from this bit of code. Otherwise, it complains # about the query URL being too long and having to POST it instead @@ -248,7 +296,7 @@ regroup_snippets(results) logger.info(f"There were {len(results)} results.") - return qd, results + return qd, results, v1_results def send_emails_and_webhooks(self, rate): """Send out an email and webhook events to every user whose alert has a @@ -261,6 +309,13 @@ alerts = user.alerts.filter(rate=rate) logger.info(f"Running alerts for user '{user}': {alerts}") + # Query user's webhooks. + user_webhooks = user.webhooks.filter( + event_type=WebhookEventType.SEARCH_ALERT, enabled=True + ) + v1_webhook = WebhookVersions.v1 in { + webhook.version for webhook in user_webhooks + } if rate == Alert.REAL_TIME: if not user.profile.is_member: continue @@ -268,7 +323,9 @@ hits = [] for alert in alerts: try: - qd, results = self.run_query(alert, rate) + qd, results, v1_results = self.run_query( + alert, rate, v1_webhook + ) except: traceback.print_exc() logger.info( @@ -293,10 +350,13 @@ # Send webhook event if the user has a SEARCH_ALERT # endpoint enabled. - user_webhooks = user.webhooks.filter( - event_type=WebhookEventType.SEARCH_ALERT, enabled=True - ) for user_webhook in user_webhooks: + results_to_send = ( + v1_results + if alert.alert_type == SEARCH_TYPES.OPINION + and user_webhook.version == WebhookVersions.v1 + else results + ) - send_search_alert_webhook( - self.sis[search_type], results, user_webhook, alert - ) + send_search_alert_webhook( + self.sis[search_type], results_to_send, user_webhook, alert + ) diff --git a/cl/alerts/templates/alert_email_es.html b/cl/alerts/templates/alert_email_es.html index dc2f797268..7ef71cfcfe 100644 --- a/cl/alerts/templates/alert_email_es.html +++ b/cl/alerts/templates/alert_email_es.html @@ -75,17 +75,56 @@

- - View original: - - {% if result.download_url %} - - From the court - -   |   - {% endif %} + {% endif %} + {% if type == 'o' %} + + {% endif %} + {% if type == 'oa' %} +

+ + View original: + + {% if result.download_url %} + + From the court + +   |   + {% endif %} {% if result.local_path %} {# Provide link to S3. #} @@ -93,8 +132,6 @@

Date Argued: {% if result.dateArgued %} @@ -116,9 +153,7 @@

- {% endif %} - {% if type == 'o' or type == 'oa' %} -

+

{% if result|get_highlight:"text" %} …{{ result|get_highlight:"text"|safe|underscore_to_space }}… {% endif %} diff --git a/cl/alerts/templates/alert_email_es.txt b/cl/alerts/templates/alert_email_es.txt index 2b7ec3b569..8c0324e8f1 100644 --- a/cl/alerts/templates/alert_email_es.txt +++ b/cl/alerts/templates/alert_email_es.txt @@ -16,8 +16,14 @@ View Full Results / Edit this Alert: https://www.courtlistener.com/?{{ alert.que Disable this Alert (one click): https://www.courtlistener.com{% url "disable_alert" alert.secret_key %}{% endif %} {{forloop.counter}}. {{ result.caseName|render_string_or_list|safe|striptags }} ({% if result.court_id != 'scotus' %}{{ result.court_citation_string|render_string_or_list|striptags }} {% endif %}{% if type == 'o' or type == 'r' %}{{ result.dateFiled|date:"Y" }}{% elif type == 'oa' %}{{ result.dateArgued|date:"Y" }}{% endif %}) -{% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %}{% endif %} -{% if type == 'o' or type == 'oa' %}{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %}{% endif %} +{% if type == 'oa' %}{% if result.dateArgued %}Date Argued: {{ result.dateArgued|date:"F jS, Y" }}{% else %}Date Argued: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} | Duration: {{ result.duration|naturalduration }}{% if result.judge %} | Judge: {{ result.judge|render_string_or_list|safe|striptags|underscore_to_space }}{% endif %} +{% if result|get_highlight:"text" %}...{{ result|get_highlight:"text"|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %} +{% endif %} +{% if type == 'o' %}{% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:True %}{% if result.child_docs|length > 1 or doc.type != 'combined-opinion' %}{% if doc.text %}{{ doc.type_text }}{% endif %}{% endif %} + {% if doc.text %}...{{ doc.text|render_string_or_list|safe|striptags|underscore_to_space|compress_whitespace }}...{% endif %} + {% if doc.download_url %} - Download original from the court: {{doc.download_url}}{% endif %} + {% if doc.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ doc.local_path }}{% endif %} +{% endwith %}{% endfor %}{% endif %} {% if type == 'r' %}{% if result.dateFiled %}Date Filed: {{ result.dateFiled|date:"F jS, Y" }}{% else %}Date Filed: Unknown Date {% endif %}{% if result.docketNumber %} | Docket Number: {{ result.docketNumber|render_string_or_list|safe|striptags }}{% endif %} {% for doc in result.child_docs %}{% with doc=doc|get_es_doc_content:scheduled_alert %} - {% if doc.short_description %}{{ doc.short_description|render_string_or_list|safe|striptags }} - {% endif %}Document #{% if doc.document_number %}{{ doc.document_number }}{% endif %}{% if doc.attachment_number %}, Attachment #{{ doc.attachment_number }}{% endif %} {% if doc.description %}Description: {{ doc.description|render_string_or_list|safe|striptags }}{% endif %} @@ -27,9 +33,8 @@ Disable this Alert (one click): https://www.courtlistener.com{% url "disable_ale {% if 
result.child_docs and result.child_remaining %}{% extract_q_value alert.query_run as q_value %}View Additional Results for this Case: https://www.courtlistener.com/?type={{ type|urlencode }}&q={% if q_value %}({{ q_value|urlencode }})%20AND%20{% endif %}docket_id%3A{{ result.docket_id|urlencode }}{% endif %} {% endif %}~~~~~ - View this item on our site: https://www.courtlistener.com{% if type == 'r' %}{{result.docket_absolute_url}}{% else %}{{result.absolute_url}}{% endif %} -{% if result.download_url %} - Download original from the court: {{result.download_url}} -{% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endfor %} - +{% if type == 'oa' %}{% if result.download_url %} - Download original from the court: {{result.download_url}} +{% endif %}{% if result.local_path %} - Download the original from our backup: https://storage.courtlistener.com/{{ result.local_path }}{% endif %}{% endif %}{% endfor %} {% endfor %} ************************ This alert brought to you by the 501(c)(3) non-profit Free Law Project diff --git a/cl/alerts/tests/tests.py b/cl/alerts/tests/tests.py index 82b5f3c952..990658c6a3 100644 --- a/cl/alerts/tests/tests.py +++ b/cl/alerts/tests/tests.py @@ -48,6 +48,7 @@ Webhook, WebhookEvent, WebhookEventType, + WebhookVersions, ) from cl.api.utils import get_webhook_deprecation_date from cl.audio.factories import AudioWithParentsFactory @@ -74,10 +75,14 @@ Opinion, RECAPDocument, ) -from cl.search.tasks import add_items_to_solr from cl.stats.models import Stat from cl.tests.base import SELENIUM_TIMEOUT, BaseSeleniumTest -from cl.tests.cases import APITestCase, ESIndexTestCase, TestCase +from cl.tests.cases import ( + APITestCase, + ESIndexTestCase, + SearchAlertsAssertions, + TestCase, +) from cl.tests.utils import MockResponse, make_client from cl.users.factories import UserFactory, UserProfileWithParentsFactory from cl.users.models import EmailSent @@ -565,7 +570,9 @@ async def test_alert_update(self) -> None: @override_switch("o-es-alerts-active", active=True) @mock.patch("cl.search.tasks.percolator_alerts_models_supported", new=[Audio]) -class SearchAlertsWebhooksTest(ESIndexTestCase, TestCase): +class SearchAlertsWebhooksTest( + ESIndexTestCase, TestCase, SearchAlertsAssertions +): """Test Search Alerts Webhooks""" @classmethod @@ -582,6 +589,7 @@ def setUpTestData(cls): event_type=WebhookEventType.SEARCH_ALERT, url="https://example.com/", enabled=True, + version=2, ) cls.webhook_enabled_1 = WebhookFactory( user=cls.user_profile_1.user, @@ -648,6 +656,7 @@ def setUpTestData(cls): event_type=WebhookEventType.SEARCH_ALERT, url="https://example.com/", enabled=True, + version=1, ) cls.search_alert_3 = AlertFactory( user=cls.user_profile_3.user, @@ -782,7 +791,7 @@ def test_send_search_alert_webhooks(self): len(mail.outbox), 4, msg="Outgoing emails don't match." 
) - # Opinion email alert assertions + # First Opinion email alert assertions: search_alert self.assertEqual(mail.outbox[0].to[0], self.user_profile.user.email) # Plain text assertions opinion_alert_content = mail.outbox[0].body @@ -794,18 +803,36 @@ opinion_alert_content, ) self.assertIn("California vs Lorem", opinion_alert_content) - self.assertIn("california sit amet", opinion_alert_content) + self.assertIn( + "california sit amet", + opinion_alert_content, + msg="Alert content didn't match", + ) self.assertIn(self.dly_opinion_2.download_url, opinion_alert_content) self.assertIn( str(self.dly_opinion_2.local_path), opinion_alert_content ) - html_content = None - for content, content_type in mail.outbox[0].alternatives: - if content_type == "text/html": - html_content = content - break + html_content = self.get_html_content_from_email(mail.outbox[0]) # HTML assertions + self._count_alert_hits_and_child_hits( + html_content, + self.search_alert.name, + 1, + self.dly_opinion.cluster.case_name, + 2, + ) + + self._assert_child_hits_content( + html_content, + self.search_alert.name, + self.dly_opinion.cluster.case_name, + [ + self.dly_opinion.get_type_display(), + self.dly_opinion_2.get_type_display(), + ], + ) + self.assertIn("had 1 hit", html_content) self.assertIn( self.dly_opinion_2.cluster.docket.court.citation_string.replace( @@ -831,7 +858,27 @@ mail.outbox[0].extra_headers["List-Unsubscribe"], ) - # Second Opinion alert + # Second Opinion alert: search_alert_2 + html_content = self.get_html_content_from_email(mail.outbox[1]) + # HTML assertions + self._count_alert_hits_and_child_hits( + html_content, + self.search_alert.name, + 1, + self.dly_opinion.cluster.case_name, + 2, + ) + + self._assert_child_hits_content( + html_content, + self.search_alert.name, + self.dly_opinion.cluster.case_name, + [ + self.dly_opinion.get_type_display(), + self.dly_opinion_2.get_type_display(), + ], + ) + self.assertEqual(mail.outbox[1].to[0], self.user_profile_2.user.email) self.assertIn("daily opinion alert", mail.outbox[1].body) self.assertEqual( @@ -846,6 +893,24 @@ mail.outbox[1].extra_headers["List-Unsubscribe"], ) + # Third Opinion alert: search_alert_3 + html_content = self.get_html_content_from_email(mail.outbox[2]) + # HTML assertions + self._count_alert_hits_and_child_hits( + html_content, + self.search_alert_3.name, + 1, + self.dly_opinion.cluster.case_name, + 1, + ) + + self._assert_child_hits_content( + html_content, + self.search_alert_3.name, + self.dly_opinion.cluster.case_name, + [self.dly_opinion.get_type_display()], + ) + # Oral Argument Alert self.assertEqual(mail.outbox[3].to[0], self.user_profile.user.email) self.assertIn("daily oral argument alert ", mail.outbox[3].body) @@ -861,8 +926,11 @@ mail.outbox[3].extra_headers["List-Unsubscribe"], ) - # Two webhook events should be sent, both of them to user_profile user - webhook_events = WebhookEvent.objects.all() + # 3 webhook events should be sent: 2 to the user_profile user and 1 to the user_profile_3 user + webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True + ) + self.assertEqual( len(webhook_events), 3, msg="Webhook events don't match."
) @@ -885,7 +953,46 @@ def test_send_search_alert_webhooks(self): }, } - for webhook_sent in webhook_events: + # Assert V2 Opinion Search Alerts Webhook + self._count_webhook_hits_and_child_hits( + list(webhook_events), + self.search_alert.name, + 1, + self.dly_opinion.cluster.case_name, + 2, + "opinions", + ) + + # Assert HL content in V2 webhooks. + self._assert_webhook_hit_hl( + webhook_events, + self.search_alert.name, + "caseName", + "California vs Lorem", + child_field=False, + nested_field="opinions", + ) + self._assert_webhook_hit_hl( + webhook_events, + self.search_alert.name, + "snippet", + "Lorem dolor california sit amet, consectetur adipiscing elit.", + child_field=True, + nested_field="opinions", + ) + + # Assert V1 Opinion Search Alerts Webhook + self._count_webhook_hits_and_child_hits( + list(webhook_events), + self.search_alert_3.name, + 1, + self.dly_opinion.cluster.case_name, + 0, + None, + ) + + webhook_events_instances = WebhookEvent.objects.all() + for webhook_sent in webhook_events_instances: with self.subTest(webhook_sent=webhook_sent): self.assertEqual( webhook_sent.event_status, @@ -893,6 +1000,7 @@ def test_send_search_alert_webhooks(self): msg="The event status doesn't match.", ) content = webhook_sent.content + alert_data_compare = alert_data[ content["payload"]["alert"]["id"] ] @@ -926,14 +1034,13 @@ def test_send_search_alert_webhooks(self): if ( content["payload"]["alert"]["alert_type"] == SEARCH_TYPES.OPINION - ): + ) and webhook_sent.webhook.version == WebhookVersions.v1: # Assert the number of keys in the Opinions Search Webhook # payload keys_count = len(content["payload"]["results"][0]) self.assertEqual( keys_count, len(opinion_v3_search_api_keys) ) - # Iterate through all the opinion fields and compare them. for ( field, @@ -951,7 +1058,10 @@ def test_send_search_alert_webhooks(self): expected_value, f"Field '{field}' does not match.", ) - else: + elif ( + content["payload"]["alert"]["alert_type"] + == SEARCH_TYPES.ORAL_ARGUMENT + ): # Assertions for OA webhook payload. self.assertEqual( content["payload"]["results"][0]["caseName"], diff --git a/cl/alerts/tests/tests_recap_alerts.py b/cl/alerts/tests/tests_recap_alerts.py index c40aa3b349..e4610cffd9 100644 --- a/cl/alerts/tests/tests_recap_alerts.py +++ b/cl/alerts/tests/tests_recap_alerts.py @@ -51,7 +51,7 @@ from cl.search.models import Docket from cl.search.tasks import index_docket_parties_in_es from cl.stats.models import Stat -from cl.tests.cases import ESIndexTestCase, RECAPAlertsAssertions, TestCase +from cl.tests.cases import ESIndexTestCase, SearchAlertsAssertions, TestCase from cl.tests.utils import MockResponse from cl.users.factories import UserProfileWithParentsFactory @@ -61,7 +61,7 @@ return_value="alert_hits_sweep", ) class RECAPAlertsSweepIndexTest( - RECAPSearchTestCase, ESIndexTestCase, TestCase, RECAPAlertsAssertions + RECAPSearchTestCase, ESIndexTestCase, TestCase, SearchAlertsAssertions ): """ RECAP Alerts Sweep Index Tests @@ -627,6 +627,18 @@ def test_filter_out_alerts_to_send_by_query_and_hits( alert_de.docket.case_name, [rd_2.description], ) + webhook_events = WebhookEvent.objects.all().values_list( + "content", flat=True + ) + # Assert webhook event child hits. 
+ self._count_webhook_hits_and_child_hits( + list(webhook_events), + cross_object_alert.name, + 1, + alert_de.docket.case_name, + 1, + ) + # Assert email text version: txt_email = mail.outbox[4].body self.assertIn(cross_object_alert.name, txt_email) @@ -1712,7 +1724,7 @@ def test_percolator_plus_sweep_alerts_integration( return_value="alert_hits_percolator", ) class RECAPAlertsPercolatorTest( - RECAPSearchTestCase, ESIndexTestCase, TestCase, RECAPAlertsAssertions + RECAPSearchTestCase, ESIndexTestCase, TestCase, SearchAlertsAssertions ): """ RECAP Alerts Percolator Tests diff --git a/cl/api/tasks.py b/cl/api/tasks.py index ec1c5971ac..39c5fe7533 100644 --- a/cl/api/tasks.py +++ b/cl/api/tasks.py @@ -1,5 +1,4 @@ import json -from collections import defaultdict from typing import Any from elasticsearch_dsl.response import Hit @@ -12,13 +11,14 @@ from cl.api.webhooks import send_webhook_event from cl.celery_init import app from cl.corpus_importer.api_serializers import DocketEntrySerializer -from cl.lib.elasticsearch_utils import merge_highlights_into_result +from cl.lib.elasticsearch_utils import set_child_docs_and_score from cl.search.api_serializers import ( RECAPESWebhookResultSerializer, V3OAESResultSerializer, ) from cl.search.api_utils import ResultObject from cl.search.models import SEARCH_TYPES, DocketEntry +from cl.search.types import ESDictDocument @app.task() @@ -127,7 +127,7 @@ def send_es_search_alert_webhook( @app.task() def send_search_alert_webhook_es( - results: list[dict[str, Any]] | list[Hit], + results: list[ESDictDocument] | list[Hit], webhook_pk: int, alert_pk: int, ) -> None: @@ -152,34 +152,7 @@ def send_search_alert_webhook_es( es_results, many=True ).data case SEARCH_TYPES.RECAP: - for result in results: - child_result_objects = [] - child_docs = None - if isinstance(result, dict): - child_docs = result.get("child_docs") - elif hasattr(result, "child_docs"): - child_docs = result.child_docs - - if child_docs: - for child_doc in child_docs: - if isinstance(result, dict): - child_result_objects.append(child_doc) - else: - child_result_objects.append( - defaultdict( - lambda: None, - child_doc["_source"].to_dict(), - ) - ) - - result["child_docs"] = child_result_objects - # Merge HL into the parent document from percolator response. 
- if isinstance(result, dict): - meta_hl = result.get("meta", {}).get("highlight", {}) - merge_highlights_into_result( - meta_hl, - result, - ) + set_child_docs_and_score(results, merge_highlights=True) serialized_results = RECAPESWebhookResultSerializer( results, many=True ).data diff --git a/cl/api/webhooks.py b/cl/api/webhooks.py index f6ca97d9e3..15f1d3cabf 100644 --- a/cl/api/webhooks.py +++ b/cl/api/webhooks.py @@ -13,7 +13,12 @@ ) from cl.alerts.models import Alert from cl.alerts.utils import OldAlertReport -from cl.api.models import Webhook, WebhookEvent, WebhookEventType +from cl.api.models import ( + Webhook, + WebhookEvent, + WebhookEventType, + WebhookVersions, +) from cl.api.utils import ( generate_webhook_key_content, update_webhook_event_after_request, @@ -23,6 +28,7 @@ from cl.recap.api_serializers import PacerFetchQueueSerializer from cl.recap.models import PROCESSING_STATUS, PacerFetchQueue from cl.search.api_serializers import ( + OpinionClusterWebhookResultSerializer, SearchResultSerializer, V3OpinionESResultSerializer, ) @@ -192,10 +198,17 @@ def send_search_alert_webhook( ).data else: # ES results serialization - serialized_results = V3OpinionESResultSerializer( - results, - many=True, - ).data + match webhook.version: + case WebhookVersions.v1: + serialized_results = V3OpinionESResultSerializer( + results, + many=True, + ).data + case WebhookVersions.v2: + serialized_results = OpinionClusterWebhookResultSerializer( + results, + many=True, + ).data post_content = { "webhook": generate_webhook_key_content(webhook), diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 92acd4ca61..1b9366f713 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -4,6 +4,7 @@ import re import time import traceback +from collections import defaultdict from copy import deepcopy from dataclasses import fields from functools import reduce, wraps @@ -1281,6 +1282,7 @@ def build_es_base_query( mlt_query, child_highlighting=child_highlighting, api_version=api_version, + alerts=alerts, ) ) @@ -2964,9 +2966,10 @@ def do_es_api_query( child documents. """ + alerts = True if hl_tag == ALERTS_HL_TAG else False try: es_queries = build_es_base_query( - search_query, cd, cd["highlight"], api_version + search_query, cd, cd["highlight"], api_version, alerts=alerts ) s = es_queries.search_query child_docs_query = es_queries.child_query @@ -3047,7 +3050,7 @@ def do_es_api_query( # parameters as in the frontend. Only switch highlighting according # to the user request. main_query = add_es_highlighting( - s, cd, highlighting=cd["highlight"] + s, cd, alerts=alerts, highlighting=cd["highlight"] ) return main_query, child_docs_query @@ -3311,3 +3314,46 @@ def simplify_estimated_count(search_count: int) -> int: zeroes = (len(search_count_str) - 2) * "0" return int(first_two + zeroes) return search_count + + +def set_child_docs_and_score( + results: list[Hit] | list[dict[str, Any]] | Response, + merge_highlights: bool = False, + merge_score: bool = False, +) -> None: + """Process and attach child documents to the main search results. + + :param results: A list of search results, which can be ES Hit objects + or a list of dicts. + :param merge_highlights: A boolean indicating whether to merge + highlight data into the results. + :param merge_score: A boolean indicating whether to merge + the BM25 score into the results. + :return: None. Results are modified in place. 
+ """ + + for result in results: + result_is_dict = isinstance(result, dict) + if result_is_dict: + # If the result is a dictionary, do nothing, or assign [] to + # child_docs if it is not present. + child_docs = result.get("child_docs", []) + result["child_docs"] = child_docs + else: + # Process child hits if the result is an ES AttrDict instance, + # so they can be properly serialized. + child_docs = getattr(result, "child_docs", []) + result["child_docs"] = [ + defaultdict(lambda: None, doc["_source"].to_dict()) + for doc in child_docs + ] + + # Optionally merges highlights. Used for integrating percolator + # highlights into the percolated document. + if merge_highlights and result_is_dict: + meta_hl = result.get("meta", {}).get("highlight", {}) + merge_highlights_into_result(meta_hl, result) + + # Optionally merges the BM25 score for display in the API. + if merge_score and isinstance(result, AttrDict): + result["bm25_score"] = result.meta.score diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index f27053e95d..31752a79af 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -619,7 +619,7 @@ class Meta: ) -class OpinionClusterESResultSerializer(MainMetaMixin, DocumentSerializer): +class OpinionClusterBaseESResultSerializer(DocumentSerializer): """The serializer for OpinionCluster Search results.""" opinions = OpinionDocumentESResultSerializer( @@ -649,6 +649,20 @@ class Meta: ) +class OpinionClusterESResultSerializer( + OpinionClusterBaseESResultSerializer, MainMetaMixin +): + """The serializer for OpinionCluster Search results.""" + + +class OpinionClusterWebhookResultSerializer( + OpinionClusterBaseESResultSerializer +): + """The serializer class for OpinionCluster search Webhooks results.""" + + meta = BaseMetaDataSerializer(source="*", read_only=True) + + class PositionESResultSerializer(ChildMetaMixin, DocumentSerializer): """The serializer for Positions Search results.""" diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py index 09afbd2653..f5c22e388a 100644 --- a/cl/search/api_utils.py +++ b/cl/search/api_utils.py @@ -19,6 +19,7 @@ do_es_api_query, limit_inner_hits, merge_unavailable_fields_on_parent_document, + set_child_docs_and_score, set_results_highlights, ) from cl.lib.scorched_utils import ExtraSolrInterface @@ -474,18 +475,7 @@ def process_results(self, results: Response) -> None: "v4", self.clean_data["highlight"], ) - for result in results: - child_result_objects = [] - if hasattr(result, "child_docs"): - for child_doc in result.child_docs: - child_result_objects.append( - defaultdict( - lambda: None, child_doc["_source"].to_dict() - ) - ) - result["child_docs"] = child_result_objects - # Include the ES main document score as bm25_score. - result["bm25_score"] = result.meta.score + set_child_docs_and_score(results, merge_score=True) if self.reverse: # If doing backward pagination, reverse the results of the current diff --git a/cl/search/tests/tests_es_person.py b/cl/search/tests/tests_es_person.py index 6c59b01cf6..eb82285286 100644 --- a/cl/search/tests/tests_es_person.py +++ b/cl/search/tests/tests_es_person.py @@ -616,6 +616,7 @@ async def test_results_api_fields(self) -> None: search_params = { "type": SEARCH_TYPES.PEOPLE, "q": f"id:{self.person_2.pk} AND nomination_process:(U.S. 
Senate)", + "order_by": "score desc", } # API r = await self._test_api_results_count(search_params, 1, "API fields") @@ -662,6 +663,7 @@ def test_results_api_empty_fields(self) -> None: search_params = { "type": SEARCH_TYPES.PEOPLE, "q": f"id:{person.pk}", + "order_by": "score desc", } # API r = async_to_sync(self._test_api_results_count)( @@ -869,6 +871,7 @@ async def test_results_api_highlighted_fields(self) -> None: "q": f"id:{self.person_2.pk} name:Sheindlin dob_city:Brookyln nomination_process:(U.S. Senate) political_affiliation:Democratic", "school": "New York Law School", "dob_state": "NY", + "order_by": "score desc", } # Judged Search type HL disabled. diff --git a/cl/tests/cases.py b/cl/tests/cases.py index 2f0db20e88..8b23dea418 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -270,6 +270,11 @@ async def _compare_field( set(meta_expected_value.keys()), f"The keys in field '{meta_field}' do not match.", ) + for score_value in meta_value.values(): + self.assertIsNotNone( + score_value, f"The score value can't be None." + ) + else: self.assertEqual( meta_value, @@ -412,7 +417,7 @@ def _test_page_variables( return next_page, previous_page, current_page -class RECAPAlertsAssertions: +class SearchAlertsAssertions: @staticmethod def get_html_content_from_email(email_content): @@ -506,7 +511,9 @@ def _count_alert_hits_and_child_hits( case_text_cleaned = self.clean_case_title(case_text) if case_title == case_text_cleaned: child_hit_count = len( - case.xpath("following-sibling::ul[1]/li/a") + case.xpath( + "following-sibling::ul[1]/li/a | following-sibling::ul[1]/li/strong" + ) ) self.assertEqual( child_hit_count, @@ -535,8 +542,8 @@ def extract_child_descriptions(case_item): child_documents = case_item.xpath("./following-sibling::ul[1]/li") results = [] for li in child_documents: - a_tag = li.xpath(".//a")[0] - full_text = a_tag.text_content() + child_tag = li.xpath(".//a | .//strong")[0] + full_text = child_tag.text_content() first_part = full_text.split("\u2014")[0].strip() results.append(first_part) @@ -563,6 +570,7 @@ def _count_webhook_hits_and_child_hits( expected_hits, case_title, expected_child_hits, + nested_field="recap_documents", ): """Confirm the following assertions for the search alert webhook: - An specific alert webhook was triggered. @@ -570,6 +578,8 @@ def _count_webhook_hits_and_child_hits( - The specified case contains the expected number of child hits. """ + matched_alert_name = None + matched_case_title = None for webhook in webhooks: if webhook["payload"]["alert"]["name"] == alert_title: webhook_cases = webhook["payload"]["results"] @@ -579,14 +589,21 @@ def _count_webhook_hits_and_child_hits( msg=f"Did not get the right number of hits for the alert %s. " % alert_title, ) + matched_alert_name = True for case in webhook["payload"]["results"]: if case_title == strip_tags(case["caseName"]): + matched_case_title = True + if nested_field is None: + self.assertTrue(nested_field not in case) + continue self.assertEqual( - len(case["recap_documents"]), + len(case[nested_field]), expected_child_hits, msg=f"Did not get the right number of child documents for the case %s. 
" % case_title, ) + self.assertTrue(matched_alert_name, msg="Alert name didn't match") + self.assertTrue(matched_case_title, msg="Case title didn't match") def _count_percolator_webhook_hits_and_child_hits( self, @@ -651,6 +668,7 @@ def _assert_webhook_hit_hl( field_name, hl_expected, child_field, + nested_field="recap_documents", ): """Assert Hl in webhook fields.""" for webhook in webhooks: @@ -659,10 +677,10 @@ def _assert_webhook_hit_hl( if child_field: self.assertNotIn( "score", - hit["recap_documents"][0]["meta"], + hit[nested_field][0]["meta"], msg="score shouldn't be present on webhook nested documents", ) - child_field_content = hit["recap_documents"][0][field_name] + child_field_content = hit[nested_field][0][field_name] self.assertIn( hl_expected, child_field_content,