Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

612 Introduced RECAP Search Alerts sweep index #4127

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
3e4f269
fix(elasticsearch): Test RECAP nested index reliability
albertisfu Jun 21, 2024
53b3b65
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jun 21, 2024
2955b0b
fix(alerts): Changed sweep index approach to parent-child documents
albertisfu Jun 22, 2024
9307b77
fix(alerts): Added cl_send_recap_alerts command
albertisfu Jun 25, 2024
9b4e1c1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 25, 2024
8b537f0
fix(alerts): Implemented filtering of RECAP alerts hits for the sweep…
albertisfu Jun 27, 2024
c1232ec
fix(alerts): Updated ES alert email templates to support RECAP Alerts.
albertisfu Jun 28, 2024
3e96f61
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jun 28, 2024
51c7bb6
fix(alerts): Group alerts and case hits limit
albertisfu Jun 28, 2024
7fc3298
fix(alerts): Trigger RECAP search alerts webhooks
albertisfu Jun 29, 2024
b5016ba
fix(alerts): Schedule wly and mly RECAP Search Alerts
albertisfu Jun 29, 2024
4a128bf
fix(alerts): Copy documents from the main index to the sweep index us…
albertisfu Jul 2, 2024
3a4a456
fix(alerts): Fixed RECAPSweepDocument index mapping
albertisfu Jul 2, 2024
add980a
fix(alerts): Tweak RECAP Alert estimation query to consider both Dock…
albertisfu Jul 3, 2024
a20113f
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 3, 2024
ebf269d
fix(elasticsearch): Fixed build_daterange_query type hint
albertisfu Jul 3, 2024
bffee6d
fix(alerts): Fixed re_index task estimated remaining time compute
albertisfu Jul 3, 2024
847f0fd
fix(alerts): Handle creation and removal of the RECAP alerts sweep in…
albertisfu Jul 3, 2024
4b324c9
fix(elasticsearch): Fixed tests related to timestamp updates
albertisfu Jul 3, 2024
0d63080
fix(alerts): Fix should_docket_hit_be_included date comparison
albertisfu Jul 4, 2024
5b3d130
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 4, 2024
5077e01
fix(alerts): Changed approach to filter out cross-object hits by usin…
albertisfu Jul 10, 2024
9dffbfd
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 10, 2024
a4e4e62
fix(alerts): Added more tests related to filtering cross-object hits.
albertisfu Jul 10, 2024
49dd480
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 10, 2024
a468336
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 19, 2024
38d6884
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Jul 25, 2024
b56f235
fix(alerts): Restore send_es_search_alert_webhook to avoid conflicts …
albertisfu Jul 25, 2024
d102664
fix(alerts): Fixed MLY alerts test can't be sent after the 28th
albertisfu Jul 29, 2024
7977b80
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Sep 26, 2024
57b6df7
fix(alerts): Fixed merge conflicts and adjust test accordingly new RE…
albertisfu Sep 26, 2024
b35ef0a
fix(elasticsearch): Fixed failing test due to build_full_join_es_quer…
albertisfu Sep 27, 2024
8902aa0
fix(alerts): Removed recap_document_hl_matched as we no longer rely o…
albertisfu Sep 27, 2024
d0b1298
Merge branch 'main' into 612-introduced-recap-search-alerts
albertisfu Sep 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions cl/alerts/management/commands/cl_send_recap_alerts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import traceback
import datetime

from asgiref.sync import async_to_sync
from django.contrib.auth.models import User
from django.http import QueryDict
from django.utils.timezone import now
from elasticsearch.exceptions import RequestError, TransportError

from cl.lib.command_utils import VerboseCommand, logger
from cl.lib.elasticsearch_utils import do_es_sweep_alert_query
from cl.search.documents import DocketSweepDocument
from cl.search.models import SEARCH_TYPES
from cl.stats.utils import tally_stat
from cl.alerts.tasks import send_search_alert_emails
from cl.alerts.models import Alert
from cl.search.exception import (
BadProximityQuery,
UnbalancedParenthesesQuery,
UnbalancedQuotesQuery,
)
from cl.alerts.utils import recap_document_hl_matched, query_includes_rd_field


def index_daily_recap_documents():
    """Placeholder for the daily RECAP documents indexing step.

    Not implemented yet: the function performs no work and returns None.
    """
    # TODO implement
    return None
albertisfu marked this conversation as resolved.
Show resolved Hide resolved

def has_rd_hit_been_triggered():
    """Placeholder check for whether a RECAPDocument hit was already triggered.

    Not implemented yet: the function takes no arguments and always
    returns False.
    """
    # TODO implement
    return False

def has_docket_hit_been_triggered():
    """Placeholder check for whether a Docket hit was already triggered.

    Not implemented yet: the function takes no arguments and always
    returns True.
    """
    # TODO implement
    return True

# Run every search alert configured at `rate` against the RECAP sweep index
# (DocketSweepDocument), queue one email task per user that has hits, and
# tally how many user emails were queued.
#
# :param rate: The alert rate to process; the Command below passes
#     Alert.REAL_TIME and Alert.DAILY.
# :return: None.
#
# NOTE(review): leading indentation is not preserved in this view, so the
# nesting of the statements below must be confirmed against the real file
# before relying on any comment that says "presumably".
def query_and_send_alerts(rate):
# Users that own at least one alert at this rate; distinct() avoids visiting
# a user once per matching alert.
alert_users = User.objects.filter(alerts__rate=rate).distinct()
alerts_sent_count = 0
# NOTE(review): datetime.datetime.now() yields a naive datetime, while the
# per-alert timestamp further down uses django.utils.timezone.now(). Confirm
# whether the bulk update should use the timezone-aware helper as well.
now_time = datetime.datetime.now()
for user in alert_users:
alerts = user.alerts.filter(rate=rate)
logger.info(f"Running alerts for user '{user}': {alerts}")

# `hits` accumulates the payload for this user's email; `alerts_to_update`
# accumulates the pks of alerts whose query ran without raising.
# NOTE(review): both are presumably reset once per user - confirm, because
# that determines which alerts the bulk update at the bottom touches.
hits = []
alerts_to_update = []
for alert in alerts:
# The stored alert query is a URL-encoded string; parse it into a mutable
# QueryDict so it can be passed to the ES query builder.
search_params = QueryDict(alert.query.encode(), mutable=True)
includes_rd_fields = query_includes_rd_field(search_params)

# NOTE(review): this file imports only RequestError and TransportError from
# elasticsearch.exceptions, so `ConnectionError` in the tuple below resolves
# to Python's builtin ConnectionError, not the elasticsearch one.
try:
search_query = DocketSweepDocument.search()
results, total_hits = do_es_sweep_alert_query(
search_query,
search_params,
)
except (UnbalancedParenthesesQuery,
UnbalancedQuotesQuery,
BadProximityQuery,TransportError, ConnectionError, RequestError):
# A failing alert is reported (traceback printed, message logged at INFO)
# and skipped so the remaining alerts are still processed.
traceback.print_exc()
logger.info(
f"Search for this alert failed: {alert.query}\n"
)
continue

# The pk is recorded as soon as the query succeeds, before checking whether
# it returned any results.
alerts_to_update.append(alert.pk)
if len(results) > 0:
# Falls back to the OPINION search type when the query has no "type" param.
search_type = search_params.get("type", SEARCH_TYPES.OPINION)
results_to_send = []
for hit in results:
# For queries without RECAPDocument fields, keep only the child docs for
# which recap_document_hl_matched() is falsy and
# has_rd_hit_been_triggered() is falsy (the latter is a stub called with no
# arguments that always returns False).
# NOTE(review): it is unclear from this view whether the `if rds_to_send:`
# check and the results_to_send.append(hit) call are nested under
# `if not includes_rd_fields:`; if they are, queries that include RD fields
# never add a hit to results_to_send. Confirm the intended nesting.
if not includes_rd_fields:
rds_to_send = [rd_hit for rd_hit in hit["child_docs"]
if not recap_document_hl_matched(
rd_hit) and not has_rd_hit_been_triggered()]
if rds_to_send:
hit["child_docs"] = rds_to_send
results_to_send.append(hit)

# Each entry is [alert, search type, hits to send, number of hits to send].
hits.append(
[alert, search_type, results_to_send, len(results_to_send)]
)
# Persist the query that was run and the time of this hit on the alert.
alert.query_run = search_params.urlencode()
alert.date_last_hit = now()
alert.save()

if hits:
# One Celery task per user, carrying a single (user pk, hits) tuple.
send_search_alert_emails.delay([(user.pk, hits)])
alerts_sent_count += 1

# Update Alert's date_last_hit in bulk.
# NOTE(review): this also covers alerts whose query returned no results
# (their pk is appended before the results check), and it overwrites the
# per-alert date_last_hit saved above with `now_time`. Confirm that both are
# intended, and whether this statement runs once per user or once at the end.
Alert.objects.filter(id__in=alerts_to_update).update(
date_last_hit=now_time
)
# tally_stat is invoked through async_to_sync from this synchronous code.
async_to_sync(tally_stat)(f"alerts.sent.{rate}", inc=alerts_sent_count)
logger.info(f"Sent {alerts_sent_count} {rate} email alerts.")


def query_and_schedule_wly_and_mly_alerts():
    """Placeholder for querying and scheduling weekly and monthly alerts.

    Not implemented yet: the function performs no work and returns None.
    """
    # TODO implement
    return None
albertisfu marked this conversation as resolved.
Show resolved Hide resolved


class Command(VerboseCommand):
    """Management command that runs the RECAP search alerts steps in order."""

    help = "Send RECAP Search Alerts."

    def handle(self, *args, **options):
        """Index daily documents, send RT and daily alerts, then schedule
        weekly and monthly alerts.

        :param args: Positional arguments forwarded to VerboseCommand.handle.
        :param options: Keyword options forwarded to VerboseCommand.handle.
        :return: None.
        """
        super().handle(*args, **options)

        index_daily_recap_documents()
        # Real-time alerts are processed before daily ones.
        for alert_rate in (Alert.REAL_TIME, Alert.DAILY):
            query_and_send_alerts(alert_rate)
        query_and_schedule_wly_and_mly_alerts()
Empty file added cl/alerts/tests/__init__.py
Empty file.
File renamed without changes.
190 changes: 190 additions & 0 deletions cl/alerts/tests/tests_recap_alerts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
from unittest import mock

import time_machine
from asgiref.sync import sync_to_async
from django.core import mail
from django.core.management import call_command
from django.utils.timezone import now

from cl.alerts.factories import AlertFactory
from cl.alerts.models import SEARCH_TYPES, Alert
from cl.alerts.utils import query_includes_rd_field, recap_document_hl_matched
from cl.api.factories import WebhookFactory
from cl.api.models import WebhookEventType
from cl.donate.models import NeonMembership
from cl.lib.elasticsearch_utils import do_es_sweep_alert_query
from cl.lib.test_helpers import RECAPSearchTestCase
from cl.search.documents import DocketSweepDocument
from cl.tests.cases import ESIndexTestCase, TestCase
from cl.tests.utils import MockResponse
from cl.users.factories import UserProfileWithParentsFactory


# Test case for the RECAP alerts sweep index: it exercises the alert helper
# functions (recap_document_hl_matched, query_includes_rd_field) and the
# cl_send_recap_alerts management command.
# NOTE(review): leading indentation is not preserved in this view; the
# nesting described in the comments below should be confirmed.
class RECAPAlertsSweepIndexTest(
RECAPSearchTestCase, ESIndexTestCase, TestCase
):
"""
RECAP Alerts Sweep Index Tests
"""

@classmethod
def setUpTestData(cls):
cls.rebuild_index("people_db.Person")
cls.rebuild_index("search.Docket")
# Freeze time on day 15 at hour 0 of the current month; minutes and seconds
# are left at whatever now() returned.
cls.mock_date = now().replace(day=15, hour=0)
with time_machine.travel(cls.mock_date, tick=False):
super().setUpTestData()
# Index the RECAP fixtures into the sweep index (sweep_index=True).
call_command(
"cl_index_parent_and_child_docs",
search_type=SEARCH_TYPES.RECAP,
queue="celery",
pk_offset=0,
testing_mode=True,
sweep_index=True,
)

# NOTE(review): it is unclear from this view whether the objects below are
# created inside the time_machine.travel block - confirm.
# A user with a LEGACY membership, an enabled SEARCH_ALERT webhook and three
# alerts: two REAL_TIME and one DAILY.
cls.user_profile = UserProfileWithParentsFactory()
NeonMembership.objects.create(
level=NeonMembership.LEGACY, user=cls.user_profile.user
)
cls.webhook_enabled = WebhookFactory(
user=cls.user_profile.user,
event_type=WebhookEventType.SEARCH_ALERT,
url="https://example.com/",
enabled=True,
)
cls.search_alert = AlertFactory(
user=cls.user_profile.user,
rate=Alert.REAL_TIME,
name="Test Alert Docket Only",
query='q="401 Civil"&type=r',
)
cls.search_alert_2 = AlertFactory(
user=cls.user_profile.user,
rate=Alert.REAL_TIME,
name="Test Alert RECAP Only",
query='q="Mauris iaculis, leo sit amet hendrerit vehicula"&type=r',
)
cls.search_alert_3 = AlertFactory(
user=cls.user_profile.user,
rate=Alert.DAILY,
name="Test Cross object",
query="q=SUBPOENAS SERVED OFF Mauris iaculis&type=r",
)

async def test_recap_document_hl_matched(self) -> None:
"""Test recap_document_hl_matched method that determines whether a hit
contains RECAPDocument HL fields."""
# Each scenario runs the sweep query through sync_to_async and inspects the
# child docs of the first result only (results[0]).
# Docket-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": '"401 Civil"',
}
search_query = DocketSweepDocument.search()
results, total_hits = await sync_to_async(do_es_sweep_alert_query)(
search_query,
search_params,
)
docket_result = results[0]
for rd in docket_result["child_docs"]:
rd_field_matched = recap_document_hl_matched(rd)
self.assertEqual(rd_field_matched, False)

# RECAPDocument-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": '"Mauris iaculis, leo sit amet hendrerit vehicula"',
}
search_query = DocketSweepDocument.search()
results, total_hits = await sync_to_async(do_es_sweep_alert_query)(
search_query,
search_params,
)
docket_result = results[0]
for rd in docket_result["child_docs"]:
rd_field_matched = recap_document_hl_matched(rd)
self.assertEqual(rd_field_matched, True)

# Cross-object query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": "SUBPOENAS SERVED OFF Mauris iaculis",
}
search_query = DocketSweepDocument.search()
results, total_hits = await sync_to_async(do_es_sweep_alert_query)(
search_query,
search_params,
)
docket_result = results[0]
for rd in docket_result["child_docs"]:
rd_field_matched = recap_document_hl_matched(rd)
self.assertEqual(rd_field_matched, True)

# NOTE(review): declared async although nothing in the body is awaited.
async def test_query_includes_rd_field(self) -> None:
"""Test query_includes_rd_field method that checks if a query
includes any indexed fields in the query string or filters specific to
RECAP Documents.
"""

# The first three scenarios put fielded terms in the "q" query string; the
# last three leave "q" empty and pass the fields as separate filter params.
# Docket-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": '"401 Civil"',
}
self.assertEqual(query_includes_rd_field(search_params), False)

# RECAPDocument-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": 'description:"lorem ipsum"',
}
self.assertEqual(query_includes_rd_field(search_params), True)

# Cross-object query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": 'case_name:"American v." description:"lorem ipsum"',
}
self.assertEqual(query_includes_rd_field(search_params), True)

# Docket-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": "",
"case_name": "SUBPOENAS",
}
self.assertEqual(query_includes_rd_field(search_params), False)

# RECAPDocument-only query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": "",
"description": "Lorem",
}
self.assertEqual(query_includes_rd_field(search_params), True)

# Cross-object query
search_params = {
"type": SEARCH_TYPES.RECAP,
"q": "",
"case_name": "SUBPOENAS",
"document_number": 1,
}
self.assertEqual(query_includes_rd_field(search_params), True)

def test_filter_out_alerts_to_send(self) -> None:
"""Test RECAP alerts hit can be properly filtered out according to
their query and hits matched conditions.
"""

# Webhook POSTs are patched to return a 200 MockResponse, and the command
# runs with time frozen at the same mock date used in setUpTestData.
with mock.patch(
"cl.api.webhooks.requests.post",
side_effect=lambda *args, **kwargs: MockResponse(
200, mock_raw=True
),
), time_machine.travel(self.mock_date, tick=False):
call_command("cl_send_recap_alerts")

# Two outgoing emails are expected from the command run.
self.assertEqual(
len(mail.outbox), 2, msg="Outgoing emails don't match."
)
53 changes: 51 additions & 2 deletions cl/alerts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.conf import settings
from django.http import QueryDict
from elasticsearch_dsl import Q, Search
from elasticsearch_dsl.response import Response
from elasticsearch_dsl.response import Hit, Response

from cl.alerts.models import (
SCHEDULED_ALERT_HIT_STATUS,
Expand All @@ -14,9 +14,15 @@
)
from cl.lib.command_utils import logger
from cl.lib.elasticsearch_utils import add_es_highlighting
from cl.lib.types import CleanData
from cl.search.constants import (
ALERTS_HL_TAG,
SEARCH_RECAP_CHILD_HL_FIELDS,
recap_document_filters,
recap_document_indexed_fields,
)
Comment on lines +18 to +24
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're not using any of these elements. Let's remove them to clean up the imports.

from cl.search.documents import AudioPercolator
from cl.search.models import SEARCH_TYPES, Docket
from cl.users.models import UserProfile


@dataclass
Expand Down Expand Up @@ -138,3 +144,46 @@ def alert_hits_limit_reached(alert_pk: int, user_pk: int) -> bool:
)
return True
return False


def recap_document_hl_matched(rd_hit: Hit) -> bool:
    """Tell whether the highlights of a hit matched RECAPDocument fields only.

    A highlighted field counts as matched when at least one of its fragments
    contains the opening alerts highlight tag.

    :param rd_hit: The ES hit.
    :return: True if at least one field was matched and every matched field
    is one of the RECAPDocument highlight fields. Otherwise, False.
    """

    if not hasattr(rd_hit, "highlight"):
        # No highlight data at all means nothing could have matched.
        return False

    opening_tag = f"<{ALERTS_HL_TAG}>"
    highlighted_fields = {
        field_name
        for field_name, fragments in rd_hit.highlight.to_dict().items()
        if any(opening_tag in fragment for fragment in fragments)
    }
    child_hl_fields = set(SEARCH_RECAP_CHILD_HL_FIELDS.keys())
    return bool(highlighted_fields) and highlighted_fields <= child_hl_fields


def query_includes_rd_field(query_params: CleanData) -> bool:
    """Check whether a query targets RECAP Document fields or filters.

    The query string ("q") is scanned for "<field>:" occurrences of the
    RECAP Document indexed fields, and the remaining parameters are checked
    for non-empty RECAP Document filters.

    :param query_params: The query parameters.
    :return: True if any recap document fields or filters are included in the
    query, otherwise False.
    """

    text_query = query_params.get("q", "")
    field_in_query_string = any(
        f"{field_name}:" in text_query
        for field_name in recap_document_indexed_fields
    )
    if field_in_query_string:
        return True

    # Filters count only when they carry a truthy value.
    return any(
        bool(query_params.get(filter_name, ""))
        for filter_name in recap_document_filters
    )
Loading
Loading