diff --git a/learning_resources_search/management/commands/prune_subscription_queries.py b/learning_resources_search/management/commands/prune_subscription_queries.py
new file mode 100644
index 0000000000..c4007dc0fd
--- /dev/null
+++ b/learning_resources_search/management/commands/prune_subscription_queries.py
@@ -0,0 +1,15 @@
+"""Management command to prune duplicate channel subscription percolate queries"""
+
+from django.core.management.base import BaseCommand
+
+from learning_resources_search.utils import prune_channel_subscriptions
+
+
+class Command(BaseCommand):
+    """
+    Remove duplicate percolate queries and consolidate their users
+    onto the canonical instance when a channel's search parameters change
+    """
+
+    def handle(self, **options):  # noqa: ARG002
+        prune_channel_subscriptions()
diff --git a/learning_resources_search/utils.py b/learning_resources_search/utils.py
index 901f90dffc..2e0632e415 100644
--- a/learning_resources_search/utils.py
+++ b/learning_resources_search/utils.py
@@ -1,6 +1,66 @@
+import logging
+import urllib
+
 from opensearch_dsl import Search
 
+from channels.models import Channel
 from learning_resources.hooks import get_plugin_manager
+from learning_resources_search.constants import LEARNING_RESOURCE
+from learning_resources_search.models import PercolateQuery
+
+log = logging.getLogger()
+
+
+def prune_channel_subscriptions():
+    from learning_resources_search.api import (
+        adjust_original_query_for_percolate,
+        adjust_query_for_percolator,
+    )
+    from learning_resources_search.serializers import (
+        PercolateQuerySubscriptionRequestSerializer,
+    )
+
+    for channel in Channel.objects.all():
+        query_string = channel.search_filter
+        if not query_string:
+            continue
+        percolate_serializer = PercolateQuerySubscriptionRequestSerializer(
+            data=urllib.parse.parse_qs(query_string)
+        )
+        percolate_serializer.is_valid()
+        adjusted_original_query = adjust_original_query_for_percolate(
+            percolate_serializer.get_search_request_data()
+            | {"endpoint": LEARNING_RESOURCE}
+        )
+        actual_query, _ = PercolateQuery.objects.get_or_create(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+            original_query=adjusted_original_query,
+            query=adjust_query_for_percolator(adjusted_original_query),
+        )
+        queries = PercolateQuery.objects.filter(
+            original_query__contains=urllib.parse.parse_qs(query_string),
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        )
+
+        duplicates = [q for q in queries if q.original_query != adjusted_original_query]
+        if actual_query:
+            for dup in duplicates:
+                if (
+                    dup.source_type == actual_query.source_type
+                    and dup.source_channel() == channel
+                    and dup.original_query != actual_query.original_query
+                ):
+                    for user in dup.users.all():
+                        dup.users.remove(user)
+                        dup.save()
+                        actual_query.users.add(user)
+                    msg = (
+                        f"removing duplicate percolate query ({dup.id}) "
+                        f"for channel {channel.title}"
+                    )
+                    log.info(msg)
+                    dup.delete()
+                    actual_query.save()
 
 
 def remove_child_queries(query):
diff --git a/learning_resources_search/utils_test.py b/learning_resources_search/utils_test.py
new file mode 100644
index 0000000000..8a20d52452
--- /dev/null
+++ b/learning_resources_search/utils_test.py
@@ -0,0 +1,139 @@
+import json
+import urllib
+
+import factory
+import pytest
+from django.db.models import signals
+from django.urls import reverse
+
+from channels.factories import ChannelFactory
+from learning_resources_search.factories import PercolateQueryFactory
+from learning_resources_search.models import PercolateQuery
+from learning_resources_search.utils import prune_channel_subscriptions
+from main.factories import UserFactory
+
+
+@pytest.fixture
+def mocked_api(mocker):
+    """Mock object that patches the channels API"""
+    return mocker.patch("learning_resources_search.tasks.api")
+
+
+@factory.django.mute_signals(signals.post_delete, signals.post_save)
+@pytest.mark.django_db
+def test_prune_channel_subscriptions(mocked_api, mocker, client, user):
+    """
+    Test that duplicate percolate queries for a channel are consolidated
+    and their users are migrated to the canonical instance
+    """
+    channel = ChannelFactory.create(search_filter="offered_by=mitx")
+    query_string = channel.search_filter
+    client.force_login(user)
+    params = urllib.parse.parse_qs(query_string)
+    params["source_type"] = PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE
+    sub_url = reverse("lr_search:v1:learning_resources_user_subscription-subscribe")
+    assert user.percolate_queries.count() == 0
+    client.post(sub_url, json.dumps(params), content_type="application/json")
+    assert user.percolate_queries.count() == 1
+
+    percolate_query = user.percolate_queries.first()
+
+    duplicate_query_a = percolate_query.original_query.copy()
+    duplicate_query_a["yearly_decay_percent"] = None
+    duplicate_query_b = percolate_query.original_query.copy()
+    duplicate_query_b["foo"] = None
+    duplicate_percolate_a = PercolateQueryFactory.create(
+        source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        original_query=duplicate_query_a,
+    )
+
+    duplicate_percolate_b = PercolateQueryFactory.create(
+        source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        original_query=duplicate_query_b,
+    )
+
+    duplicate_percolate_a.users.set(UserFactory.create_batch(7))
+    duplicate_percolate_a.save()
+    duplicate_percolate_b.save()
+    duplicate_percolate_b.users.set(UserFactory.create_batch(3))
+    prune_channel_subscriptions()
+    channel_percolate_queries = PercolateQuery.objects.filter(
+        source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        original_query=percolate_query.original_query,
+    )
+    assert channel_percolate_queries.count() == 1
+    assert channel_percolate_queries.first().users.count() == 11
+
+
+@factory.django.mute_signals(signals.post_delete, signals.post_save)
+@pytest.mark.django_db
+def test_new_channel_percolate_query_is_created(mocked_api, mocker, client, user):
+    """
+    Test that the prune command creates a current percolate query for channels that
+    don't already have one, and that a later subscription reuses it
+    """
+    assert (
+        PercolateQuery.objects.filter(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        ).count()
+        == 0
+    )
+    ChannelFactory.create(search_filter="offered_by=ocw")
+    channel = ChannelFactory.create(search_filter="offered_by=mitx")
+    prune_channel_subscriptions()
+    assert (
+        PercolateQuery.objects.filter(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        ).count()
+        == 2
+    )
+    query_string = channel.search_filter
+    client.force_login(user)
+    params = urllib.parse.parse_qs(query_string)
+    params["source_type"] = PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE
+    sub_url = reverse("lr_search:v1:learning_resources_user_subscription-subscribe")
+
+    client.post(sub_url, json.dumps(params), content_type="application/json")
+    assert (
+        PercolateQuery.objects.filter(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        ).count()
+        == 2
+    )
+
+
+@factory.django.mute_signals(signals.post_delete, signals.post_save)
+@pytest.mark.django_db
+def test_prune_subscription_on_empty_channel_search_filter(
+    mocked_api, mocker, client, user
+):
+    """
+    Test that the prune command skips channels with an empty search_filter and leaves
+    existing subscriptions, including those with empty params, intact
+    """
+    assert (
+        PercolateQuery.objects.filter(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        ).count()
+        == 0
+    )
+    query_string = "offered_by=mitx"
+    ChannelFactory.create(search_filter="")
+    ChannelFactory.create(search_filter=query_string)
+    client.force_login(user)
+    params = urllib.parse.parse_qs(query_string)
+    params["source_type"] = PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE
+    sub_url = reverse("lr_search:v1:learning_resources_user_subscription-subscribe")
+    client.post(sub_url, json.dumps(params), content_type="application/json")
+    params = urllib.parse.parse_qs("")
+    params["source_type"] = PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE
+    sub_url = reverse("lr_search:v1:learning_resources_user_subscription-subscribe")
+    client.post(sub_url, json.dumps(params), content_type="application/json")
+    prune_channel_subscriptions()
+    assert (
+        PercolateQuery.objects.filter(
+            source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE,
+        ).count()
+        == 2
+    )
+    assert user.percolate_queries.count() == 2
diff --git a/scripts/heroku-release-phase.sh b/scripts/heroku-release-phase.sh
index 3fba3bb4f2..28e02c1caa 100755
--- a/scripts/heroku-release-phase.sh
+++ b/scripts/heroku-release-phase.sh
@@ -17,5 +17,8 @@ python $MANAGE_FILE showmigrations --list 2>&1 | indent
 python $MANAGE_FILE migrate --noinput 2>&1 | indent
 RUN_DATA_MIGRATIONS=true python $MANAGE_FILE migrate --noinput 2>&1 | indent
 
+# consolidate user subscriptions and remove duplicate percolate instances
+python $MANAGE_FILE prune_subscription_queries 2>&1 | indent
+
 echo "-----> Generating cache tables"
 python $MANAGE_FILE createcachetable 2>&1 | indent
diff --git a/scripts/run-django-dev.sh b/scripts/run-django-dev.sh
index 5e2e94cac9..89820d8a85 100755
--- a/scripts/run-django-dev.sh
+++ b/scripts/run-django-dev.sh
@@ -11,4 +11,7 @@ RUN_DATA_MIGRATIONS=true python3 manage.py migrate --noinput
 echo "Loading fixtures!"
 python3 manage.py loaddata platforms schools departments offered_by
 
+# consolidate user subscriptions and remove duplicate percolate instances
+python3 manage.py prune_subscription_queries
+
 uwsgi uwsgi.ini --honour-stdin
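Not part of the patch: a minimal sketch of how the new command could be exercised from a Django shell on this project, assuming migrations have been applied and channels with search filters exist. `call_command` is Django's standard programmatic entry point, and the model fields come straight from the diff above.

```python
from django.core.management import call_command

from learning_resources_search.models import PercolateQuery


def channel_subscription_query_count():
    """Count percolate queries backing channel subscriptions."""
    return PercolateQuery.objects.filter(
        source_type=PercolateQuery.CHANNEL_SUBSCRIPTION_TYPE
    ).count()


before = channel_subscription_query_count()
call_command("prune_subscription_queries")  # same command the release scripts run
after = channel_subscription_query_count()
print(f"channel subscription percolate queries: {before} -> {after}")
```

The count can move in either direction: the command creates a canonical query for any channel that lacks one and deletes stale duplicates only after moving their subscribers onto the surviving query, so no subscriber is dropped.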