From df2adc2af70f92ec9e6cc3b11099925fad02962c Mon Sep 17 00:00:00 2001
From: Nico Virkki
Date: Thu, 21 Nov 2024 16:15:42 +0200
Subject: [PATCH] feat: add command for removing old user data

Command is supposed to be run in maintenance runs e.g. cron jobs for
cleaning the old user data.

Refs KER-398
---
 .../management/commands/remove_user_data.py   |  64 +++++
 .../test_remove_user_data_command.py          | 226 ++++++++++++++++++
 kerrokantasi/settings/base.py                 |   5 +
 3 files changed, 295 insertions(+)
 create mode 100644 democracy/management/commands/remove_user_data.py
 create mode 100644 democracy/tests/integrationtest/test_remove_user_data_command.py

diff --git a/democracy/management/commands/remove_user_data.py b/democracy/management/commands/remove_user_data.py
new file mode 100644
index 00000000..31eaa70c
--- /dev/null
+++ b/democracy/management/commands/remove_user_data.py
@@ -0,0 +1,64 @@
+from datetime import timedelta
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from django.utils import timezone
+
+from democracy.utils import user_data_remover
+
+
+class Command(BaseCommand):
+    """Remove old user data; meant for maintenance runs such as cron jobs.
+
+    Every clean-up step is opt-in through its own flag, so invoking the
+    command without any flag modifies nothing.
+    """
+
+    help = "Remove user data older than a configurable number of days."
+
+    def add_arguments(self, parser):
+        """Register the opt-in clean-up flags and the age threshold option."""
+        parser.add_argument(
+            "--remove-user-data-from-old-objects",
+            action="store_true",
+            help="Remove user reference from old objects.",
+        )
+        parser.add_argument(
+            "--delete-comment-version-history",
+            action="store_true",
+            help="Delete old comments version history.",
+        )
+        parser.add_argument(
+            "--delete-users",
+            action="store_true",
+            help="Delete users without activity created before threshold.",
+        )
+        parser.add_argument(
+            "--older-than-days",
+            default=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS,
+            type=int,
+            help="Specify the number of days for removal; "
+            f"defaults to {settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS}. "
+            "Data as old or older than this will be removed.",
+        )
+
+    @transaction.atomic
+    def handle(self, *args, **options):
+        """Run the requested clean-up steps inside a single transaction."""
+        # Cut-off handed to every remover below.
+        threshold_time = timezone.now() - timedelta(days=options["older_than_days"])
+
+        if options["remove_user_data_from_old_objects"]:
+            user_data_remover.remove_old_objects_user_data(threshold_time)
+            user_data_remover.remove_user_from_old_comments(threshold_time)
+            user_data_remover.remove_user_votes_from_old_comments(threshold_time)
+            user_data_remover.remove_user_from_old_poll_answers(threshold_time)
+            user_data_remover.remove_user_from_old_hearings(threshold_time)
+            user_data_remover.remove_contact_persons_from_old_hearings(threshold_time)
+
+        if options["delete_comment_version_history"]:
+            user_data_remover.delete_old_comments_versions(threshold_time)
+
+        if options["delete_users"]:
+            user_data_remover.delete_old_users_without_activity(threshold_time)
diff --git a/democracy/tests/integrationtest/test_remove_user_data_command.py b/democracy/tests/integrationtest/test_remove_user_data_command.py
new file mode 100644
index 00000000..3c1be30e
--- /dev/null
+++ b/democracy/tests/integrationtest/test_remove_user_data_command.py
@@ -0,0 +1,226 @@
+from datetime import timedelta
+
+import freezegun
+import pytest
+import reversion
+from django.conf import settings
+from django.core.management import call_command
+from django.utils import timezone
+from reversion.models import Version
+
+from democracy.factories.hearing import (
+    MinimalHearingFactory,
+    SectionCommentFactory,
+    SectionFactory,
+)
+from democracy.factories.poll import SectionPollFactory, SectionPollOptionFactory
+from democracy.models import ContactPerson, SectionComment, SectionPollAnswer
+from kerrokantasi.models import User
+from kerrokantasi.tests.factories import UserFactory
+
+
+def run_remove_user_data_command(*args):
+    """Invoke the remove_user_data management command with the given CLI args."""
+    call_command("remove_user_data", *args)
+
+
+@pytest.mark.django_db
+class TestRemoveUserDataCommand:
+    """Integration tests for the remove_user_data management command."""
+
+    @pytest.fixture(autouse=True)
+    def init_test_data(self):
+        """Create one set of old and one set of new objects for every test."""
+        # Old data: created one day past the default removal threshold.
+        with freezegun.freeze_time(
+            timezone.now()
+            - timedelta(days=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS + 1)
+        ):
+            sec = SectionFactory(post=True)
+            self.old_user = UserFactory(username="old_user", date_joined=timezone.now())
+            self.old_user_without_activity = UserFactory(
+                username="tobe_deleted_user", date_joined=timezone.now()
+            )
+            with reversion.create_revision():
+                self.old_section_comment = SectionCommentFactory(
+                    created_by=self.old_user, section=sec, post=True
+                )
+                self.old_section_comment.title = "Old Title"
+                self.old_section_comment.save()
+            self.old_section_comment.voters.add(self.old_user)
+            self.old_section_comment.recache_n_votes()
+            self.old_hearing = MinimalHearingFactory(created_by=self.old_user)
+            SectionComment.objects.filter(section__hearing=self.old_hearing).delete()
+            self.old_contact_person = ContactPerson.objects.create(
+                name="Old Contact Person", created_by=self.old_user
+            )
+            self.old_hearing.contact_persons.add(self.old_contact_person)
+            poll = SectionPollFactory(section=sec)
+            option = SectionPollOptionFactory(poll=poll)
+            self.old_poll_answer = SectionPollAnswer.objects.create(
+                created_by=self.old_user,
+                option=option,
+                comment=self.old_section_comment,
+            )
+
+        # New data: created at the real current time.
+        self.new_user = UserFactory(username="newer_user", date_joined=timezone.now())
+        self.new_section_comment = SectionCommentFactory(
+            created_by=self.new_user, section=sec, post=True
+        )
+        self.new_hearing = MinimalHearingFactory(
+            created_by=self.new_user, close_at=timezone.now()
+        )
+        self.new_contact_person = ContactPerson.objects.create(
+            name="New Contact Person", created_by=self.new_user
+        )
+        self.new_hearing.contact_persons.add(self.new_contact_person)
+        self.new_poll_answer = SectionPollAnswer.objects.create(
+            created_by=self.new_user, option=option, comment=self.new_section_comment
+        )
+
+    old_objects = [
+        "old_section_comment",
+        "old_poll_answer",
+        "old_hearing",
+    ]
+    new_objects = [
+        "new_section_comment",
+        "new_hearing",
+        "new_contact_person",
+        "new_poll_answer",
+    ]
+
+    def test_delete_user(self):
+        """Deleting a user nulls created_by on their objects but keeps the objects."""
+        self.old_user.delete()
+        self.old_section_comment.refresh_from_db()
+        self.old_hearing.refresh_from_db()
+        self.old_poll_answer.refresh_from_db()
+        self.old_contact_person.refresh_from_db()
+        assert self.old_section_comment.created_by is None
+        assert self.old_hearing.created_by is None
+        assert self.old_poll_answer.created_by is None
+        assert self.old_contact_person.created_by is None
+        assert self.old_section_comment.content is not None
+        assert self.old_section_comment.content != ""
+        assert self.old_section_comment.id > 0
+
+    def assert_old_objects_created_by_matches(self, exclude=()):
+        """Assert every non-excluded old object is still created by old_user."""
+        for model in [model for model in self.old_objects if model not in exclude]:
+            obj = getattr(self, model)
+            obj.refresh_from_db()
+            assert obj.created_by == self.old_user
+
+    def assert_old_objects_created_by_none(self, exclude=()):
+        """Assert every non-excluded old object has no created_by user."""
+        for model in [model for model in self.old_objects if model not in exclude]:
+            obj = getattr(self, model)
+            obj.refresh_from_db()
+            assert obj.created_by is None
+
+    def assert_new_objects_created_by_matches(self):
+        """Assert every new object is still created by new_user."""
+        for model in self.new_objects:
+            obj = getattr(self, model)
+            obj.refresh_from_db()
+            assert obj.created_by == self.new_user
+
+    def test_all_options(self):
+        """Test remove_user_data command with all options."""
+        args = [
+            "--remove-user-data-from-old-objects",
+            "--delete-comment-version-history",
+            "--delete-users",
+            "--older-than-days",
+            str(settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS),
+        ]
+        run_remove_user_data_command(*args)
+
+        self.old_section_comment.refresh_from_db()
+        self.old_poll_answer.refresh_from_db()
+        self.old_hearing.refresh_from_db()
+        old_user = User.objects.filter(id=self.old_user.id).first()
+        user_without_activity = User.objects.filter(
+            username=self.old_user_without_activity.username
+        ).first()
+        self.new_user.refresh_from_db()
+
+        self.assert_old_objects_created_by_none()
+        assert self.old_hearing.contact_persons.count() == 0
+        assert self.old_section_comment.n_unregistered_votes == 1
+        assert self.old_section_comment.voters.count() == 0
+        assert self.old_section_comment.n_votes == 1
+        self.assert_new_objects_created_by_matches()
+        assert Version.objects.get_for_object(self.old_section_comment).count() == 0
+
+        assert user_without_activity is None
+        assert old_user is None
+        assert self.new_user is not None
+
+    def test_remove_only_user_data_from_old_objects(self):
+        """Test remove_user_data command with remove_user_data_from_old_objects option."""
+        args = ["--remove-user-data-from-old-objects"]
+        run_remove_user_data_command(*args)
+
+        self.old_section_comment.refresh_from_db()
+        self.old_poll_answer.refresh_from_db()
+        self.old_hearing.refresh_from_db()
+
+        self.assert_old_objects_created_by_none()
+        assert self.old_section_comment.n_unregistered_votes == 1
+        assert self.old_section_comment.voters.count() == 0
+        assert self.old_section_comment.n_votes == 1
+        assert self.old_hearing.contact_persons.count() == 0
+
+        self.assert_new_objects_created_by_matches()
+
+    def test_remove_user_data_from_old_objects_with_delete_version_option(self):
+        """Test remove_user_data command with user data and version history removal."""
+        args = [
+            "--remove-user-data-from-old-objects",
+            "--delete-comment-version-history",
+        ]
+
+        assert Version.objects.get_for_object(self.old_section_comment).count() > 0
+        run_remove_user_data_command(*args)
+
+        self.old_section_comment.refresh_from_db()
+        self.old_poll_answer.refresh_from_db()
+        self.old_hearing.refresh_from_db()
+
+        self.assert_old_objects_created_by_none()
+        assert self.old_section_comment.n_unregistered_votes == 1
+        assert self.old_section_comment.voters.count() == 0
+        assert self.old_section_comment.n_votes == 1
+        assert self.old_hearing.contact_persons.count() == 0
+
+        self.assert_new_objects_created_by_matches()
+
+        assert Version.objects.get_for_object(self.old_section_comment).count() == 0
+
+    def test_only_delete_inactive_users(self):
+        """Test remove_user_data command with delete_users option."""
+        args = ["--delete-users"]
+        run_remove_user_data_command(*args)
+
+        self.old_section_comment.refresh_from_db()
+        self.old_poll_answer.refresh_from_db()
+        self.old_hearing.refresh_from_db()
+
+        self.assert_old_objects_created_by_matches()
+        assert self.old_section_comment.n_unregistered_votes == 0
+        assert self.old_section_comment.voters.count() == 1
+        assert self.old_section_comment.n_votes == 1
+        assert self.old_hearing.contact_persons.count() == 1
+
+        self.assert_new_objects_created_by_matches()
+        old_user = User.objects.filter(id=self.old_user.id).first()
+        user_without_activity = User.objects.filter(
+            username=self.old_user_without_activity.username
+        ).first()
+        self.new_user.refresh_from_db()
+        assert user_without_activity is None
+        assert old_user is not None
+        assert self.new_user is not None
diff --git a/kerrokantasi/settings/base.py b/kerrokantasi/settings/base.py
index 94ec5662..9b0c5d5f 100644
--- a/kerrokantasi/settings/base.py
+++ b/kerrokantasi/settings/base.py
@@ -92,6 +92,7 @@ def get_git_revision_hash():
     GDPR_API_DELETE_SCOPE=(str, "gdprdelete"),
     # Audit logging
     AUDIT_LOG_ENABLED=(bool, False),
+    DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS=(int, 365 * 5),  # Five years.
 )
 
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
@@ -433,3 +434,7 @@ def get_git_revision_hash():
     "ENABLED": env("AUDIT_LOG_ENABLED"),
     "ORIGIN": "kerrokantasi",
 }
+
+DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS = env(
+    "DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS"
+)