Skip to content

Commit

Permalink
feat: add command for removing old user data
Browse files Browse the repository at this point in the history
The command is intended to be run during maintenance runs (e.g. cron jobs)
to clean up old user data.

Refs KER-398
  • Loading branch information
nicobav committed Nov 28, 2024
1 parent 40349fa commit df2adc2
Show file tree
Hide file tree
Showing 3 changed files with 274 additions and 0 deletions.
53 changes: 53 additions & 0 deletions democracy/management/commands/remove_user_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from datetime import timedelta

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone

from democracy.utils import user_data_remover


class Command(BaseCommand):
    """Management command that removes user data older than a threshold.

    Each action is opt-in through its own flag; running the command without
    any action flag performs no removal. The actual work is delegated to the
    helpers in ``democracy.utils.user_data_remover``.
    """

    help = (
        "Remove user references from old objects, delete old comment version "
        "history and delete old users without activity."
    )

    def add_arguments(self, parser):
        """Register the action flags and the age threshold option.

        Args:
            parser: The argument parser supplied by Django.
        """
        parser.add_argument(
            "--remove-user-data-from-old-objects",
            action="store_true",
            help="Remove user reference from old objects.",
        )
        parser.add_argument(
            "--delete-comment-version-history",
            action="store_true",
            help="Delete old comments version history.",
        )
        parser.add_argument(
            "--delete-users",
            action="store_true",
            help="Delete users without activity created before threshold.",
        )
        parser.add_argument(
            "--older-than-days",
            default=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS,
            type=int,
            help=(
                "Specify the number of days for removal; "
                f"defaults to {settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS}. "
                "Data as old or older than this will be removed."
            ),
        )

    @transaction.atomic
    def handle(self, *args, **options):
        """Run the selected removal actions inside one database transaction.

        Args:
            *args: Unused positional arguments passed by Django.
            **options: Parsed command line options.

        Raises:
            CommandError: If ``--older-than-days`` is negative.
        """
        older_than_days = options["older_than_days"]
        # A negative age would place the threshold in the future, so "as old or
        # older than the threshold" would cover every existing object.
        if older_than_days < 0:
            raise CommandError("--older-than-days must not be negative.")

        threshold_time = timezone.now() - timedelta(days=older_than_days)

        if options["remove_user_data_from_old_objects"]:
            user_data_remover.remove_old_objects_user_data(threshold_time)
            user_data_remover.remove_user_from_old_comments(threshold_time)
            user_data_remover.remove_user_votes_from_old_comments(threshold_time)
            user_data_remover.remove_user_from_old_poll_answers(threshold_time)
            user_data_remover.remove_user_from_old_hearings(threshold_time)
            user_data_remover.remove_contact_persons_from_old_hearings(threshold_time)

        if options["delete_comment_version_history"]:
            user_data_remover.delete_old_comments_versions(threshold_time)

        if options["delete_users"]:
            user_data_remover.delete_old_users_without_activity(threshold_time)
216 changes: 216 additions & 0 deletions democracy/tests/integrationtest/test_remove_user_data_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
import freezegun
import pytest
import reversion
from django.conf import settings
from django.core.management import call_command
from django.utils import timezone
from reversion.models import Version

from democracy.factories.hearing import (
MinimalHearingFactory,
SectionCommentFactory,
SectionFactory,
)
from democracy.factories.poll import SectionPollFactory, SectionPollOptionFactory
from democracy.models import ContactPerson, SectionComment, SectionPollAnswer
from kerrokantasi.models import User
from kerrokantasi.tests.factories import UserFactory


def run_remove_user_data_command(*args):
    """Invoke the ``remove_user_data`` management command with ``args``."""
    command_name = "remove_user_data"
    call_command(command_name, *args)


@pytest.mark.django_db
class TestRemoveUserDataCommand:
    """Integration tests for the ``remove_user_data`` management command.

    Every test starts from the data built by ``init_test_data``: one group of
    objects created one day past the default removal threshold ("old") and one
    group created at the current time ("new").
    """

    # Names of fixture attributes whose ``created_by`` the assertion helpers
    # check.
    old_objects = [
        "old_section_comment",
        "old_poll_answer",
        "old_hearing",
    ]
    new_objects = [
        "new_section_comment",
        "new_hearing",
        "new_contact_person",
        "new_poll_answer",
    ]

    @pytest.fixture(autouse=True)
    def init_test_data(self):
        """Create the old and new users, comments, hearings and poll answers."""
        old_time = timezone.now() - timezone.timedelta(
            days=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS + 1
        )
        # NOTE(review): the extent of the two ``with`` blocks below was
        # reconstructed from a listing without indentation -- confirm.
        with freezegun.freeze_time(old_time):
            sec = SectionFactory(post=True)
            self.old_user = UserFactory(
                username="old_user", date_joined=timezone.now()
            )
            self.old_user_without_activity = UserFactory(
                username="tobe_deleted_user", date_joined=timezone.now()
            )
            # Saving inside a revision gives the old comment version history,
            # which the version-deletion tests rely on.
            with reversion.create_revision():
                self.old_section_comment = SectionCommentFactory(
                    created_by=self.old_user, section=sec, post=True
                )
                self.old_section_comment.title = "Old Title"
                self.old_section_comment.save()
            self.old_section_comment.voters.add(self.old_user)
            self.old_section_comment.recache_n_votes()
            self.old_hearing = MinimalHearingFactory(created_by=self.old_user)
            # Presumably clears comments generated by the hearing factory so
            # they do not count as activity -- verify against the factory.
            SectionComment.objects.filter(section__hearing=self.old_hearing).delete()
            self.old_contact_person = ContactPerson.objects.create(
                name="Old Contact Person", created_by=self.old_user
            )
            self.old_hearing.contact_persons.add(self.old_contact_person)
            poll = SectionPollFactory(section=sec)
            option = SectionPollOptionFactory(poll=poll)
            self.old_poll_answer = SectionPollAnswer.objects.create(
                created_by=self.old_user,
                option=option,
                comment=self.old_section_comment,
            )

        self.new_user = UserFactory(username="newer_user", date_joined=timezone.now())
        self.new_section_comment = SectionCommentFactory(
            created_by=self.new_user, section=sec, post=True
        )
        self.new_hearing = MinimalHearingFactory(
            created_by=self.new_user, close_at=timezone.now()
        )
        self.new_contact_person = ContactPerson.objects.create(
            name="New Contact Person", created_by=self.new_user
        )
        self.new_hearing.contact_persons.add(self.new_contact_person)
        self.new_poll_answer = SectionPollAnswer.objects.create(
            created_by=self.new_user, option=option, comment=self.new_section_comment
        )

    @staticmethod
    def _user_exists(user):
        """Return whether ``user`` still has a row in the database."""
        return User.objects.filter(pk=user.pk).exists()

    def test_delete_user(self):
        """Deleting a user nulls ``created_by`` but keeps the related objects."""
        self.old_user.delete()
        self.old_section_comment.refresh_from_db()
        self.old_hearing.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_contact_person.refresh_from_db()
        assert self.old_section_comment.created_by is None
        assert self.old_hearing.created_by is None
        assert self.old_poll_answer.created_by is None
        assert self.old_contact_person.created_by is None
        assert self.old_section_comment.content is not None
        assert self.old_section_comment.content != ""
        assert self.old_section_comment.id > 0

    def assert_old_objects_created_by_matches(self, exclude=()):
        """Refresh each non-excluded old object and assert it references ``old_user``."""
        for name in self.old_objects:
            if name in exclude:
                continue
            obj = getattr(self, name)
            obj.refresh_from_db()
            assert obj.created_by == self.old_user

    def assert_old_objects_created_by_none(self, exclude=()):
        """Refresh each non-excluded old object and assert ``created_by`` is unset."""
        for name in self.old_objects:
            if name in exclude:
                continue
            obj = getattr(self, name)
            obj.refresh_from_db()
            assert obj.created_by is None

    def assert_new_objects_created_by_matches(self):
        """Refresh each new object and assert it still references ``new_user``."""
        for name in self.new_objects:
            obj = getattr(self, name)
            obj.refresh_from_db()
            assert obj.created_by == self.new_user

    def test_all_options(self):
        """Test remove_user_data command with all options."""
        args = [
            "--remove-user-data-from-old-objects",
            "--delete-comment-version-history",
            "--delete-users",
            "--older-than-days",
            str(settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS),
        ]
        run_remove_user_data_command(*args)

        self.assert_old_objects_created_by_none()
        assert self.old_hearing.contact_persons.count() == 0
        assert self.old_section_comment.n_unregistered_votes == 1
        assert self.old_section_comment.voters.count() == 0
        assert self.old_section_comment.n_votes == 1
        self.assert_new_objects_created_by_matches()
        assert Version.objects.get_for_object(self.old_section_comment).count() == 0

        assert not self._user_exists(self.old_user_without_activity)
        assert not self._user_exists(self.old_user)
        assert self._user_exists(self.new_user)

    def test_remove_only_user_data_from_old_objects(self):
        """Test remove_user_data command with remove_user_data_from_old_objects option."""
        args = ["--remove-user-data-from-old-objects"]
        run_remove_user_data_command(*args)

        self.assert_old_objects_created_by_none()
        assert self.old_section_comment.n_unregistered_votes == 1
        assert self.old_section_comment.voters.count() == 0
        assert self.old_section_comment.n_votes == 1
        assert self.old_hearing.contact_persons.count() == 0

        self.assert_new_objects_created_by_matches()

    def test_remove_user_data_from_old_objects_with_delete_version_option(self):
        """Test remove_user_data command with the remove_user_data_from_old_objects
        and delete_comment_version_history options combined.
        """
        args = [
            "--remove-user-data-from-old-objects",
            "--delete-comment-version-history",
        ]

        assert Version.objects.get_for_object(self.old_section_comment).count() > 0
        run_remove_user_data_command(*args)

        self.assert_old_objects_created_by_none()
        assert self.old_section_comment.n_unregistered_votes == 1
        assert self.old_section_comment.voters.count() == 0
        assert self.old_section_comment.n_votes == 1
        assert self.old_hearing.contact_persons.count() == 0

        self.assert_new_objects_created_by_matches()

        assert Version.objects.get_for_object(self.old_section_comment).count() == 0

    def test_only_delete_inactive_users(self):
        """Test remove_user_data command with delete_users option."""
        args = ["--delete-users"]
        run_remove_user_data_command(*args)

        self.assert_old_objects_created_by_matches()
        assert self.old_section_comment.n_unregistered_votes == 0
        assert self.old_section_comment.voters.count() == 1
        assert self.old_section_comment.n_votes == 1
        assert self.old_hearing.contact_persons.count() == 1

        self.assert_new_objects_created_by_matches()
        assert not self._user_exists(self.old_user_without_activity)
        assert self._user_exists(self.old_user)
        assert self._user_exists(self.new_user)
5 changes: 5 additions & 0 deletions kerrokantasi/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def get_git_revision_hash():
GDPR_API_DELETE_SCOPE=(str, "gdprdelete"),
# Audit logging
AUDIT_LOG_ENABLED=(bool, False),
DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS=(int, 365 * 5), # Five years.
)

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
Expand Down Expand Up @@ -433,3 +434,7 @@ def get_git_revision_hash():
"ENABLED": env("AUDIT_LOG_ENABLED"),
"ORIGIN": "kerrokantasi",
}

# Default age threshold, in days, used by the `remove_user_data` management
# command as the default of its `--older-than-days` option. The value is read
# through `env`, whose schema declares a fallback of 365 * 5 days.
DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS = env(
"DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS"
)

0 comments on commit df2adc2

Please sign in to comment.