Add cron to cleanup dangling single tests and batch periodic test reports #1579

Open: wants to merge 3 commits into base: main
4 changes: 4 additions & 0 deletions Makefile
@@ -587,6 +587,7 @@ test:
	-m pytest -vvv -ra \
	--junit-xml=test-results.xml \
	$(filter-out integration_tests,${pysrcdirs}) \
	-k'${tests}' \
	${test_args}

test-shell:
@@ -620,6 +621,9 @@ test-all:

DOCKER_COMPOSE_TOOLS_CMD=COMPOSE_FILE=docker/compose.tools.yaml docker compose

makemigrations:
	${DOCKER_COMPOSE_TOOLS_CMD} run --rm tools env SKIP_SECRET_KEY_CHECK=True CACHE_LOCATION= ENABLE_BATCH= ./manage.py makemigrations

lint:
	${DOCKER_COMPOSE_TOOLS_CMD} run --rm tools bin/lint.sh ${pysrcdirs}

4 changes: 2 additions & 2 deletions bin/check.sh
@@ -7,7 +7,7 @@ fail=0
requirements_files="requirements.in requirements-dev.in"
echo $requirements_files | xargs -n1 pip-compile --quiet
if [ ! -z "$(git status --porcelain $requirements_files)" ];then
    echo "Requirements .in files have not all been compiled into .txt files and commited to Git!"
    echo -e "\e[31mRequirements .in files have not all been compiled into .txt files and commited to Git!"
    git status --porcelain $requirements_files
    fail=1
fi
@@ -21,7 +21,7 @@ if [ $exit_code -ne 1 ];then
    echo "exit code: $?"
    echo "output: $output"
    echo
    echo "Webserver should fail with exit code 1 if authentication/allowlist is not set when DEBUG=True"
    echo -e "\e[31mWebserver should fail with exit code 1 if authentication/allowlist is not set when DEBUG=True"
    fail=1
fi

5 changes: 4 additions & 1 deletion bin/lint.sh
@@ -10,6 +10,9 @@ pylama --skip "**/migrations/*" ${@} || fail=1
black --line-length 120 --check ${@} || fail=1
shellcheck -e SC1071 docker/cron/periodic/*/* || fail=1

SKIP_SECRET_KEY_CHECK=True CACHE_LOCATION= ENABLE_BATCH= ./manage.py makemigrations --noinput --check --dry-run || fail=1
if ! SKIP_SECRET_KEY_CHECK=True CACHE_LOCATION= ENABLE_BATCH= ./manage.py makemigrations --noinput --check --dry-run; then
    echo -e "\e[31mNot all migrations have been created after changes to \`models.py\`. Run \`make makemigrations\` to update migrations.\e[0m"
    fail=1
fi

exit "$fail"
25 changes: 25 additions & 0 deletions (new Django migration in checks/migrations/)
@@ -0,0 +1,25 @@
# Generated by Django 4.2.16 on 2024-12-03 22:43

import datetime
from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("checks", "0015_add_rpki_scoring"),
    ]

    operations = [
        migrations.AddField(
            model_name="webtestappsecpriv",
            name="timestamp",
            field=models.DateTimeField(auto_now_add=True, default=datetime.datetime(1, 1, 1, 0, 0)),
Review thread on this migration:

Collaborator: Will auto_now_add also apply to existing rows at the time of migration? Or will it only do that when adding a new object through the ORM? Otherwise we will be expiring currently existing reports early.

Collaborator (author): This should set the date to epoch(0) for existing records. I will test it out to make sure.

Collaborator: Yes, but that's bad, right? If the cleanup runs right after deployment, it'll consider every report old and start cleaning.

Collaborator (author): The cleanup does not touch single test reports, only probe/test results. It will delete results that don't have an associated report and are not recent enough to be currently running. The timestamp is added to these two models because they don't have it yet, while the other tests in the report do.

Collaborator: 👍🏻

(A small verification sketch for this discussion follows after the migration below.)

            preserve_default=False,
        ),
        migrations.AddField(
            model_name="webtesttls",
            name="timestamp",
            field=models.DateTimeField(auto_now_add=True, default=datetime.datetime(1, 1, 1, 0, 0)),
            preserve_default=False,
        ),
    ]
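A note on the thread above: the one-off default recorded in this migration is datetime(1, 1, 1), i.e. year 1 (datetime.min), not the Unix epoch, but the effect discussed is the same: pre-existing rows receive an arbitrarily old timestamp when the column is added, while rows created through the ORM afterwards get a real auto_now_add value. Below is a minimal verification sketch, not part of this PR, meant for ./manage.py shell after the migration is applied; the 1970 cutoff is arbitrary, anything between year 1 and the deployment date works.

# Sketch (not part of this PR): run in ./manage.py shell after applying the migration.
import datetime

from django.utils import timezone

from checks.models import WebTestAppsecpriv, WebTestTls

# Arbitrary cutoff well after year 1 and well before any real auto_now_add value.
cutoff = timezone.make_aware(datetime.datetime(1970, 1, 1))

for model in (WebTestTls, WebTestAppsecpriv):
    # Rows that existed before the migration carry the one-off year-1 default;
    # rows created afterwards carry a real auto_now_add timestamp.
    pre_existing = model.objects.filter(timestamp__lt=cutoff).count()
    print(f"{model.__name__}: {pre_existing} rows still on the pre-migration default timestamp")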
4 changes: 4 additions & 0 deletions checks/models.py
@@ -453,6 +453,8 @@ class Meta:


class WebTestTls(DomainServersModel):
    timestamp = models.DateTimeField(auto_now_add=True)

    def totalscore(self, score_fields):
        tests_subset = self.webtestset.all()
        return super().totalscore(score_fields, tests_subset)
@@ -672,6 +674,8 @@ class Meta:


class WebTestAppsecpriv(DomainServersModel):
    timestamp = models.DateTimeField(auto_now_add=True)

    def totalscore(self, score_fields):
        tests_subset = self.webtestset.all()
        return super().totalscore(score_fields, tests_subset)
1 change: 1 addition & 0 deletions docker/compose.yaml
@@ -698,6 +698,7 @@ services:
      - AUTO_UPDATE_TO
      - WORKER_SLOW_REPLICAS
      - DOCKER_REGISTRY
      - CRON_DAILY_DATABASE_CLEANUP

    restart: unless-stopped
    logging:
14 changes: 14 additions & 0 deletions docker/cron-docker/periodic/daily/database_cleanup
@@ -0,0 +1,14 @@
#!/bin/sh

# perform cleanup maintenance on database:
#
# - remove dangling subtests (probe results with no report) left behind by periodic tests or aborted single tests
# - remove test reports for batch periodic tests

set -e

if [ ! "$CRON_DAILY_DATABASE_CLEANUP" = "True" ];then
    exit 0
fi

docker ps --filter label=com.docker.compose.service=app --quiet | xargs -I% --no-run-if-empty docker exec % ./manage.py database_cleanup -v1
1 change: 1 addition & 0 deletions docker/defaults.env
@@ -257,6 +257,7 @@ CRON_DAILY_POSTGRESQL_BACKUP=True
CRON_DAILY_TRUNCATE_EXPORTER_LOGS=True
CRON_WEEKLY_POSTGRESQL_BACKUP=False
CRON_DAILY_DELETE_BATCH_RESULTS=True
CRON_DAILY_DATABASE_CLEANUP=True

# enable running tests every 15 minutes for metrics collection
CRON_15MIN_RUN_TESTS=True
30 changes: 30 additions & 0 deletions integration_tests/batch/test_batch.py
@@ -13,6 +13,9 @@
TEST_DOMAIN_EXPECTED_SCORE = 49


BATCH_PERIODIC_TESTS_PREFIX = "batch periodic tests"


def wait_for_request_status(url, expected_status, timeout=10, interval=1, auth=None):
    """Poll url and parse JSON for request.status, return if value matches expected status or
    fail when timeout expires."""
@@ -145,3 +148,30 @@ def test_cron_delete_batch_results(trigger_cron, docker_compose_exec):

    assert not docker_compose_exec("cron", "ls /app/batch_results/test.json", check=False)
    assert not docker_compose_exec("cron", "ls /app/batch_results/test.json.gz", check=False)


def test_batch_db_cleanup(unique_id, trigger_cron, register_test_user, test_domain):
    """Results of batch periodic tests should be removed from the database by the cleanup cron."""
    request_data = {"type": "web", "domains": [test_domain], "name": f"{BATCH_PERIODIC_TESTS_PREFIX} {unique_id}"}

    auth = register_test_user

    # start batch request
    register_response = requests.post(INTERNETNL_API + "requests", json=request_data, auth=auth, verify=False)
    register_data = register_response.json()
    test_id = register_data["request"]["request_id"]
    wait_for_request_status(INTERNETNL_API + "requests/" + test_id, "done", timeout=60, auth=auth)

    # generate batch results
    results_response = requests.get(INTERNETNL_API + "requests/" + test_id + "/results", auth=auth, verify=False)
    results_response.raise_for_status()
    assert not results_response.json() == {}

    # run db clean
    trigger_cron("daily/database_cleanup", service="cron-docker", suffix="-docker")

    # check batch results are gone
    results_response_after_cleanup = requests.get(
        INTERNETNL_API + "requests/" + test_id + "/results", auth=auth, verify=False
    )
    assert results_response_after_cleanup.json().get("error", {}).get("label", "") == "unknown-request"
48 changes: 48 additions & 0 deletions interface/management/commands/database_cleanup.py
@@ -0,0 +1,48 @@
from django.core.management.base import BaseCommand
from checks.models import BatchRequest, DomainTestIpv6, DomainTestDnssec, WebTestTls, WebTestAppsecpriv, WebTestRpki
import logging
import datetime
from django.conf import settings
from django.utils import timezone

log = logging.getLogger(__name__)


BATCH_PERIODIC_TESTS_PREFIX = "batch periodic tests"

TEST_REPORT_PROBE_MODELS = [DomainTestIpv6, DomainTestDnssec, WebTestTls, WebTestAppsecpriv, WebTestRpki]


class Command(BaseCommand):
    help = "Removes batch periodic test scan results and dangling probe results from database"

    def info(self, text):
        if self.v_level:
            self.stdout.write(f"{text}")

    def debug(self, text):
        if self.v_level > 1:
            self.stdout.write(f"{text}")

    def handle(self, *args, **options):
        logging.basicConfig(level=logging.INFO if options["verbosity"] > 0 else logging.ERROR)
        # verbosity level used by the info()/debug() helpers above
        self.v_level = options["verbosity"]

        count, _ = BatchRequest.objects.filter(name__startswith=BATCH_PERIODIC_TESTS_PREFIX).delete()
        log.info("Deleted %s BatchRequest objects from batch periodic tests.", count)

        timestamp_recent_probes = timezone.make_aware(datetime.datetime.now()) - datetime.timedelta(
            seconds=int(settings.CACHE_TTL)
        )

        for model in TEST_REPORT_PROBE_MODELS:
            # >>> print(DomainTestIpv6.objects.filter(domaintestreport__isnull=True).values_list('id').query)
            # SELECT "checks_domaintestipv6"."id" FROM "checks_domaintestipv6" LEFT OUTER JOIN "checks_domaintestreport"
            # ON ("checks_domaintestipv6"."id" = "checks_domaintestreport"."ipv6_id")
            # WHERE "checks_domaintestreport"."id" IS NULL

            # find all test probe results that have no report associated, but not too recent because
            # those might be unfinished tests
            count, _ = model.objects.filter(
                domaintestreport__isnull=True, timestamp__lt=timestamp_recent_probes
            ).delete()
            log.info("Deleted %s probes that don't have an associated report.", count)
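For reference, a read-only sketch, not part of the PR, that reuses the models and CACHE_TTL cutoff from the command above to preview in ./manage.py shell what a cleanup run would delete:

# Sketch: preview the dangling probe results database_cleanup would delete, without deleting anything.
import datetime

from django.conf import settings
from django.utils import timezone

from checks.models import DomainTestDnssec, DomainTestIpv6, WebTestAppsecpriv, WebTestRpki, WebTestTls

# Same cutoff as the command: results younger than CACHE_TTL may belong to still-running tests.
cutoff = timezone.make_aware(datetime.datetime.now()) - datetime.timedelta(seconds=int(settings.CACHE_TTL))

for model in (DomainTestIpv6, DomainTestDnssec, WebTestTls, WebTestAppsecpriv, WebTestRpki):
    dangling = model.objects.filter(domaintestreport__isnull=True, timestamp__lt=cutoff)
    print(f"{model.__name__}: {dangling.count()} dangling results older than CACHE_TTL")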
41 changes: 41 additions & 0 deletions tests/it/test_cleanup.py
@@ -0,0 +1,41 @@
from django.core.management import call_command
from checks.models import DomainTestIpv6, DomainTestReport
import datetime
import pytest


def test_cleanup_aborted_or_periodic_test_results(db):
    """Make sure that test results without a report are deleted on cleanup, but not if they are recent or have a report."""
    ipv6_no_report = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_no_report.save()
    ipv6_no_report.timestamp = datetime.datetime.now() - datetime.timedelta(seconds=200)
    ipv6_no_report.save()

    ipv6_report = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_report.save()
    ipv6_report.timestamp = datetime.datetime.now() - datetime.timedelta(seconds=200)
    ipv6_report.save()

    ipv6_no_report_recent = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_no_report_recent.save()

    ipv6_report_recent = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_report_recent.save()

    report = DomainTestReport(domain="example.com", ipv6=ipv6_report)
    report.save()

    # run cleanup
    call_command("database_cleanup")

    with pytest.raises(DomainTestIpv6.DoesNotExist):
        ipv6_no_report.refresh_from_db()

    ipv6_report.refresh_from_db()
    assert ipv6_report

    ipv6_no_report_recent.refresh_from_db()
    assert ipv6_no_report_recent

    ipv6_report_recent.refresh_from_db()
    assert ipv6_report_recent