Skip to content

Commit

Permalink
Cleanup batch test results every day
Browse files Browse the repository at this point in the history
  • Loading branch information
aequitas committed Dec 3, 2024
1 parent 28a7404 commit 4d457f6
Show file tree
Hide file tree
Showing 10 changed files with 183 additions and 1 deletion.
19 changes: 19 additions & 0 deletions checks/migrations/0016_webtesttls_timestamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.16 on 2024-12-03 19:56

import datetime
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``timestamp`` column to the ``webtesttls`` model.

    The field is declared with ``auto_now_add=True``. The ``default`` given
    here (``datetime.datetime(1, 1, 1, 0, 0)``) is only supplied for this
    migration; ``preserve_default=False`` means it is not kept on the field
    definition afterwards.
    """

    dependencies = [("checks", "0015_add_rpki_scoring")]

    operations = [
        migrations.AddField(
            model_name="webtesttls",
            name="timestamp",
            field=models.DateTimeField(
                auto_now_add=True,
                default=datetime.datetime(1, 1, 1, 0, 0),
            ),
            preserve_default=False,
        ),
    ]
19 changes: 19 additions & 0 deletions checks/migrations/0017_webtestappsecpriv_timestamp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.16 on 2024-12-03 20:11

import datetime
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``timestamp`` column to the ``webtestappsecpriv`` model.

    The field is declared with ``auto_now_add=True``. The ``default`` given
    here (``datetime.datetime(1, 1, 1, 0, 0)``) is only supplied for this
    migration; ``preserve_default=False`` means it is not kept on the field
    definition afterwards.
    """

    dependencies = [("checks", "0016_webtesttls_timestamp")]

    operations = [
        migrations.AddField(
            model_name="webtestappsecpriv",
            name="timestamp",
            field=models.DateTimeField(
                auto_now_add=True,
                default=datetime.datetime(1, 1, 1, 0, 0),
            ),
            preserve_default=False,
        ),
    ]
4 changes: 4 additions & 0 deletions checks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,8 @@ class Meta:


class WebTestTls(DomainServersModel):
timestamp = models.DateTimeField(auto_now_add=True)

def totalscore(self, score_fields):
tests_subset = self.webtestset.all()
return super().totalscore(score_fields, tests_subset)
Expand Down Expand Up @@ -672,6 +674,8 @@ class Meta:


class WebTestAppsecpriv(DomainServersModel):
timestamp = models.DateTimeField(auto_now_add=True)

def totalscore(self, score_fields):
tests_subset = self.webtestset.all()
return super().totalscore(score_fields, tests_subset)
Expand Down
1 change: 1 addition & 0 deletions docker/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ services:
- DOCKER_REGISTRY
- WORKER_REPLICAS
- RELEASE
- CRON_DAILY_DATABASE_CLEANUP

restart: unless-stopped
logging:
Expand Down
14 changes: 14 additions & 0 deletions docker/cron-docker/periodic/daily/database_cleanup
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh

# Daily cleanup maintenance on the database:
#
# - remove dangling subtests (probe results with no report) caused by periodic tests or aborted single tests
# - remove test reports for batch periodic tests

set -e

# Do nothing unless CRON_DAILY_DATABASE_CLEANUP is exactly "True".
if [ "$CRON_DAILY_DATABASE_CLEANUP" != "True" ]; then
  exit 0
fi

# Run the `database_cleanup` management command inside every running container of the
# compose service `app`; xargs does not run anything when no container id is listed.
docker ps --filter label=com.docker.compose.service=app --quiet \
  | xargs -I% --no-run-if-empty docker exec % ./manage.py database_cleanup -v1
8 changes: 7 additions & 1 deletion docker/cron/periodic/15min/tests-batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
BATCH_REQUEST_TIMEOUT = 60 * 5
REQUEST_TIMEOUT = 30

BATCH_PERIODIC_TESTS_PREFIX = "batch periodic tests"

REQUEST_TYPES = ["web", "mail"]

IPV4_IP_APP_INTERNAL = os.environ.get("IPV4_IP_APP_INTERNAL")
Expand Down Expand Up @@ -123,7 +125,11 @@ def wait_for_request_status(url: str, expected_status: list[str], timeout: int =


def run_test_batch(request_type: str, domains: list[str]):
request_data = {"type": "web", "domains": domains, "name": f"periodic test {str(datetime.datetime.now())}"}
request_data = {
"type": "web",
"domains": domains,
"name": f"{BATCH_PERIODIC_TESTS_PREFIX} {str(datetime.datetime.now())}",
}

auth = ("periodic_tests", "periodic_tests")
api_url: str = URL_BASE + "/api/batch/v2/"
Expand Down
1 change: 1 addition & 0 deletions docker/defaults.env
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ CRON_DAILY_POSTGRESQL_BACKUP=True
CRON_DAILY_TRUNCATE_EXPORTER_LOGS=True
CRON_WEEKLY_POSTGRESQL_BACKUP=False
CRON_DAILY_DELETE_BATCH_RESULTS=True
CRON_DAILY_DATABASE_CLEANUP=True

# enable running tests every 15 minutes for metrics collection
CRON_15MIN_RUN_TESTS=True
Expand Down
30 changes: 30 additions & 0 deletions integration_tests/batch/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
TEST_DOMAIN_EXPECTED_SCORE = 49


BATCH_PERIODIC_TESTS_PREFIX = "batch periodic tests"


def wait_for_request_status(url, expected_status, timeout=10, interval=1, auth=None):
"""Poll url and parse JSON for request.status, return if value matches expected status or
fail when timeout expires."""
Expand Down Expand Up @@ -145,3 +148,30 @@ def test_cron_delete_batch_results(trigger_cron, docker_compose_exec):

assert not docker_compose_exec("cron", "ls /app/batch_results/test.json", check=False)
assert not docker_compose_exec("cron", "ls /app/batch_results/test.json.gz", check=False)


def test_batch_db_cleanup(unique_id, trigger_cron, register_test_user, test_domain):
    """Results of a batch periodic test should be removed by the daily database cleanup.

    Registers a batch request whose name starts with BATCH_PERIODIC_TESTS_PREFIX, waits until it
    is done, verifies that results can be retrieved, triggers the ``daily/database_cleanup`` cron
    job and finally expects the API to report ``unknown-request`` for the same request id.
    """
    request_data = {"type": "web", "domains": [test_domain], "name": f"{BATCH_PERIODIC_TESTS_PREFIX} {unique_id}"}
    auth = register_test_user

    # start batch request
    register_response = requests.post(INTERNETNL_API + "requests", json=request_data, auth=auth, verify=False)
    # fail with the HTTP error itself instead of a KeyError on the response body below
    register_response.raise_for_status()
    test_id = register_response.json()["request"]["request_id"]
    results_url = INTERNETNL_API + "requests/" + test_id + "/results"

    wait_for_request_status(INTERNETNL_API + "requests/" + test_id, "done", timeout=60, auth=auth)

    # generate batch results
    results_response = requests.get(results_url, auth=auth, verify=False)
    results_response.raise_for_status()
    assert results_response.json() != {}

    # run db clean
    trigger_cron("daily/database_cleanup", service="cron-docker", suffix="-docker")

    # check batch results are gone; no raise_for_status() here because an error response is expected
    results_response_after_cleanup = requests.get(results_url, auth=auth, verify=False)
    assert results_response_after_cleanup.json().get("error", {}).get("label", "") == "unknown-request"
48 changes: 48 additions & 0 deletions interface/management/commands/database_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from django.core.management.base import BaseCommand
from checks.models import BatchRequest, DomainTestIpv6, DomainTestDnssec, WebTestTls, WebTestAppsecpriv, WebTestRpki
import logging
import datetime
from django.conf import settings
from django.utils import timezone

# Module-level logger; the command's handle() configures the log level from the verbosity option.
log = logging.getLogger(__name__)


# BatchRequest objects whose name starts with this prefix are deleted by the command below.
# The same literal is defined in docker/cron/periodic/15min/tests-batch.py, which uses it to
# name the periodic batch requests — keep both in sync.
BATCH_PERIODIC_TESTS_PREFIX = "batch periodic tests"

# Probe result models that are checked for rows without an associated DomainTestReport.
TEST_REPORT_PROBE_MODELS = [DomainTestIpv6, DomainTestDnssec, WebTestTls, WebTestAppsecpriv, WebTestRpki]


class Command(BaseCommand):
    """Management command that removes batch periodic test results and dangling probe results."""

    help = "Removes batch periodic test scan results and dangling probe results from database"

    def info(self, text):
        """Write ``text`` to stdout unless verbosity is 0."""
        if self.v_level:
            self.stdout.write(f"{text}")

    def debug(self, text):
        """Write ``text`` to stdout when verbosity is higher than 1."""
        if self.v_level > 1:
            self.stdout.write(f"{text}")

    def handle(self, *args, **options):
        """Delete batch periodic test requests and probe results that have no report.

        Two steps are performed:

        1. every BatchRequest whose name starts with BATCH_PERIODIC_TESTS_PREFIX is deleted;
        2. for each model in TEST_REPORT_PROBE_MODELS, rows without an associated report and with
           a timestamp older than ``settings.CACHE_TTL`` seconds are deleted.
        """
        # info()/debug() read this attribute; it was never assigned before, so calling them raised
        # AttributeError.
        self.v_level = options["verbosity"]
        logging.basicConfig(level=logging.INFO if options["verbosity"] > 0 else logging.ERROR)

        count, _ = BatchRequest.objects.filter(name__startswith=BATCH_PERIODIC_TESTS_PREFIX).delete()
        log.info("Deleted %s BatchRequest objects from batch periodic tests.", count)

        # timezone.now() follows the project's USE_TZ setting; building the value from a naive
        # datetime.now() ties it to the system-local clock instead of Django's configured zone.
        timestamp_recent_probes = timezone.now() - datetime.timedelta(seconds=int(settings.CACHE_TTL))

        for model in TEST_REPORT_PROBE_MODELS:
            # >>> print(DomainTestIpv6.objects.filter(domaintestreport__isnull=True).values_list('id').query)
            # SELECT "checks_domaintestipv6"."id" FROM "checks_domaintestipv6" LEFT OUTER JOIN "checks_domaintestreport"
            # ON ("checks_domaintestipv6"."id" = "checks_domaintestreport"."ipv6_id")
            # WHERE "checks_domaintestreport"."id" IS NULL

            # find all test probe results that have no report associated, but not too recent because
            # those might be unfinished tests
            count, _ = model.objects.filter(
                domaintestreport__isnull=True, timestamp__lt=timestamp_recent_probes
            ).delete()
            log.info("Deleted %s probes that don't have an associated report.", count)
40 changes: 40 additions & 0 deletions tests/it/test_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from django.core.management import call_command
from checks.models import DomainTestIpv6, DomainTestReport
import datetime
import pytest

def test_cleanup_aborted_or_periodic_test_results(db):
    """Make sure that test results without a report are deleted on cleanup, but not if they are recent."""
    # Timezone-aware so no naive datetime is assigned to the DateTimeField.
    # NOTE(review): assumes settings.CACHE_TTL is below 200 seconds in the test settings — confirm.
    old_timestamp = datetime.datetime.now(tz=datetime.timezone.utc) - datetime.timedelta(seconds=200)

    # Old probe result without a report: the only one that should be deleted.
    # Saved first and backdated afterwards, presumably because the timestamp is set automatically
    # on the initial insert.
    ipv6_no_report = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_no_report.save()
    ipv6_no_report.timestamp = old_timestamp
    ipv6_no_report.save()

    # Old probe result with a report.
    ipv6_report = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_report.save()
    ipv6_report.timestamp = old_timestamp
    ipv6_report.save()

    # Recent probe result without a report (might belong to an unfinished test).
    ipv6_no_report_recent = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_no_report_recent.save()

    # Recent probe result with a report.
    ipv6_report_recent = DomainTestIpv6(domain="example.com", report="{}")
    ipv6_report_recent.save()

    DomainTestReport(domain="example.com", ipv6=ipv6_report).save()
    # Previously no report was created for this one, so it did not cover the case its name describes.
    DomainTestReport(domain="example.com", ipv6=ipv6_report_recent).save()

    # run cleanup
    call_command("database_cleanup")

    with pytest.raises(DomainTestIpv6.DoesNotExist):
        ipv6_no_report.refresh_from_db()

    # refresh_from_db() raises DoesNotExist for a deleted row, so these calls are the actual checks
    ipv6_report.refresh_from_db()
    ipv6_no_report_recent.refresh_from_db()
    ipv6_report_recent.refresh_from_db()

0 comments on commit 4d457f6

Please sign in to comment.