From 98635d00186fd031b24ac74e9acd3245708caf62 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Mon, 30 Jan 2023 16:52:18 -0500 Subject: [PATCH] Support mass resume download --- backend/siarnaq/api/episodes/admin.py | 8 ++++ backend/siarnaq/api/user/managers.py | 54 ++++++++++++++++++++++++++- backend/siarnaq/gcloud/titan.py | 22 +++++++++-- backend/siarnaq/settings.py | 3 ++ deploy/galaxy/main.tf | 19 +++++++++- deploy/siarnaq/main.tf | 6 +++ deploy/siarnaq/variables.tf | 5 +++ 7 files changed, 111 insertions(+), 6 deletions(-) diff --git a/backend/siarnaq/api/episodes/admin.py b/backend/siarnaq/api/episodes/admin.py index 2246928ad..87d7427e0 100644 --- a/backend/siarnaq/api/episodes/admin.py +++ b/backend/siarnaq/api/episodes/admin.py @@ -1,5 +1,6 @@ import structlog from django.contrib import admin, messages +from django.http import HttpResponseRedirect from django.utils.html import format_html from siarnaq.api.compete.models import Match @@ -10,10 +11,16 @@ Tournament, TournamentRound, ) +from siarnaq.api.user.models import User logger = structlog.get_logger(__name__) +@admin.action(description="Export all submitted resumes") +def export_resumes(modeladmin, request, queryset): + return HttpResponseRedirect(User.objects.export_resumes(episodes=queryset)) + + class MapInline(admin.TabularInline): model = Map extra = 0 @@ -23,6 +30,7 @@ class MapInline(admin.TabularInline): @admin.register(Episode) class EpisodeAdmin(admin.ModelAdmin): + actions = [export_resumes] fieldsets = ( ( "General", diff --git a/backend/siarnaq/api/user/managers.py b/backend/siarnaq/api/user/managers.py index 1dd02a669..db6c9d14d 100644 --- a/backend/siarnaq/api/user/managers.py +++ b/backend/siarnaq/api/user/managers.py @@ -1,5 +1,13 @@ +import tempfile +import uuid +from zipfile import ZipFile + +import google.cloud.storage as storage +from django.conf import settings from django.contrib.auth.models import UserManager as DjangoUserManager -from django.db.models import Count, Exists, OuterRef, Q +from django.db.models import Count, Exists, Max, OuterRef, Q + +from siarnaq.gcloud import titan class UserManager(DjangoUserManager): @@ -29,3 +37,47 @@ def with_passed(self, requirement): ) ) ) + + def export_resumes(self, *, episodes): + users = list( + self.annotate( + rating=Max( + "teams__profile__rating__value", + filter=Q(teams__episode__in=episodes), + ) + ) + .filter(profile__has_resume=True, rating__isnull=False) + .order_by("-rating") + ) + rank_len = len(str(len({user.rating for user in users}))) + with tempfile.SpooledTemporaryFile() as f: + with ZipFile(f, "w") as archive: + rank, last_rating = 0, None + for user in users: + resume = titan.get_object( + bucket=settings.GCLOUD_BUCKET_SECURE, + name=user.profile.get_resume_path(), + check_safety=False, # TODO: actually check safety, see #628 + get_raw=True, + ) + if resume["ready"]: + if user.rating != last_rating: + rank, last_rating = rank + 1, user.rating + rank_str = "rank-" + str(rank).zfill(rank_len) + user_str = user.first_name + "-" + user.last_name + if not user_str.isascii(): + user_str = "NONASCII-USER" + fname = f"{rank_str}-{user_str}.pdf" + archive.writestr(fname, resume["data"]) + + client = storage.Client(credentials=settings.GCLOUD_CREDENTIALS) + blob = client.bucket(settings.GCLOUD_BUCKET_EPHEMERAL).blob( + f"resume-{uuid.uuid4()}.zip" + ) + # Go back to start of file after archive has finished writing + f.seek(0) + with blob.open( + "wb", content_type="application/zip", predefined_acl="publicRead" + ) as g: + g.write(f.read()) + return blob.public_url diff --git a/backend/siarnaq/gcloud/titan.py b/backend/siarnaq/gcloud/titan.py index 5af3158ac..290619d79 100644 --- a/backend/siarnaq/gcloud/titan.py +++ b/backend/siarnaq/gcloud/titan.py @@ -19,7 +19,9 @@ def request_scan(blob: storage.Blob) -> None: blob.patch() -def get_object(bucket: str, name: str, check_safety: bool) -> dict[str, str | bool]: +def get_object( + bucket: str, name: str, check_safety: bool, get_raw: bool = False +) -> dict[str, str | bytes | bool]: """ Retrieve a file from storage, performing safety checks if required. @@ -31,14 +33,21 @@ def get_object(bucket: str, name: str, check_safety: bool) -> dict[str, str | bo The name (full path) of the object in the bucket. check_safety : bool Whether the object should only be returned if verified by Titan. + get_raw : bool + Whether to return the raw file contents instead of a URL. Returns ------- dict[str, str] A dictionary consisting of a boolean field "ready" indicating whether the file - has passed any requested safety checks. If this is true, then an additional - field "url" is supplied with a signed download link. Otherwise, a field "reason" - is available explaining why the file cannot be downloaded. + has passed any requested safety checks. + + If this is true, then an additional field will be available for retrieving the + file: either a field "url" with a signed download link, or "data" with the raw + data. + + Otherwise, a field "reason" is available explaining why the file cannot be + downloaded. """ log = logger.bind(bucket=bucket, name=name) if not settings.GCLOUD_ENABLE_ACTIONS: @@ -49,6 +58,11 @@ def get_object(bucket: str, name: str, check_safety: bool) -> dict[str, str | bo blob = client.bucket(bucket).get_blob(name) match (check_safety, blob.metadata): case (False, _) | (True, {"Titan-Status": "Verified"}): + if get_raw: + return { + "ready": True, + "data": blob.download_as_bytes(), + } # Signing is complicated due to an issue with the Google Auth library. # See: https://github.com/googleapis/google-auth-library-python/issues/50 signing_credentials = impersonated_credentials.Credentials( diff --git a/backend/siarnaq/settings.py b/backend/siarnaq/settings.py index 6ee9a682c..e1bb418a2 100644 --- a/backend/siarnaq/settings.py +++ b/backend/siarnaq/settings.py @@ -267,6 +267,7 @@ class Local(Base): GCLOUD_BUCKET_PUBLIC = "nowhere-public" GCLOUD_BUCKET_SECURE = "nowhere-secure" + GCLOUD_BUCKET_EPHEMERAL = "nowhere-ephemeral" GCLOUD_TOPIC_COMPILE = "nowhere-siarnaq-compile" GCLOUD_TOPIC_EXECUTE = "nowhere-siarnaq-execute" GCLOUD_ORDER_COMPILE = "compile-order" @@ -332,6 +333,7 @@ class Staging(Base): GCLOUD_BUCKET_PUBLIC = "mitbattlecode-staging-public" GCLOUD_BUCKET_SECURE = "mitbattlecode-staging-secure" + GCLOUD_BUCKET_EPHEMERAL = "mitbattlecode-staging-ephemeral" GCLOUD_TOPIC_COMPILE = "staging-siarnaq-compile" GCLOUD_TOPIC_EXECUTE = "staging-siarnaq-execute" GCLOUD_ORDER_COMPILE = "compile-order" @@ -424,6 +426,7 @@ class Production(Base): GCLOUD_BUCKET_PUBLIC = "mitbattlecode-production-public" GCLOUD_BUCKET_SECURE = "mitbattlecode-production-secure" + GCLOUD_BUCKET_EPHEMERAL = "mitbattlecode-production-ephemeral" GCLOUD_TOPIC_COMPILE = "production-siarnaq-compile" GCLOUD_TOPIC_EXECUTE = "production-siarnaq-execute" GCLOUD_ORDER_COMPILE = "compile-order" diff --git a/deploy/galaxy/main.tf b/deploy/galaxy/main.tf index 5c04bc455..87cdb5151 100644 --- a/deploy/galaxy/main.tf +++ b/deploy/galaxy/main.tf @@ -42,6 +42,23 @@ resource "google_storage_bucket" "secure" { } } +resource "google_storage_bucket" "ephemeral" { + name = "mitbattlecode-${var.name}-ephemeral" + + location = var.gcp_region + storage_class = "STANDARD" + labels = merge(var.labels, {component="storage"}) + + lifecycle_rule { + condition { + age = 1 + } + action { + type = "Delete" + } + } +} + resource "google_storage_bucket" "frontend" { count = var.create_website ? 1 : 0 @@ -138,7 +155,7 @@ module "siarnaq" { storage_public_name = google_storage_bucket.public.name storage_secure_name = google_storage_bucket.secure.name - + storage_ephemeral_name = google_storage_bucket.ephemeral.name } module "titan" { diff --git a/deploy/siarnaq/main.tf b/deploy/siarnaq/main.tf index 8df0a4eda..88317643c 100644 --- a/deploy/siarnaq/main.tf +++ b/deploy/siarnaq/main.tf @@ -32,6 +32,12 @@ resource "google_storage_bucket_iam_member" "secure" { member = "serviceAccount:${google_service_account.this.email}" } +resource "google_storage_bucket_iam_member" "ephemeral" { + bucket = var.storage_ephemeral_name + role = "roles/storage.objectAdmin" + member = "serviceAccount:${google_service_account.this.email}" +} + resource "google_project_iam_member" "scheduler" { project = var.gcp_project role = "roles/cloudscheduler.admin" diff --git a/deploy/siarnaq/variables.tf b/deploy/siarnaq/variables.tf index c2f520cfd..8a301ab10 100644 --- a/deploy/siarnaq/variables.tf +++ b/deploy/siarnaq/variables.tf @@ -68,6 +68,11 @@ variable "storage_secure_name" { type = string } +variable "storage_ephemeral_name" { + description = "Name of Google Cloud Storage bucket resource for ephemeral artifacts" + type = string +} + variable "additional_secrets" { description = "Additional secrets to inject into the secret manager" type = map