From 00cbbc1f2bb3db90078f631cb355370c49eeca97 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Thu, 5 Jan 2023 22:58:10 +1100 Subject: [PATCH 01/13] Make saturn configurable --- saturn/cmd/saturn/main.go | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/saturn/cmd/saturn/main.go b/saturn/cmd/saturn/main.go index 65104b4c8..b10e12391 100644 --- a/saturn/cmd/saturn/main.go +++ b/saturn/cmd/saturn/main.go @@ -2,6 +2,9 @@ package main import ( "context" + "flag" + "fmt" + "os" "os/signal" "github.com/battlecode/galaxy/saturn/pkg/run" @@ -11,41 +14,39 @@ import ( "golang.org/x/sys/unix" ) -const ( - gcpProjectID = "mitbattlecode" - - gcpSecretName = "staging-saturn" - - gcpPubsubSubscriptionID = "staging-saturn-compile" - gcpTokenedReporterAudience = "siarnaq" - gcpTokenedReporterUserAgent = "Galaxy-Saturn" - monitorAddress = "127.0.0.1:8005" - - scaffoldRoot = "/scaffolds" +var ( + gcpProjectID *string = flag.String("project", os.Getenv("GCP_PROJECT"), "the project id on gcp") + gcpSecretName *string = flag.String("secret", "", "the name of the saturn secret") + gcpPubsubSubscriptionID *string = flag.String("subscription", "", "the name of the pubsub subscription") + gcpTokenedReporterAudience *string = flag.String("audience", "siarnaq", "the audience for gcp oidc tokens") + gcpTokenedReporterUserAgent *string = flag.String("useragent", "Galaxy-Saturn", "the user agent for reporting") + monitorPort *uint = flag.Uint("port", 8005, "the port for monitoring shutdowns") + scaffoldRoot *string = flag.String("scaffold", "/scaffolds", "the root directory for saving scaffolds") ) func main() { zerolog.DefaultContextLogger = &log.Logger zerolog.LevelFieldName = "severity" + flag.Parse() ctx, stop := signal.NotifyContext(context.Background(), unix.SIGINT, unix.SIGTERM) defer stop() - secret, err := saturn.ReadSecret(ctx, gcpProjectID, gcpSecretName) + secret, err := saturn.ReadSecret(ctx, *gcpProjectID, *gcpSecretName) if err != nil { log.Ctx(ctx).Fatal().Err(err).Msg("Could not read secrets.") } - multiplexer, err := run.NewScaffoldMultiplexer(scaffoldRoot, secret) + multiplexer, err := run.NewScaffoldMultiplexer(*scaffoldRoot, secret) if err != nil { log.Ctx(ctx).Fatal().Err(err).Msg("Could not initialize scaffold multiplexer.") } app, err := saturn.New( ctx, - saturn.WithMonitor(monitorAddress), - saturn.WithGcpPubsubSubcriber(gcpProjectID, gcpPubsubSubscriptionID), - saturn.WithGcpTokenedReporter(gcpTokenedReporterAudience, gcpTokenedReporterUserAgent), + saturn.WithMonitor(fmt.Sprintf("127.0.0.1:%d", *monitorPort)), + saturn.WithGcpPubsubSubcriber(*gcpProjectID, *gcpPubsubSubscriptionID), + saturn.WithGcpTokenedReporter(*gcpTokenedReporterAudience, *gcpTokenedReporterUserAgent), saturn.WithRunner("compile", multiplexer.Compile), saturn.WithRunner("execute", multiplexer.Execute), ) From b413d29c6816029e707795a38f1550c435d2e3d9 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Thu, 5 Jan 2023 22:59:16 +1100 Subject: [PATCH 02/13] Ensure always report even if canceled --- saturn/pkg/saturn/report.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saturn/pkg/saturn/report.go b/saturn/pkg/saturn/report.go index d7824e6a9..29b2454ca 100644 --- a/saturn/pkg/saturn/report.go +++ b/saturn/pkg/saturn/report.go @@ -46,7 +46,7 @@ func (r *GCPTokenedReporter) Report(ctx context.Context, t *Task) error { return fmt.Errorf("json.Marshal: %v", err) } - req, err := http.NewRequestWithContext(ctx, "POST", t.Payload.Metadata.ReportURL, bytes.NewBuffer(body)) + req, err := http.NewRequest("POST", t.Payload.Metadata.ReportURL, bytes.NewBuffer(body)) if err != nil { return fmt.Errorf("http.NewRequestWithContext: %v", err) } From 62657354f9684883c540a723ebf657d511ecdd90 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Thu, 5 Jan 2023 23:17:59 +1100 Subject: [PATCH 03/13] Dockerize saturn --- deploy/galaxy/main.tf | 10 ++++------ deploy/saturn/main.tf | 6 +++++- deploy/saturn/variables.tf | 5 ----- saturn/Dockerfile | 24 ++++++++++++++++++++++++ saturn/pkg/run/recipe.go | 2 +- 5 files changed, 34 insertions(+), 13 deletions(-) create mode 100644 saturn/Dockerfile diff --git a/deploy/galaxy/main.tf b/deploy/galaxy/main.tf index d6459da95..bf926e1c0 100644 --- a/deploy/galaxy/main.tf +++ b/deploy/galaxy/main.tf @@ -181,7 +181,6 @@ module "saturn_compile" { machine_type = "e2-medium" image = var.saturn_image - command = "compile" max_instances = var.max_compile_instances min_instances = 0 @@ -207,7 +206,6 @@ module "saturn_execute" { machine_type = "e2-highmem-2" image = var.saturn_image - command = "execute" max_instances = var.max_execute_instances min_instances = 0 @@ -215,7 +213,7 @@ module "saturn_execute" { } resource "google_cloudbuild_trigger" "saturn" { - name = var.name + name = "${var.name}-saturn" github { owner = "battlecode" @@ -229,7 +227,7 @@ resource "google_cloudbuild_trigger" "saturn" { build { step { name = "gcr.io/cloud-builders/docker" - args = ["build", "--build-arg", "BUILD=$SHORT_SHA", "-t", var.saturn_image, "."] + args = ["build", "--build-arg", "REVISION_ARG=$TAG_NAME+$SHORT_SHA.$BUILD_ID", "-t", var.saturn_image, "."] dir = "saturn" } step { @@ -239,12 +237,12 @@ resource "google_cloudbuild_trigger" "saturn" { step { name = "gcr.io/google.com/cloudsdktool/cloud-sdk" entrypoint = "gcloud" - args = ["compute", "instance-groups", "managed", "rolling-action", "replace", module.saturn_compile.compute_group_name, "--region", var.gcp_region] + args = ["compute", "instance-groups", "managed", "rolling-action", "replace", module.saturn_compile.compute_group_name, "--zone", var.gcp_zone] } step { name = "gcr.io/google.com/cloudsdktool/cloud-sdk" entrypoint = "gcloud" - args = ["compute", "instance-groups", "managed", "rolling-action", "replace", module.saturn_execute.compute_group_name, "--region", var.gcp_region] + args = ["compute", "instance-groups", "managed", "rolling-action", "replace", module.saturn_execute.compute_group_name, "--zone", var.gcp_zone] } } } diff --git a/deploy/saturn/main.tf b/deploy/saturn/main.tf index 5cda4018a..daf65308a 100644 --- a/deploy/saturn/main.tf +++ b/deploy/saturn/main.tf @@ -55,7 +55,11 @@ module "container" { container = { image = var.image - args = [var.command] + args = [ + "-project=${var.gcp_project}", + "-secret=${var.secret_id}", + "-subscription=${google_pubsub_subscription.queue.name}", + ] } } diff --git a/deploy/saturn/variables.tf b/deploy/saturn/variables.tf index be82c0ec2..1740661e7 100644 --- a/deploy/saturn/variables.tf +++ b/deploy/saturn/variables.tf @@ -58,11 +58,6 @@ variable "image" { type = string } -variable "command" { - description = "Arguments for the Docker entrypoint" - type = string -} - variable "max_instances" { description = "Maximum allowable size of the worker pool" type = number diff --git a/saturn/Dockerfile b/saturn/Dockerfile new file mode 100644 index 000000000..e6bf2ad57 --- /dev/null +++ b/saturn/Dockerfile @@ -0,0 +1,24 @@ +FROM golang:1.18-buster AS go + +ENV BUILD_HOME /build +WORKDIR $BUILD_HOME + +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o /saturn -ldflags="-s -w" ./cmd/saturn/main.go + + +FROM openjdk:8-slim-buster + +ENV APP_HOME /app +WORKDIR $APP_HOME + +ARG REVISION_ARG=nightly +ENV REVISION=$REVISION_ARG + +EXPOSE 8005 + +COPY --from=go /saturn . +ENTRYPOINT ["./saturn"] diff --git a/saturn/pkg/run/recipe.go b/saturn/pkg/run/recipe.go index 34c38d46a..33f17b8ec 100644 --- a/saturn/pkg/run/recipe.go +++ b/saturn/pkg/run/recipe.go @@ -44,7 +44,7 @@ var StateVersion = Step{ Callable: func(ctx context.Context, arg *StepArguments) error { log.Ctx(ctx).Debug().Msg("Welcome to Saturn!") log.Ctx(ctx).Debug().Msgf("Node: %s", os.Getenv("HOSTNAME")) - log.Ctx(ctx).Debug().Msgf("Build: %s", os.Getenv("SATURN_BUILD")) + log.Ctx(ctx).Debug().Msgf("Revision: %s", os.Getenv("REVISION")) return nil }, } From c10464673e592ad63ceee22fb9e3344cd4b89c98 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Fri, 6 Jan 2023 16:53:08 +1100 Subject: [PATCH 04/13] Fix autoscaler --- deploy/saturn/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/saturn/main.tf b/deploy/saturn/main.tf index daf65308a..4be191da9 100644 --- a/deploy/saturn/main.tf +++ b/deploy/saturn/main.tf @@ -136,7 +136,7 @@ resource "google_compute_autoscaler" "this" { metric { name = "pubsub.googleapis.com/subscription/num_undelivered_messages" - filter = "resource.type = pubsub_subscription AND resource.label.subscription_id = \"${google_pubsub_subscription.queue.id}\"" + filter = "resource.type = pubsub_subscription AND resource.label.subscription_id = \"${google_pubsub_subscription.queue.name}\"" single_instance_assignment = var.load_ratio } } From aecc867ea8d66f6f4a89b8e0d5a1632fb4f9f9fc Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Fri, 6 Jan 2023 17:05:11 +1100 Subject: [PATCH 05/13] Ensure permission to pull saturn image --- deploy/cd/outputs.tf | 4 ++++ deploy/galaxy/main.tf | 4 ++++ deploy/galaxy/variables.tf | 5 +++++ deploy/main.tf | 4 ++++ deploy/saturn/main.tf | 7 +++++++ deploy/saturn/variables.tf | 5 +++++ 6 files changed, 29 insertions(+) diff --git a/deploy/cd/outputs.tf b/deploy/cd/outputs.tf index d06e5d6dc..6a777c00a 100644 --- a/deploy/cd/outputs.tf +++ b/deploy/cd/outputs.tf @@ -4,3 +4,7 @@ output "artifact_image" { k => "${google_artifact_registry_repository.this.location}-docker.pkg.dev/${google_artifact_registry_repository.this.project}/${google_artifact_registry_repository.this.repository_id}/${k}" } } + +output "artifact_registry_name" { + value = google_artifact_registry_repository.this.name +} diff --git a/deploy/galaxy/main.tf b/deploy/galaxy/main.tf index bf926e1c0..6c92ee6af 100644 --- a/deploy/galaxy/main.tf +++ b/deploy/galaxy/main.tf @@ -174,6 +174,8 @@ module "saturn_compile" { storage_public_name = google_storage_bucket.public.name storage_secure_name = google_storage_bucket.secure.name + artifact_registry_name = var.artifact_registry_name + secret_id = google_secret_manager_secret.saturn.secret_id network_vpc_id = google_compute_network.this.id @@ -199,6 +201,8 @@ module "saturn_execute" { storage_public_name = google_storage_bucket.public.name storage_secure_name = google_storage_bucket.secure.name + artifact_registry_name = var.artifact_registry_name + secret_id = google_secret_manager_secret.saturn.secret_id network_vpc_id = google_compute_network.this.id diff --git a/deploy/galaxy/variables.tf b/deploy/galaxy/variables.tf index ee18a81dc..b8d7e9c7b 100644 --- a/deploy/galaxy/variables.tf +++ b/deploy/galaxy/variables.tf @@ -58,6 +58,11 @@ variable "siarnaq_secrets" { type = map } +variable "artifact_registry_name" { + description = "Name of the Artifact Registry where the build image can be found" + type = string +} + variable "titan_image" { description = "Image for the Titan Docker container" type = string diff --git a/deploy/main.tf b/deploy/main.tf index 148accca8..ab9cea278 100644 --- a/deploy/main.tf +++ b/deploy/main.tf @@ -42,6 +42,8 @@ module "production" { database_authorized_networks = [] siarnaq_secrets = merge(var.siarnaq_secrets_common, var.siarnaq_secrets_production) + artifact_registry_name = module.cd.artifact_registry_name + titan_image = module.cd.artifact_image["titan"] saturn_image = module.cd.artifact_image["saturn"] @@ -75,6 +77,8 @@ module "staging" { database_authorized_networks = ["0.0.0.0/0"] siarnaq_secrets = merge(var.siarnaq_secrets_common, var.siarnaq_secrets_staging) + artifact_registry_name = module.cd.artifact_registry_name + titan_image = module.cd.artifact_image["titan"] saturn_image = module.cd.artifact_image["saturn"] diff --git a/deploy/saturn/main.tf b/deploy/saturn/main.tf index 4be191da9..8d0a9d72f 100644 --- a/deploy/saturn/main.tf +++ b/deploy/saturn/main.tf @@ -16,6 +16,13 @@ resource "google_storage_bucket_iam_member" "secure" { member = "serviceAccount:${google_service_account.this.email}" } +resource "google_artifact_registry_repository_iam_member" "this" { + location = var.gcp_region + repository = var.artifact_registry_name + role = "roles/artifactregistry.reader" + member = "serviceAccount:${google_service_account.this.email}" +} + resource "google_pubsub_subscription" "queue" { name = var.name topic = var.pubsub_topic_name diff --git a/deploy/saturn/variables.tf b/deploy/saturn/variables.tf index 1740661e7..af6044e9d 100644 --- a/deploy/saturn/variables.tf +++ b/deploy/saturn/variables.tf @@ -33,6 +33,11 @@ variable "storage_secure_name" { type = string } +variable "artifact_registry_name" { + description = "Name of the Artifact Registry where the build image can be found" + type = string +} + variable "secret_id" { description = "ID of the Secret resource" type = string From 2532d45efe7f5c98aef5780ef2fb5a86a94bb22e Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Fri, 6 Jan 2023 23:23:02 +1100 Subject: [PATCH 06/13] Fix report field type --- saturn/pkg/saturn/report.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saturn/pkg/saturn/report.go b/saturn/pkg/saturn/report.go index 29b2454ca..baf925120 100644 --- a/saturn/pkg/saturn/report.go +++ b/saturn/pkg/saturn/report.go @@ -37,7 +37,7 @@ func (r *GCPTokenedReporter) Report(ctx context.Context, t *Task) error { } payload["invocation"] = map[string]interface{}{ "status": t.status.String(), - "logs": t.logs, + "logs": t.logs.String(), "interrupted": t.status == TaskInterrupted, } From b1c3ad9a951626fc9034322aa8c3349f778d15ca Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Fri, 6 Jan 2023 23:24:22 +1100 Subject: [PATCH 07/13] Default to http for the sake of staging --- backend/siarnaq/api/compete/models.py | 4 ++-- backend/siarnaq/api/episodes/signals.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/siarnaq/api/compete/models.py b/backend/siarnaq/api/compete/models.py index 8b5474f4e..857903e3a 100644 --- a/backend/siarnaq/api/compete/models.py +++ b/backend/siarnaq/api/compete/models.py @@ -163,7 +163,7 @@ def for_saturn(self): def enqueue_options(self): """Return the options to be submitted to the compilation queue.""" - report_url = "https://{}{}".format( + report_url = "http://{}{}".format( settings.ALLOWED_HOSTS[0], reverse( "submission-report", @@ -256,7 +256,7 @@ def for_saturn(self): def enqueue_options(self): """Return the options to be submitted to the execution queue.""" - report_url = "https://{}{}".format( + report_url = "http://{}{}".format( settings.ALLOWED_HOSTS[0], reverse( "match-report", diff --git a/backend/siarnaq/api/episodes/signals.py b/backend/siarnaq/api/episodes/signals.py index 492b375e6..85702ee55 100644 --- a/backend/siarnaq/api/episodes/signals.py +++ b/backend/siarnaq/api/episodes/signals.py @@ -50,7 +50,7 @@ def update_autoscrim_schedule(instance, update_fields, **kwargs): f"{parent}/jobs/" f"{settings.GCLOUD_SCHEDULER_PREFIX}-autoscrim-{instance.name_short}" ) - url = "https://{}{}".format( + url = "http://{}{}".format( settings.ALLOWED_HOSTS[0], reverse("episode-autoscrim", kwargs={"pk": instance.pk}), ) From 1a497f99efc058925335607dfc0b5593d9b8dacd Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Fri, 6 Jan 2023 23:53:12 +1100 Subject: [PATCH 08/13] Add more logging --- saturn/pkg/saturn/queue.go | 1 + saturn/pkg/saturn/report.go | 13 +++++++++++-- saturn/pkg/saturn/task.go | 11 +++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/saturn/pkg/saturn/queue.go b/saturn/pkg/saturn/queue.go index fa46e32be..ed14d293a 100644 --- a/saturn/pkg/saturn/queue.go +++ b/saturn/pkg/saturn/queue.go @@ -36,6 +36,7 @@ func NewGCPPubsubSubscriber(ctx context.Context, projectID, subscriptionID strin func (c *GCPPubsubSubscriber) Subscribe(ctx context.Context, handler QueuedTaskHandler) error { err := c.subscription.Receive(ctx, func(ctx context.Context, msg *pubsub.Message) { defer msg.Nack() + log.Ctx(ctx).Info().Bytes("message", msg.Data).Msg("Starting task.") var task TaskPayload if err := json.Unmarshal(msg.Data, &task); err != nil { log.Ctx(ctx).Error().Err(err).Msg("Invalid message.") diff --git a/saturn/pkg/saturn/report.go b/saturn/pkg/saturn/report.go index baf925120..1f1a4c509 100644 --- a/saturn/pkg/saturn/report.go +++ b/saturn/pkg/saturn/report.go @@ -5,8 +5,10 @@ import ( "context" "encoding/json" "fmt" + "io/ioutil" "net/http" + "github.com/rs/zerolog/log" "google.golang.org/api/idtoken" ) @@ -41,12 +43,13 @@ func (r *GCPTokenedReporter) Report(ctx context.Context, t *Task) error { "interrupted": t.status == TaskInterrupted, } - body, err := json.Marshal(payload) + reqBody, err := json.Marshal(payload) if err != nil { return fmt.Errorf("json.Marshal: %v", err) } + log.Ctx(ctx).Debug().RawJSON("payload", reqBody).Msg("Sending report.") - req, err := http.NewRequest("POST", t.Payload.Metadata.ReportURL, bytes.NewBuffer(body)) + req, err := http.NewRequest("POST", t.Payload.Metadata.ReportURL, bytes.NewBuffer(reqBody)) if err != nil { return fmt.Errorf("http.NewRequestWithContext: %v", err) } @@ -59,6 +62,12 @@ func (r *GCPTokenedReporter) Report(ctx context.Context, t *Task) error { } defer resp.Body.Close() + respBody, err := ioutil.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("ioutil.ReadAll: %v", err) + } + log.Ctx(ctx).Debug().Bytes("response", respBody).Msg("Report sent.") + if resp.StatusCode == http.StatusConflict { t.Finish(TaskAborted, nil) } diff --git a/saturn/pkg/saturn/task.go b/saturn/pkg/saturn/task.go index f89a735c1..8aa397e0a 100644 --- a/saturn/pkg/saturn/task.go +++ b/saturn/pkg/saturn/task.go @@ -79,15 +79,15 @@ func (t *Task) Run(ctx context.Context, r Reporter) (err error) { switch r := recover(); r { case taskFinished{}, nil: if err != nil { - log.Ctx(ctx).Warn().Err(err).Msg("Task did not finish successfully.") + log.Ctx(ctx).Warn().Stringer("status", t.status).Err(err).Msg("Task did not finish successfully.") } else { - log.Ctx(ctx).Info().Msg("Task finished successfully.") + log.Ctx(ctx).Info().Stringer("status", t.status).Msg("Task finished successfully.") } default: panic(r) } - if err = t.FinalizeReport(ctx, r); err != nil { - err = fmt.Errorf("t.FinalizeReport: %v", err) + if tErr := t.FinalizeReport(ctx, r); tErr != nil { + err = fmt.Errorf("t.FinalizeReport: %v", tErr) } }() defer t.Finish(TaskErrored, nil) @@ -99,10 +99,13 @@ func (t *Task) Run(ctx context.Context, r Reporter) (err error) { }) ctx = log.Ctx(ctx).Hook(hook).WithContext(ctx) + log.Ctx(ctx).Debug().Msg("Initializing task.") if err = r.Report(ctx, t); err != nil { err = fmt.Errorf("r.Report: %v", err) return } + + log.Ctx(ctx).Debug().Msg("Running task.") if err = t.Runner(ctx, t, t.Payload); err != nil { err = fmt.Errorf("t.Runner: %v", err) return From 498e7230f3722dd1fae165c925b913dec0da4cfb Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Sat, 7 Jan 2023 00:10:19 +1100 Subject: [PATCH 09/13] Allow blank logs from saturn --- backend/siarnaq/api/compete/serializers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/siarnaq/api/compete/serializers.py b/backend/siarnaq/api/compete/serializers.py index 6794f6f24..9d8da7222 100644 --- a/backend/siarnaq/api/compete/serializers.py +++ b/backend/siarnaq/api/compete/serializers.py @@ -30,7 +30,9 @@ class AlreadyFinalized(APIException): class SaturnInvocationSerializer(serializers.Serializer): status = serializers.ChoiceField(SaturnStatus.choices) - logs = serializers.CharField(required=False) + logs = serializers.CharField( + required=False, allow_blank=True, trim_whitespace=False + ) interrupted = serializers.BooleanField(required=False) def update(self, instance, validated_data): From 1d466b49afa6737ea89b3442318e74c645213921 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Sat, 7 Jan 2023 00:48:07 +1100 Subject: [PATCH 10/13] Fix queue protocol --- saturn/pkg/run/protocol.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saturn/pkg/run/protocol.go b/saturn/pkg/run/protocol.go index 938d6c0c9..a62426738 100644 --- a/saturn/pkg/run/protocol.go +++ b/saturn/pkg/run/protocol.go @@ -8,7 +8,7 @@ type Submission struct { } type CompileRequest struct { - Submission `mapstructure:"submission"` + Submission `mapstructure:"submission,squash"` } type ExecuteRequest struct { From 461833f151a4a2b61d1ff653b2afdd800e52b0fb Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Sat, 7 Jan 2023 00:57:46 +1100 Subject: [PATCH 11/13] Fix incorrect task finishing behavior --- saturn/pkg/saturn/task.go | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/saturn/pkg/saturn/task.go b/saturn/pkg/saturn/task.go index 8aa397e0a..d1082e6d6 100644 --- a/saturn/pkg/saturn/task.go +++ b/saturn/pkg/saturn/task.go @@ -60,6 +60,15 @@ func (s TaskStatus) String() string { } } +func (s TaskStatus) Retryable() bool { + switch s { + case TaskErrored, TaskInterrupted: + return true + default: + return false + } +} + type taskFinished struct{} type Finisher interface { @@ -78,16 +87,18 @@ func (t *Task) Run(ctx context.Context, r Reporter) (err error) { defer func() { switch r := recover(); r { case taskFinished{}, nil: - if err != nil { - log.Ctx(ctx).Warn().Stringer("status", t.status).Err(err).Msg("Task did not finish successfully.") - } else { - log.Ctx(ctx).Info().Stringer("status", t.status).Msg("Task finished successfully.") - } + log.Ctx(ctx).Info().Stringer("status", t.status).Msg("Task finished.") default: panic(r) } - if tErr := t.FinalizeReport(ctx, r); tErr != nil { - err = fmt.Errorf("t.FinalizeReport: %v", tErr) + if err != nil { + // TODO: log a traceback + } + if err = t.FinalizeReport(ctx, r); err != nil { + err = fmt.Errorf("t.FinalizeReport: %v", err) + } + if t.status.Retryable() { + err = fmt.Errorf("task not complete: %v", err) } }() defer t.Finish(TaskErrored, nil) @@ -114,11 +125,15 @@ func (t *Task) Run(ctx context.Context, r Reporter) (err error) { } func (t *Task) Finish(status TaskStatus, details map[string]interface{}) { + if r := recover(); r != nil { + panic(r) // Don't clobber an existing panic + } if t.status != TaskRunning { - t.status = status - t.details = details - panic(taskFinished{}) + return } + t.status = status + t.details = details + panic(taskFinished{}) } func (t *Task) FinalizeReport(ctx context.Context, r Reporter) error { @@ -127,6 +142,7 @@ func (t *Task) FinalizeReport(ctx context.Context, r Reporter) error { } if ctx.Err() != nil { t.status = TaskInterrupted + log.Ctx(ctx).Debug().Msg("This task was interrupted and will be retried soon.") } if err := r.Report(ctx, t); err != nil { return fmt.Errorf("r.Report: %v", err) From c7e741669223fa1a0608c795f7e433a6ce598e96 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Sat, 7 Jan 2023 01:26:24 +1100 Subject: [PATCH 12/13] Fix incorrect scaffold behavior --- saturn/pkg/run/java8.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/saturn/pkg/run/java8.go b/saturn/pkg/run/java8.go index 99759dd83..746790984 100644 --- a/saturn/pkg/run/java8.go +++ b/saturn/pkg/run/java8.go @@ -163,8 +163,8 @@ func (s *Java8Scaffold) RunMatch() *Step { fmt.Sprintf("-PonSaturn=%t", true), fmt.Sprintf("-PteamA=%s", arg.Details.(ExecuteRequest).A.TeamName), fmt.Sprintf("-PteamB=%s", arg.Details.(ExecuteRequest).B.TeamName), - fmt.Sprintf("-PclassLocationA=%s", filepath.Join("data", "a")), - fmt.Sprintf("-PclassLocationB=%s", filepath.Join("data", "b")), + fmt.Sprintf("-PclassLocationA=%s", filepath.Join("data", "A")), + fmt.Sprintf("-PclassLocationB=%s", filepath.Join("data", "B")), fmt.Sprintf("-PpackageNameA=%s", arg.Details.(ExecuteRequest).A.Package), fmt.Sprintf("-PpackageNameB=%s", arg.Details.(ExecuteRequest).B.Package), fmt.Sprintf("-Preplay=%s", filepath.Join("data", "replay.bin")), From 5786b1f997eb05b9e100ad6211fa73792c26ac45 Mon Sep 17 00:00:00 2001 From: Jerry Mao Date: Sat, 7 Jan 2023 02:22:38 +1100 Subject: [PATCH 13/13] Fix uninitialized variable --- saturn/pkg/run/java8.go | 1 + 1 file changed, 1 insertion(+) diff --git a/saturn/pkg/run/java8.go b/saturn/pkg/run/java8.go index 746790984..0fcfe5335 100644 --- a/saturn/pkg/run/java8.go +++ b/saturn/pkg/run/java8.go @@ -39,6 +39,7 @@ func NewJava8Scaffold(ctx context.Context, episode saturn.Episode, repo *git.Rep s.UploadReplay(), s.DetermineScores(), } + s.matchOutputs = make(map[*StepArguments]string) return s, nil }