From d3a6c0d909fc6dbb10c9ff4f01e5de0ea7d0fe0d Mon Sep 17 00:00:00 2001 From: Simon Bauer Date: Wed, 2 Oct 2024 12:26:38 +0200 Subject: [PATCH 1/3] Compare assessment tuples in map form because otherwise the diff is horrible --- evaluate/evaluate_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index deba8177..536879b8 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -144,7 +144,7 @@ func TestEvaluate(t *testing.T) { return nil })) - assert.ElementsMatch(t, tc.ExpectedAssessments, actualAssessments) + assert.Equal(t, tc.ExpectedAssessments.ToMap(), actualAssessments.ToMap()) assert.Equal(t, tc.ExpectedTotalScore, actualTotalScore) if tc.ExpectedOutputValidate != nil { From 82e9d3f57561dc29781f5a60ab1ce3d1a9154569 Mon Sep 17 00:00:00 2001 From: Simon Bauer Date: Wed, 2 Oct 2024 11:48:07 +0200 Subject: [PATCH 2/3] refactor, Unify log message for "write-test" and "transpile" with code repair Part of #350 --- evaluate/task/task-transpile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluate/task/task-transpile.go b/evaluate/task/task-transpile.go index da12d5e9..fc24385e 100644 --- a/evaluate/task/task-transpile.go +++ b/evaluate/task/task-transpile.go @@ -132,7 +132,7 @@ func (t *TaskTranspile) Run(ctx evaltask.Context) (repositoryAssessment map[eval continue } else { testsPassing := withSymflowerFixTestResult.TestsPass - taskLogger.Printf("Executes tests with %d tests passing after \"symflower fix\"", testsPassing) + taskLogger.Printf("with symflower repair: Executes tests with %d tests passing", testsPassing) // Symflower was able to fix a failure so now update the assessment with the improved results. 
withSymflowerFixAssessments := metrics.NewAssessments() From 027facf30edc8d7e5d36aa971156c6f1edbafef3 Mon Sep 17 00:00:00 2001 From: Simon Bauer Date: Wed, 2 Oct 2024 12:01:21 +0200 Subject: [PATCH 3/3] refactor, Reword assessment for clarity and remove "symflower-fix" file suffix to make it more generic Part of #350 --- evaluate/task/{symflower-fix.go => symflower.go} | 0 evaluate/task/task-write-test.go | 16 ++++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) rename evaluate/task/{symflower-fix.go => symflower.go} (100%) diff --git a/evaluate/task/symflower-fix.go b/evaluate/task/symflower.go similarity index 100% rename from evaluate/task/symflower-fix.go rename to evaluate/task/symflower.go diff --git a/evaluate/task/task-write-test.go b/evaluate/task/task-write-test.go index 9b05e1ff..5330cf44 100644 --- a/evaluate/task/task-write-test.go +++ b/evaluate/task/task-write-test.go @@ -47,15 +47,15 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva } modelAssessment := metrics.NewAssessments() - withSymflowerAssessment := metrics.NewAssessments() + withSymflowerFixAssessment := metrics.NewAssessments() maximumReachableFiles := uint64(len(filePaths)) modelAssessment[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles - withSymflowerAssessment[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles + withSymflowerFixAssessment[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles for _, filePath := range filePaths { modelAssessmentForFile := metrics.NewAssessments() - withSymflowerAssessmentForFile := modelAssessmentForFile // The symflower assessment tracks how the model result can be improved in case of a failure, so just link to the model assessment until a failure actually happens. 
+ withSymflowerFixAssessmentForFile := modelAssessmentForFile // The symflower assessment tracks how the model result can be improved in case of a failure, so just link to the model assessment until a failure actually happens. if err := ctx.Repository.Reset(ctx.Logger); err != nil { ctx.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err) @@ -89,7 +89,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva // If there is an execution timeout do not run "symflower fix" because the code itself is correct. if errors.Is(err, context.DeadlineExceeded) { modelAssessment.Add(modelAssessmentForFile) - withSymflowerAssessment.Add(withSymflowerAssessmentForFile) + withSymflowerFixAssessment.Add(withSymflowerFixAssessmentForFile) continue } @@ -102,7 +102,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva problems = append(problems, err) modelAssessment.Add(modelAssessmentForFile) - withSymflowerAssessment.Add(withSymflowerAssessmentForFile) + withSymflowerFixAssessment.Add(withSymflowerFixAssessmentForFile) continue } else { @@ -114,7 +114,7 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted) withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage) - withSymflowerAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerFixAssessments) + withSymflowerFixAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerFixAssessments) } } } else { @@ -124,12 +124,12 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva } modelAssessment.Add(modelAssessmentForFile) - withSymflowerAssessment.Add(withSymflowerAssessmentForFile) + withSymflowerFixAssessment.Add(withSymflowerFixAssessmentForFile) } repositoryAssessment = 
map[evaltask.Identifier]metrics.Assessments{ IdentifierWriteTests: modelAssessment, - IdentifierWriteTestsSymflowerFix: withSymflowerAssessment, + IdentifierWriteTestsSymflowerFix: withSymflowerFixAssessment, } return repositoryAssessment, problems, nil