Skip to content

Commit

Permalink
refactor, Extract "write test" core task logic so it can be applied t…
Browse files Browse the repository at this point in the history
…wice (with and without template)

Part #350
  • Loading branch information
bauersimon committed Oct 2, 2024
1 parent ccd432e commit 8678677
Showing 1 changed file with 57 additions and 51 deletions.
108 changes: 57 additions & 51 deletions evaluate/task/task-write-test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,64 +52,17 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva
withSymflowerFixAssessment[metrics.AssessmentKeyFilesExecutedMaximumReachable] = maximumReachableFiles

for _, filePath := range filePaths {
modelAssessmentForFile := metrics.NewAssessments()
var withSymflowerFixAssessmentForFile metrics.Assessments

if err := ctx.Repository.Reset(ctx.Logger); err != nil {
ctx.Logger.Panicf("ERROR: unable to reset temporary repository path: %s", err)
}

modelContext := model.Context{
Language: ctx.Language,

RepositoryPath: dataPath,
FilePath: filePath,

Logger: taskLogger.Logger,
}
assessments, err := modelCapability.WriteTests(modelContext)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))

continue
}
if assessments[metrics.AssessmentKeyProcessingTime] == 0 {
return nil, nil, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", ctx.Model.ID(), ctx.Repository.Name())
}
modelAssessmentForFile.Add(assessments)
modelAssessmentForFile.Award(metrics.AssessmentKeyResponseNoError)

testResult, ps, err := ctx.Language.ExecuteTests(taskLogger.Logger, dataPath)
modelAssessmentFile, withSymflowerFixAssessmentFile, ps, err := runModelAndSymflowerFix(ctx, taskLogger, modelCapability, dataPath, filePath)
problems = append(problems, ps...)
if err != nil {
problems = append(problems, pkgerrors.WithMessage(err, filePath))
} else {
taskLogger.Printf("Executes tests with %d coverage objects", testResult.Coverage)
modelAssessmentForFile.Award(metrics.AssessmentKeyFilesExecuted)
modelAssessmentForFile.AwardPoints(metrics.AssessmentKeyCoverage, testResult.Coverage)
}

// Run "symflower fix" if the model response fails to execute.
if ctx.Language.ID() == "golang" { // Currently we only support Go for "symflower fix".
withSymflowerFixTestResult, processingTime, ps, err := ExecuteWithSymflowerFix(ctx, taskLogger.Logger, ctx.Repository.DataPath())
problems = append(problems, ps...)
if err != nil {
problems = append(problems, err)
} else {
ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", withSymflowerFixTestResult.Coverage)

// Symflower was able to fix a failure so now update the assessment with the improved results.
withSymflowerFixAssessments := metrics.NewAssessments()
withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime
withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted)
withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage)

withSymflowerFixAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerFixAssessments)
}
return nil, problems, err
}

modelAssessment.Add(modelAssessmentForFile)
withSymflowerFixAssessment.Add(withSymflowerFixAssessmentForFile)
modelAssessment.Add(modelAssessmentFile)
withSymflowerFixAssessment.Add(withSymflowerFixAssessmentFile)
}

repositoryAssessment = map[evaltask.Identifier]metrics.Assessments{
Expand All @@ -120,6 +73,59 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva
return repositoryAssessment, problems, nil
}

// runModelAndSymflowerFix queries the model for tests of the file at "filePath", executes the tests of the repository at "dataPath" and, for Go, executes them again with "symflower fix" applied.
// "modelAssessment" holds the assessment of the plain model response, "withSymflowerFixAssessment" the model assessment combined with the "symflower fix" results.
// "withSymflowerFixAssessment" stays nil if the language is not Go or if the execution with "symflower fix" failed.
// Both assessments are nil if querying the model failed, which is reported as a problem and not as an error.
// Non-fatal failures are collected in "problems", "err" is only set if the model response carries no processing time measurement.
func runModelAndSymflowerFix(ctx evaltask.Context, taskLogger *taskLogger, modelCapability model.CapabilityWriteTests, dataPath string, filePath string) (modelAssessment metrics.Assessments, withSymflowerFixAssessment metrics.Assessments, problems []error, err error) {
	modelContext := model.Context{
		Language: ctx.Language,

		RepositoryPath: dataPath,
		FilePath:       filePath,

		Logger: taskLogger.Logger,
	}
	assessments, err := modelCapability.WriteTests(modelContext)
	if err != nil {
		// A failing model query only affects this file, so it is recorded as a problem instead of aborting the caller.
		return nil, nil, append(problems, pkgerrors.WithMessage(err, filePath)), nil
	}
	if assessments[metrics.AssessmentKeyProcessingTime] == 0 {
		return nil, nil, problems, pkgerrors.Errorf("no model response time measurement present for %q at repository %q", ctx.Model.ID(), ctx.Repository.Name())
	}

	modelAssessment = metrics.NewAssessments()
	modelAssessment.Add(assessments)
	modelAssessment.Award(metrics.AssessmentKeyResponseNoError)

	testResult, testProblems, testErr := ctx.Language.ExecuteTests(taskLogger.Logger, dataPath)
	problems = append(problems, testProblems...)
	if testErr != nil {
		problems = append(problems, pkgerrors.WithMessage(testErr, filePath))
	} else {
		taskLogger.Printf("Executes tests with %d coverage objects", testResult.Coverage)
		modelAssessment.Award(metrics.AssessmentKeyFilesExecuted)
		modelAssessment.AwardPoints(metrics.AssessmentKeyCoverage, testResult.Coverage)
	}

	// "symflower fix" is applied to every model response, regardless of whether the plain execution succeeded.
	// Currently we only support Go for "symflower fix".
	if ctx.Language.ID() == "golang" {
		// NOTE(review): this uses "ctx.Repository.DataPath()" instead of the "dataPath" argument, presumably both are identical - confirm against the caller.
		fixTestResult, processingTime, fixProblems, fixErr := ExecuteWithSymflowerFix(ctx, taskLogger.Logger, ctx.Repository.DataPath())
		problems = append(problems, fixProblems...)
		if fixErr != nil {
			// Attach the file path so the problem can be traced back, consistent with the plain test execution above.
			problems = append(problems, pkgerrors.WithMessage(fixErr, filePath))
		} else {
			ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", fixTestResult.Coverage)

			// The execution with "symflower fix" succeeded, so combine its results with the model assessment.
			withSymflowerFix := metrics.NewAssessments()
			withSymflowerFix[metrics.AssessmentKeyProcessingTime] = processingTime
			withSymflowerFix.Award(metrics.AssessmentKeyFilesExecuted)
			withSymflowerFix.AwardPoints(metrics.AssessmentKeyCoverage, fixTestResult.Coverage)

			withSymflowerFixAssessment = metrics.CombineWithSymflowerFixAssessments(modelAssessment, withSymflowerFix)
		}
	}

	return modelAssessment, withSymflowerFixAssessment, problems, nil
}

// validateWriteTestsRepository checks if the repository for the "write-tests" task is well-formed.
func validateWriteTestsRepository(logger *log.Logger, repositoryPath string, language language.Language) (err error) {
logger.Printf("validating repository %q", repositoryPath)
Expand Down

0 comments on commit 8678677

Please sign in to comment.