From 832369f8c47b3c3a9a668c066bf15f01ab57da08 Mon Sep 17 00:00:00 2001 From: Markus Zimmermann Date: Thu, 3 Oct 2024 16:10:45 +0200 Subject: [PATCH] fix, Linting problems --- cmd/eval-dev-quality/cmd/evaluate.go | 22 +++++--- cmd/eval-dev-quality/cmd/report.go | 13 +++-- cmd/eval-dev-quality/cmd/report_test.go | 8 +-- evaluate/evaluate.go | 21 +++++--- evaluate/evaluate_test.go | 5 +- evaluate/metrics/testing/assessments.go | 26 +++++----- evaluate/report/csv.go | 12 +++-- .../{task-code-repair.go => code-repair.go} | 18 +++---- ...ode-repair_test.go => code-repair_test.go} | 30 +++++------ evaluate/task/task.go | 10 ++-- evaluate/task/test-integration/task_test.go | 4 +- evaluate/task/testing/task.go | 4 ++ .../task/{task-transpile.go => transpile.go} | 38 +++++++------- ...sk-transpile_test.go => transpile_test.go} | 16 +++--- .../{task-write-test.go => write-test.go} | 30 +++++------ ...-write-test_test.go => write-test_test.go} | 13 +++-- language/golang/language_test.go | 8 --- language/java/language.go | 2 - language/java/language_test.go | 8 --- language/language.go | 2 +- language/testing/language.go | 4 ++ log/logger.go | 47 +++++++++++------ model/llm/llm.go | 6 +-- model/llm/llm_test.go | 4 +- model/symflower/symflower.go | 2 +- model/symflower/symflower_test.go | 50 +++++++++---------- provider/openai-api/query.go | 2 +- provider/openrouter/openrouter.go | 6 ++- scripts/ollama-models/main.go | 29 ++++++++--- scripts/openrouter-models/main.go | 14 ++++-- scripts/reliability/main.go | 4 +- util/exec.go | 4 +- 32 files changed, 262 insertions(+), 200 deletions(-) rename evaluate/task/{task-code-repair.go => code-repair.go} (88%) rename evaluate/task/{task-code-repair_test.go => code-repair_test.go} (97%) rename evaluate/task/{task-transpile.go => transpile.go} (86%) rename evaluate/task/{task-transpile_test.go => transpile_test.go} (98%) rename evaluate/task/{task-write-test.go => write-test.go} (81%) rename evaluate/task/{task-write-test_test.go => write-test_test.go} (97%) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index 489e8549..a0314eba 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -135,7 +135,9 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. if err != nil { command.logger.Panicf("ERROR: %s", err) } - configurationFile.Close() + if err := configurationFile.Close(); err != nil { + panic(err) + } command.Models = configuration.Models.Selected command.Repositories = configuration.Repositories.Selected @@ -360,9 +362,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate. sort.Strings(command.Repositories) } evaluationContext.RepositoryPaths = command.Repositories - for _, r := range command.Repositories { - evaluationConfiguration.Repositories.Selected = append(evaluationConfiguration.Repositories.Selected, r) - } + evaluationConfiguration.Repositories.Selected = append(evaluationConfiguration.Repositories.Selected, command.Repositories...) } // Make the resolved selected languages available in the command. @@ -494,7 +494,11 @@ func (command *Evaluate) Execute(args []string) (err error) { if err != nil { command.logger.Panicf("ERROR: cannot create configuration file: %s", err) } - defer configurationFile.Close() + defer func() { + if err := configurationFile.Close(); err != nil { + panic(err) + } + }() if err := evaluationConfiguration.Write(configurationFile); err != nil { command.logger.Panicf("ERROR: %s", err) } @@ -773,7 +777,9 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) { parallel.Execute(func() { var tmplData bytes.Buffer - jobTmpl.Execute(&tmplData, data) + if err := jobTmpl.Execute(&tmplData, data); err != nil { + panic(err) + } commandOutput, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{ Command: kubeCommand, @@ -836,7 +842,9 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) { } var tmplData bytes.Buffer - storageTmpl.Execute(&tmplData, data) + if err := storageTmpl.Execute(&tmplData, data); err != nil { + panic(err) + } // Create the storage access pod. output, err := util.CommandWithResult(context.Background(), command.logger, &util.Command{ diff --git a/cmd/eval-dev-quality/cmd/report.go b/cmd/eval-dev-quality/cmd/report.go index b7bcedeb..d437f11a 100644 --- a/cmd/eval-dev-quality/cmd/report.go +++ b/cmd/eval-dev-quality/cmd/report.go @@ -47,7 +47,11 @@ func (command *Report) Execute(args []string) (err error) { if evaluationCSVFile, err = os.OpenFile(filepath.Join(command.ResultPath, "evaluation.csv"), os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0755); err != nil { command.logger.Panicf("ERROR: %s", err) } - defer evaluationCSVFile.Close() + defer func() { + if err := evaluationCSVFile.Close(); err != nil { + panic(err) + } + }() // Collect all evaluation CSV file paths. allEvaluationPaths := map[string]bool{} @@ -86,7 +90,11 @@ func (command *Report) Execute(args []string) (err error) { if err != nil { command.logger.Panicf("ERROR: %s", err) } - defer modelsMetaInformationCSVFile.Close() + defer func() { + if err := modelsMetaInformationCSVFile.Close(); err != nil { + panic(err) + } + }() // Fetch all openrouter models since it is the only provider that currently supports querying meta information. provider := openrouter.NewProvider().(*openrouter.Provider) @@ -139,7 +147,6 @@ func collectAllEvaluationLogFiles(evaluationCSVFilePaths []string) (evaluationLo if err != nil { continue } - filepath.Base(evaluationDirectory) evaluationLogFilePaths = append(evaluationLogFilePaths, filepath.Join(filepath.Base(evaluationDirectory), "evaluation.log")) } diff --git a/cmd/eval-dev-quality/cmd/report_test.go b/cmd/eval-dev-quality/cmd/report_test.go index 7e4f4d8d..65498037 100644 --- a/cmd/eval-dev-quality/cmd/report_test.go +++ b/cmd/eval-dev-quality/cmd/report_test.go @@ -313,7 +313,7 @@ func TestPathsFromGlobPattern(t *testing.T) { Before: func(workingDirectory string) { file, err := os.Create(filepath.Join(workingDirectory, "not-an-evaluation.csv")) require.NoError(t, err) - file.Close() + require.NoError(t, file.Close()) }, EvaluationGlobPattern: "not-an-evaluation.csv", @@ -393,7 +393,7 @@ func TestCollectAllEvaluationLogFiles(t *testing.T) { file, err := os.Create(filepath.Join(workingDirectory, "someModel", "evaluation.csv")) require.NoError(t, err) - file.Close() + require.NoError(t, file.Close()) }, EvaluationCSVFilePaths: []string{ @@ -442,9 +442,9 @@ func createEvaluationDirectoryWithLogFiles(t *testing.T, workingDirectory string file, err := os.Create(filepath.Join(workingDirectory, "evaluation.csv")) require.NoError(t, err) - file.Close() + require.NoError(t, file.Close()) file, err = os.Create(filepath.Join(workingDirectory, "evaluation.log")) require.NoError(t, err) - file.Close() + require.NoError(t, file.Close()) } diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 6eef9873..11cb440f 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -7,7 +7,6 @@ import ( "github.com/symflower/eval-dev-quality/evaluate/report" evaluatetask "github.com/symflower/eval-dev-quality/evaluate/task" - "github.com/symflower/eval-dev-quality/language" evallanguage "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/log" evalmodel "github.com/symflower/eval-dev-quality/model" @@ -79,7 +78,11 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin if err != nil { ctx.Log.Panicf("ERROR: unable to create evaluation CSV file: %+v", err) } - defer evaluationCSVFile.Close() + defer func() { + if err := evaluationCSVFile.Close(); err != nil { + panic(err) + } + }() evaluationFile, err := report.NewEvaluationFile(evaluationCSVFile) if err != nil { ctx.Log.Panicf("ERROR: %+v", err) @@ -131,7 +134,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin } for _, taskIdentifier := range temporaryRepository.SupportedTasks() { - task, err := evaluatetask.TaskForIdentifier(taskIdentifier) + task, err := evaluatetask.ForIdentifier(taskIdentifier) if err != nil { logger.Fatal(err) } @@ -172,7 +175,9 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin } assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment) // Write the task assessment to the evaluation CSV file. - evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment) + if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment); err != nil { + panic(err) + } } }) } @@ -191,7 +196,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin // Create temporary repositories for each language so the repository is copied only once per language. temporaryRepositories := map[string]*evaluatetask.Repository{} for _, l := range ctx.Languages { - relativeRepositoryPaths, err := language.RepositoriesForLanguage(l, ctx.TestdataPath) + relativeRepositoryPaths, err := evallanguage.RepositoriesForLanguage(l, ctx.TestdataPath) if err != nil { ctx.Log.Panicf("ERROR: %s", err) } @@ -256,7 +261,7 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin continue } for _, taskIdentifier := range temporaryRepository.Tasks { - task, err := evaluatetask.TaskForIdentifier(taskIdentifier) + task, err := evaluatetask.ForIdentifier(taskIdentifier) if err != nil { logger.Fatal(err) } @@ -291,7 +296,9 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin } assessments.AddAssessmentPerTask(model, language, repositoryPath, assessment) // Write the task assessment to the evaluation CSV file. - evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment) + if err := evaluationFile.WriteEvaluationRecord(model, language, temporaryRepository.Name(), runCount, assessment); err != nil { + panic(err) + } } }) } diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 16dd99de..b6f66c49 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -22,7 +22,6 @@ import ( "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/language/golang" "github.com/symflower/eval-dev-quality/log" - "github.com/symflower/eval-dev-quality/model" evalmodel "github.com/symflower/eval-dev-quality/model" "github.com/symflower/eval-dev-quality/model/llm" modeltesting "github.com/symflower/eval-dev-quality/model/testing" @@ -863,7 +862,7 @@ func TestEvaluate(t *testing.T) { mockedModelID := "testing-provider/testing-model" mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID) mockedProviderID := "testing-provider" - mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel}) + mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []evalmodel.Model{mockedModel}) mockedLoader := providertesting.NewMockLoader(t) embeddedProvider := &struct { provider.Provider @@ -947,7 +946,7 @@ func TestEvaluate(t *testing.T) { mockedModelID := "testing-provider/testing-model" mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID) mockedProviderID := "testing-provider" - mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel}) + mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []evalmodel.Model{mockedModel}) mockedLoader := providertesting.NewMockLoader(t) embeddedProvider := &struct { provider.Provider diff --git a/evaluate/metrics/testing/assessments.go b/evaluate/metrics/testing/assessments.go index 1435a125..11f434e7 100644 --- a/evaluate/metrics/testing/assessments.go +++ b/evaluate/metrics/testing/assessments.go @@ -11,40 +11,40 @@ import ( // Clean deletes all empty and nondeterministic keys from the assessment. func Clean(assessment metrics.Assessments) metrics.Assessments { - copy := metrics.Assessments{} - maps.Copy(copy, assessment) + c := metrics.Assessments{} + maps.Copy(c, assessment) - delete(copy, metrics.AssessmentKeyProcessingTime) + delete(c, metrics.AssessmentKeyProcessingTime) for _, key := range metrics.AllAssessmentKeysStrings { - if copy[metrics.AssessmentKey(key)] == 0 { - delete(copy, metrics.AssessmentKey(key)) + if c[metrics.AssessmentKey(key)] == 0 { + delete(c, metrics.AssessmentKey(key)) } } - return copy + return c } // CleanSlice deletes all empty and nondeterministic keys from the assessments. func CleanSlice(assessments []metrics.Assessments) []metrics.Assessments { - copy := make([]metrics.Assessments, len(assessments)) + c := make([]metrics.Assessments, len(assessments)) for i, assessment := range assessments { - copy[i] = Clean(assessment) + c[i] = Clean(assessment) } - return copy + return c } // CleanMap deletes all empty and nondeterministic keys from the assessments. func CleanMap[E comparable](assessments map[E]metrics.Assessments) map[E]metrics.Assessments { - copy := map[E]metrics.Assessments{} + c := map[E]metrics.Assessments{} for key, assessment := range assessments { - copy[key] = Clean(assessment) + c[key] = Clean(assessment) } - return copy + return c } // AssessmentsWithProcessingTime is an empty assessment collection with positive processing time. @@ -61,8 +61,10 @@ type AssessmentTuple struct { Assessment metrics.Assessments } +// AssessmentTuples holds a list of all parameters uniquely defining to which run an assessment belongs to. type AssessmentTuples []*AssessmentTuple +// ToMap converts a list of assessment tuples to a mapping. func (at AssessmentTuples) ToMap() (lookup map[model.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments) { lookup = map[model.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{} for _, t := range at { diff --git a/evaluate/report/csv.go b/evaluate/report/csv.go index 89abee4d..f7930e96 100644 --- a/evaluate/report/csv.go +++ b/evaluate/report/csv.go @@ -72,7 +72,7 @@ func (e *EvaluationFile) WriteLines(records [][]string) (err error) { return nil } -// evaluationHeader returns the CSV header for the evaluation CSV. +// EvaluationHeader returns the CSV header for the evaluation CSV. func EvaluationHeader() (header []string) { return append([]string{"model-id", "language", "repository", "task", "run", "score"}, metrics.AllAssessmentKeysStrings...) } @@ -84,12 +84,18 @@ func RecordsFromEvaluationCSVFiles(evaluationCSVFilePaths []string) (records [][ if err != nil { return nil, pkgerrors.WithStack(err) } - defer file.Close() + defer func() { + if err := file.Close(); err != nil { + panic(err) + } + }() csv := csv.NewReader(file) // Ignore the CSV header. - csv.Read() + if _, err := csv.Read(); err != nil { + panic(err) + } evaluationRecords, err := csv.ReadAll() if err != nil { diff --git a/evaluate/task/task-code-repair.go b/evaluate/task/code-repair.go similarity index 88% rename from evaluate/task/task-code-repair.go rename to evaluate/task/code-repair.go index ecbf52e5..df9043fd 100644 --- a/evaluate/task/task-code-repair.go +++ b/evaluate/task/code-repair.go @@ -14,26 +14,26 @@ import ( evaltask "github.com/symflower/eval-dev-quality/task" ) -// TaskCodeRepair holds the code repair task. -type TaskCodeRepair struct { +// CodeRepair holds the code repair task. +type CodeRepair struct { } -// TaskArgumentsCodeRepair holds extra arguments to be used in a query prompt. -type TaskArgumentsCodeRepair struct { +// ArgumentsCodeRepair holds extra arguments to be used in a query prompt. +type ArgumentsCodeRepair struct { // Mistakes holds the list of compilation errors for a package. Mistakes []string } -var _ evaltask.Task = (*TaskCodeRepair)(nil) +var _ evaltask.Task = (*CodeRepair)(nil) // Identifier returns the code repair task identifier. -func (t *TaskCodeRepair) Identifier() evaltask.Identifier { +func (t *CodeRepair) Identifier() evaltask.Identifier { return IdentifierCodeRepair } // Run performs source code repairing in a repository with compilation errors. // This task requires the repository to consist of multiple packages, with each containing one faulty implementation file and a corresponding test file. -func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { +func (t *CodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { modelCapability, ok := ctx.Model.(model.CapabilityRepairCode) if !ok { return nil, nil, pkgerrors.Wrap(evaltask.ErrTaskUnsupportedByModel, fmt.Sprintf("%q does not support %q", ctx.Model.ID(), string(t.Identifier()))) @@ -76,7 +76,7 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva RepositoryPath: packagePath, FilePath: sourceFile, - Arguments: &TaskArgumentsCodeRepair{ + Arguments: &ArgumentsCodeRepair{ Mistakes: mistakes, }, @@ -115,7 +115,7 @@ func (t *TaskCodeRepair) Run(ctx evaltask.Context) (repositoryAssessment map[eva } // unpackCodeRepairPackage validates a package under test and returns the source file path and the list of compilation errors found. -func (t *TaskCodeRepair) unpackCodeRepairPackage(ctx evaltask.Context, fileLogger *log.Logger, packagePath string) (sourceFilePath string, mistakes []string, err error) { +func (t *CodeRepair) unpackCodeRepairPackage(ctx evaltask.Context, fileLogger *log.Logger, packagePath string) (sourceFilePath string, mistakes []string, err error) { mistakes, err = ctx.Language.Mistakes(ctx.Logger, packagePath) if err != nil { return "", nil, pkgerrors.WithStack(err) diff --git a/evaluate/task/task-code-repair_test.go b/evaluate/task/code-repair_test.go similarity index 97% rename from evaluate/task/task-code-repair_test.go rename to evaluate/task/code-repair_test.go index cba2d01c..32defc8c 100644 --- a/evaluate/task/task-code-repair_test.go +++ b/evaluate/task/code-repair_test.go @@ -22,10 +22,10 @@ import ( "github.com/zimmski/osutil/bytesutil" ) -func TestTaskCodeRepairRun(t *testing.T) { +func TestCodeRepairRun(t *testing.T) { validate := func(t *testing.T, tc *tasktesting.TestCaseTask) { t.Run(tc.Name, func(t *testing.T) { - task, err := TaskForIdentifier(IdentifierCodeRepair) + task, err := ForIdentifier(IdentifierCodeRepair) require.NoError(t, err) tc.Task = task @@ -392,7 +392,7 @@ func TestValidateCodeRepairRepository(t *testing.T) { Before: func(repositoryPath string) { someFile, err := os.Create(filepath.Join(repositoryPath, "someFile.go")) require.NoError(t, err) - someFile.Close() + require.NoError(t, someFile.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -428,11 +428,11 @@ func TestValidateCodeRepairRepository(t *testing.T) { fileA, err := os.Create(filepath.Join(somePackage, "fileA.go")) require.NoError(t, err) - fileA.Close() + require.NoError(t, fileA.Close()) fileB, err := os.Create(filepath.Join(somePackage, "fileB.go")) require.NoError(t, err) - fileB.Close() + require.NoError(t, fileB.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -452,7 +452,7 @@ func TestValidateCodeRepairRepository(t *testing.T) { file, err := os.Create(filepath.Join(somePackage, "someFile.go")) require.NoError(t, err) - defer file.Close() + require.NoError(t, file.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -472,15 +472,15 @@ func TestValidateCodeRepairRepository(t *testing.T) { fileA, err := os.Create(filepath.Join(somePackage, "fileA.go")) require.NoError(t, err) - fileA.Close() + require.NoError(t, fileA.Close()) fileATest, err := os.Create(filepath.Join(somePackage, "fileA_test.go")) require.NoError(t, err) - fileATest.Close() + require.NoError(t, fileATest.Close()) fileBTest, err := os.Create(filepath.Join(somePackage, "fileB_test.go")) require.NoError(t, err) - fileBTest.Close() + require.NoError(t, fileBTest.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -529,11 +529,11 @@ func TestValidateCodeRepairRepository(t *testing.T) { fileA, err := os.Create(filepath.Join(somePackage, "FileA.java")) require.NoError(t, err) - fileA.Close() + require.NoError(t, fileA.Close()) fileB, err := os.Create(filepath.Join(somePackage, "FileB.java")) require.NoError(t, err) - fileB.Close() + require.NoError(t, fileB.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -554,7 +554,7 @@ func TestValidateCodeRepairRepository(t *testing.T) { fileA, err := os.Create(filepath.Join(somePackage, "FileA.java")) require.NoError(t, err) - fileA.Close() + require.NoError(t, fileA.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -576,15 +576,15 @@ func TestValidateCodeRepairRepository(t *testing.T) { fileA, err := os.Create(filepath.Join(sourcePackage, "FileA.java")) require.NoError(t, err) - fileA.Close() + require.NoError(t, fileA.Close()) fileATest, err := os.Create(filepath.Join(testPackage, "FileATest.java")) require.NoError(t, err) - fileATest.Close() + require.NoError(t, fileATest.Close()) fileBTest, err := os.Create(filepath.Join(testPackage, "FileBTest.java")) require.NoError(t, err) - fileBTest.Close() + require.NoError(t, fileBTest.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), diff --git a/evaluate/task/task.go b/evaluate/task/task.go index 56a5997f..1065d55e 100644 --- a/evaluate/task/task.go +++ b/evaluate/task/task.go @@ -45,15 +45,15 @@ var ( IdentifierTranspileSymflowerFix = registerIdentifier("transpile-symflower-fix") ) -// TaskForIdentifier returns a task based on the task identifier. -func TaskForIdentifier(taskIdentifier evaltask.Identifier) (task evaltask.Task, err error) { +// ForIdentifier returns a task based on the task identifier. +func ForIdentifier(taskIdentifier evaltask.Identifier) (task evaltask.Task, err error) { switch taskIdentifier { case IdentifierWriteTests: - return &TaskWriteTests{}, nil + return &WriteTests{}, nil case IdentifierCodeRepair: - return &TaskCodeRepair{}, nil + return &CodeRepair{}, nil case IdentifierTranspile: - return &TaskTranspile{}, nil + return &Transpile{}, nil default: return nil, pkgerrors.Wrap(evaltask.ErrTaskUnknown, string(taskIdentifier)) } diff --git a/evaluate/task/test-integration/task_test.go b/evaluate/task/test-integration/task_test.go index 24c8efb3..18daf80c 100644 --- a/evaluate/task/test-integration/task_test.go +++ b/evaluate/task/test-integration/task_test.go @@ -17,12 +17,12 @@ import ( toolstesting "github.com/symflower/eval-dev-quality/tools/testing" ) -func TestTaskWriteTestsRun(t *testing.T) { +func TestWriteTestsRun(t *testing.T) { toolstesting.RequiresTool(t, tools.NewSymflower()) validate := func(t *testing.T, tc *tasktesting.TestCaseTask) { t.Run(tc.Name, func(t *testing.T) { - task, err := evaluatetask.TaskForIdentifier(evaluatetask.IdentifierWriteTests) + task, err := evaluatetask.ForIdentifier(evaluatetask.IdentifierWriteTests) require.NoError(t, err) tc.Task = task diff --git a/evaluate/task/testing/task.go b/evaluate/task/testing/task.go index f88e708f..e9b072a4 100644 --- a/evaluate/task/testing/task.go +++ b/evaluate/task/testing/task.go @@ -17,6 +17,7 @@ import ( "github.com/zimmski/osutil" ) +// TestCaseTask holds a test case for a task. type TestCaseTask struct { Name string @@ -35,6 +36,7 @@ type TestCaseTask struct { type createRepositoryFunction func(logger *log.Logger, testDataPath string, repositoryPathRelative string) (repository evaltask.Repository, cleanup func(), err error) +// Validate validates the object. func (tc *TestCaseTask) Validate(t *testing.T, createRepository createRepositoryFunction) { resultPath := t.TempDir() @@ -99,6 +101,7 @@ func (tc *TestCaseTask) Validate(t *testing.T, createRepository createRepository } } +// TestCaseValidateRepository holds a test case for validating a repository. type TestCaseValidateRepository struct { Name string @@ -113,6 +116,7 @@ type TestCaseValidateRepository struct { type validateRepositoryForTask func(logger *log.Logger, repositoryPath string, language language.Language) (err error) +// Validate validates the object. func (tc *TestCaseValidateRepository) Validate(t *testing.T, validateRepositoryForTask validateRepositoryForTask) { t.Run(tc.Name, func(t *testing.T) { logOutput, logger := log.Buffer() diff --git a/evaluate/task/task-transpile.go b/evaluate/task/transpile.go similarity index 86% rename from evaluate/task/task-transpile.go rename to evaluate/task/transpile.go index fc24385e..4175c332 100644 --- a/evaluate/task/task-transpile.go +++ b/evaluate/task/transpile.go @@ -17,26 +17,26 @@ import ( "golang.org/x/exp/maps" ) -// TaskTranspile holds the transpilation task. -type TaskTranspile struct{} +// Transpile holds the transpilation task. +type Transpile struct{} -// TaskArgumentsTranspile holds extra arguments to be used in a query prompt. -type TaskArgumentsTranspile struct { +// ArgumentsTranspile holds extra arguments to be used in a query prompt. +type ArgumentsTranspile struct { // OriginLanguage holds the language we are transpiling from. OriginLanguage language.Language // OriginFilePath holds the path for the file containing the source code we want to transpile. OriginFilePath string } -var _ evaltask.Task = (*TaskTranspile)(nil) +var _ evaltask.Task = (*Transpile)(nil) // Identifier returns the transpilation task identifier. -func (t *TaskTranspile) Identifier() evaltask.Identifier { +func (t *Transpile) Identifier() evaltask.Identifier { return IdentifierTranspile } // Run transpiles code between languages and runs predefined tests to check if the transpilation was successful. -func (t *TaskTranspile) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { +func (t *Transpile) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { modelCapability, ok := ctx.Model.(model.CapabilityTranspile) if !ok { return nil, nil, pkgerrors.Wrap(evaltask.ErrTaskUnsupportedByModel, fmt.Sprintf("%q does not support %q", ctx.Model.ID(), string(t.Identifier()))) @@ -87,7 +87,7 @@ func (t *TaskTranspile) Run(ctx evaltask.Context) (repositoryAssessment map[eval RepositoryPath: filepath.Join(ctx.Repository.DataPath(), packagePath), FilePath: stubFilePath, - Arguments: &TaskArgumentsTranspile{ + Arguments: &ArgumentsTranspile{ OriginLanguage: originLanguage, OriginFilePath: originFilePath, }, @@ -130,18 +130,18 @@ func (t *TaskTranspile) Run(ctx evaltask.Context) (repositoryAssessment map[eval withSymflowerAssessments.Add(withSymflowerAssessmentsForFile) continue - } else { - testsPassing := withSymflowerFixTestResult.TestsPass - taskLogger.Printf("with symflower repair: Executes tests with %d tests passing", testsPassing) + } - // Symflower was able to fix a failure so now update the assessment with the improved results. - withSymflowerFixAssessments := metrics.NewAssessments() - withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime - withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted) - withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyTestsPassing, uint64(testsPassing)) + testsPassing := withSymflowerFixTestResult.TestsPass + taskLogger.Printf("with symflower repair: Executes tests with %d tests passing", testsPassing) - withSymflowerAssessmentsForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentsForFile, withSymflowerFixAssessments) - } + // Symflower was able to fix a failure so now update the assessment with the improved results. + withSymflowerFixAssessments := metrics.NewAssessments() + withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime + withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted) + withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyTestsPassing, uint64(testsPassing)) + + withSymflowerAssessmentsForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentsForFile, withSymflowerFixAssessments) } } else { testsPassing := testResult.TestsPass @@ -164,7 +164,7 @@ func (t *TaskTranspile) Run(ctx evaltask.Context) (repositoryAssessment map[eval } // unpackTranspilerPackage returns a set of source file paths and the corresponding language we want to transpile from and also the path to the file that holds the stub. -func (t *TaskTranspile) unpackTranspilerPackage(ctx evaltask.Context, logger *log.Logger, packagePath string) (originFilePathsWithLanguage map[string]language.Language, stubFilePath string, err error) { +func (t *Transpile) unpackTranspilerPackage(ctx evaltask.Context, logger *log.Logger, packagePath string) (originFilePathsWithLanguage map[string]language.Language, stubFilePath string, err error) { originFilePathsWithLanguage = map[string]language.Language{} packagePathAbsolute := filepath.Join(ctx.Repository.DataPath(), packagePath) diff --git a/evaluate/task/task-transpile_test.go b/evaluate/task/transpile_test.go similarity index 98% rename from evaluate/task/task-transpile_test.go rename to evaluate/task/transpile_test.go index 20186ecd..487c9731 100644 --- a/evaluate/task/task-transpile_test.go +++ b/evaluate/task/transpile_test.go @@ -24,10 +24,10 @@ import ( "github.com/zimmski/osutil/bytesutil" ) -func TestTaskTranspileRun(t *testing.T) { +func TestTranspileRun(t *testing.T) { validate := func(t *testing.T, tc *tasktesting.TestCaseTask) { t.Run(tc.Name, func(t *testing.T) { - task, err := TaskForIdentifier(IdentifierTranspile) + task, err := ForIdentifier(IdentifierTranspile) require.NoError(t, err) tc.Task = task @@ -40,8 +40,8 @@ func TestTaskTranspileRun(t *testing.T) { } validateContext := func(t *testing.T, c model.Context) { - arguments, ok := c.Arguments.(*TaskArgumentsTranspile) - require.True(t, ok, fmt.Sprintf("%T != TaskArgumentsTranspile", arguments)) + arguments, ok := c.Arguments.(*ArgumentsTranspile) + require.True(t, ok, fmt.Sprintf("%T != ArgumentsTranspile", arguments)) assert.True(t, strings.HasPrefix(arguments.OriginFilePath, "implementation"+string(os.PathSeparator)), fmt.Sprintf("%q must be a relative path", arguments.OriginFilePath)) } @@ -387,8 +387,8 @@ func TestTaskTranspileRun(t *testing.T) { end `) modelMock.RegisterGenerateSuccess(t, func(t *testing.T, c model.Context) { - arguments, ok := c.Arguments.(*TaskArgumentsTranspile) - require.True(t, ok, fmt.Sprintf("%T != TaskArgumentsTranspile", arguments)) + arguments, ok := c.Arguments.(*ArgumentsTranspile) + require.True(t, ok, fmt.Sprintf("%T != ArgumentsTranspile", arguments)) assert.True(t, strings.HasPrefix(arguments.OriginFilePath, "implementation/"), fmt.Sprintf("%q must be a relative path", arguments.OriginFilePath)) // This assertion checks explicitly that stub files are not overwritten, now that we added a third language with Ruby. @@ -769,7 +769,7 @@ func TestValidateTranspileRepository(t *testing.T) { }) } -func TestTaskTranspileUnpackTranspilerPackage(t *testing.T) { +func TestTranspileUnpackTranspilerPackage(t *testing.T) { type testCase struct { Name string @@ -799,7 +799,7 @@ func TestTaskTranspileUnpackTranspilerPackage(t *testing.T) { require.NoError(t, err) defer cleanup() - taskTranspile := TaskTranspile{} + taskTranspile := Transpile{} ctx := evaltask.Context{ Language: tc.DestinationLanguage, Repository: repository, diff --git a/evaluate/task/task-write-test.go b/evaluate/task/write-test.go similarity index 81% rename from evaluate/task/task-write-test.go rename to evaluate/task/write-test.go index 5330cf44..78666999 100644 --- a/evaluate/task/task-write-test.go +++ b/evaluate/task/write-test.go @@ -14,19 +14,19 @@ import ( evaltask "github.com/symflower/eval-dev-quality/task" ) -// TaskWriteTests holds the write test task. -type TaskWriteTests struct { +// WriteTests holds the write test task. +type WriteTests struct { } -var _ evaltask.Task = (*TaskWriteTests)(nil) +var _ evaltask.Task = (*WriteTests)(nil) // Identifier returns the write test task identifier. -func (t *TaskWriteTests) Identifier() evaltask.Identifier { +func (t *WriteTests) Identifier() evaltask.Identifier { return IdentifierWriteTests } -// TaskWriteTests generates test files for the given implementation file in a repository. -func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { +// Run generates test files for the given implementation file in a repository. +func (t *WriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[evaltask.Identifier]metrics.Assessments, problems []error, err error) { modelCapability, ok := ctx.Model.(model.CapabilityWriteTests) if !ok { return nil, nil, pkgerrors.Wrap(evaltask.ErrTaskUnsupportedByModel, fmt.Sprintf("%q does not support %q", ctx.Model.ID(), string(t.Identifier()))) @@ -105,17 +105,17 @@ func (t *TaskWriteTests) Run(ctx evaltask.Context) (repositoryAssessment map[eva withSymflowerFixAssessment.Add(withSymflowerFixAssessmentForFile) continue - } else { - ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", withSymflowerFixTestResult.Coverage) + } - // Symflower was able to fix a failure so now update the assessment with the improved results. - withSymflowerFixAssessments := metrics.NewAssessments() - withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime - withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted) - withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage) + ctx.Logger.Printf("with symflower repair: Executes tests with %d coverage objects", withSymflowerFixTestResult.Coverage) - withSymflowerFixAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerFixAssessments) - } + // Symflower was able to fix a failure so now update the assessment with the improved results. + withSymflowerFixAssessments := metrics.NewAssessments() + withSymflowerFixAssessments[metrics.AssessmentKeyProcessingTime] = processingTime + withSymflowerFixAssessments.Award(metrics.AssessmentKeyFilesExecuted) + withSymflowerFixAssessments.AwardPoints(metrics.AssessmentKeyCoverage, withSymflowerFixTestResult.Coverage) + + withSymflowerFixAssessmentForFile = metrics.CombineWithSymflowerFixAssessments(modelAssessmentForFile, withSymflowerFixAssessments) } } else { taskLogger.Printf("Executes tests with %d coverage objects", testResult.Coverage) diff --git a/evaluate/task/task-write-test_test.go b/evaluate/task/write-test_test.go similarity index 97% rename from evaluate/task/task-write-test_test.go rename to evaluate/task/write-test_test.go index f6a0bcf4..7f06195b 100644 --- a/evaluate/task/task-write-test_test.go +++ b/evaluate/task/write-test_test.go @@ -20,16 +20,15 @@ import ( languagetesting "github.com/symflower/eval-dev-quality/language/testing" "github.com/symflower/eval-dev-quality/log" modeltesting "github.com/symflower/eval-dev-quality/model/testing" - "github.com/symflower/eval-dev-quality/task" evaltask "github.com/symflower/eval-dev-quality/task" "github.com/zimmski/osutil" "github.com/zimmski/osutil/bytesutil" ) -func TestTaskWriteTestsRun(t *testing.T) { +func TestWriteTestsRun(t *testing.T) { validate := func(t *testing.T, tc *tasktesting.TestCaseTask) { t.Run(tc.Name, func(t *testing.T) { - task, err := TaskForIdentifier(IdentifierWriteTests) + task, err := ForIdentifier(IdentifierWriteTests) require.NoError(t, err) tc.Task = task @@ -192,7 +191,7 @@ func TestTaskWriteTestsRun(t *testing.T) { `), expectedAssessments, expectedProblems, false) } { - expectedAssessments := map[task.Identifier]metrics.Assessments{ + expectedAssessments := map[evaltask.Identifier]metrics.Assessments{ IdentifierWriteTests: metrics.Assessments{ metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, metrics.AssessmentKeyResponseNoError: 1, @@ -244,7 +243,7 @@ func TestTaskWriteTestsRun(t *testing.T) { TestDataPath: temporaryDirectoryPath, RepositoryPath: filepath.Join("ruby", "plain"), - ExpectedRepositoryAssessment: map[task.Identifier]metrics.Assessments{ + ExpectedRepositoryAssessment: map[evaltask.Identifier]metrics.Assessments{ IdentifierWriteTests: metrics.Assessments{ metrics.AssessmentKeyFilesExecutedMaximumReachable: 1, metrics.AssessmentKeyFilesExecuted: 1, @@ -288,7 +287,7 @@ func TestValidateWriteTestsRepository(t *testing.T) { Before: func(repositoryPath string) { fileATest, err := os.Create(filepath.Join(repositoryPath, "fileA_test.go")) require.NoError(t, err) - fileATest.Close() + require.NoError(t, fileATest.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), @@ -327,7 +326,7 @@ func TestValidateWriteTestsRepository(t *testing.T) { fileATest, err := os.Create(filepath.Join(somePackage, "FileATest.java")) require.NoError(t, err) - fileATest.Close() + require.NoError(t, fileATest.Close()) }, TestdataPath: filepath.Join("..", "..", "testdata"), diff --git a/language/golang/language_test.go b/language/golang/language_test.go index 79a607c6..fd6ee7cf 100644 --- a/language/golang/language_test.go +++ b/language/golang/language_test.go @@ -149,14 +149,6 @@ func TestLanguageExecute(t *testing.T) { } func TestMistakes(t *testing.T) { - type testCase struct { - Name string - - RepositoryPath string - - ExpectedMistakes []string - } - validate := func(t *testing.T, tc *languagetesting.TestCaseMistakes) { tc.Validate(t) } diff --git a/language/java/language.go b/language/java/language.go index 8f27c782..21fffb2a 100644 --- a/language/java/language.go +++ b/language/java/language.go @@ -80,8 +80,6 @@ func (l *Language) DefaultTestFileSuffix() string { return "Test.java" } -var languageJavaCoverageMatch = regexp.MustCompile(`Total coverage (.+?)%`) - // ExecuteTests invokes the language specific testing on the given repository. func (l *Language) ExecuteTests(logger *log.Logger, repositoryPath string) (testResult *language.TestResult, problems []error, err error) { ctx, cancel := context.WithTimeout(context.Background(), language.DefaultExecutionTimeout) diff --git a/language/java/language_test.go b/language/java/language_test.go index ed340ef7..f8fb4d49 100644 --- a/language/java/language_test.go +++ b/language/java/language_test.go @@ -231,14 +231,6 @@ func TestLanguageExecute(t *testing.T) { } func TestMistakes(t *testing.T) { - type testCase struct { - Name string - - RepositoryPath string - - ExpectedMistakes []string - } - validate := func(t *testing.T, tc *languagetesting.TestCaseMistakes) { tc.Validate(t) } diff --git a/language/language.go b/language/language.go index a3b3f8f3..f5df4a68 100644 --- a/language/language.go +++ b/language/language.go @@ -75,7 +75,7 @@ func RepositoriesForLanguage(language Language, testdataPath string) (relativeRe languagePath := filepath.Join(testdataPath, language.ID()) languageRepositories, err := os.ReadDir(languagePath) if err != nil { - pkgerrors.WithMessagef(err, "language path %q cannot be accessed", languagePath) + return nil, pkgerrors.WithMessagef(err, "language path %q cannot be accessed", languagePath) } for _, repository := range languageRepositories { diff --git a/language/testing/language.go b/language/testing/language.go index 2655c51c..07196ed1 100644 --- a/language/testing/language.go +++ b/language/testing/language.go @@ -11,6 +11,7 @@ import ( "github.com/zimmski/osutil" ) +// TestCaseExecuteTests holds a test case for "test execution" task. type TestCaseExecuteTests struct { Name string @@ -25,6 +26,7 @@ type TestCaseExecuteTests struct { ExpectedErrorText string } +// Validate validates the object. func (tc *TestCaseExecuteTests) Validate(t *testing.T) { t.Run(tc.Name, func(t *testing.T) { logOutput, logger := log.Buffer() @@ -60,6 +62,7 @@ func (tc *TestCaseExecuteTests) Validate(t *testing.T) { }) } +// TestCaseMistakes holds a test case for "mistake fixing" task. type TestCaseMistakes struct { Name string @@ -70,6 +73,7 @@ type TestCaseMistakes struct { ExpectedMistakesContains func(t *testing.T, mistakes []string) } +// Validate validates the object. func (tc *TestCaseMistakes) Validate(t *testing.T) { t.Run(tc.Name, func(t *testing.T) { temporaryPath := t.TempDir() diff --git a/log/logger.go b/log/logger.go index cfc9db9b..85ca865a 100644 --- a/log/logger.go +++ b/log/logger.go @@ -21,13 +21,20 @@ import ( type AttributeKey string const ( - AttributeKeyArtifact AttributeKey = "Artifact" - AttributeKeyLanguage = "Language" - AttributeKeyModel = "Model" - AttributeKeyRepository = "Repository" - AttributeKeyResultPath = "ResultPath" - AttributeKeyRun = "Run" - AttributeKeyTask = "Task" + // AttributeKeyArtifact holds the key for the "Artifact" attribute. + AttributeKeyArtifact = AttributeKey("Artifact") + // AttributeKeyLanguage holds the key for the "Language" attribute. + AttributeKeyLanguage = AttributeKey("Language") + // AttributeKeyModel holds the key for the "Model" attribute. + AttributeKeyModel = AttributeKey("Model") + // AttributeKeyRepository holds the key for the "Repository" attribute. + AttributeKeyRepository = AttributeKey("Repository") + // AttributeKeyResultPath holds the key for the "ResultPath" attribute. + AttributeKeyResultPath = AttributeKey("ResultPath") + // AttributeKeyRun holds the key for the "Run" attribute. + AttributeKeyRun = AttributeKey("Run") + // AttributeKeyTask holds the key for the "Task" attribute. + AttributeKeyTask = AttributeKey("Task") ) // Attribute returns a logging attribute. @@ -39,9 +46,13 @@ func Attribute(key AttributeKey, value any) (attribute slog.Attr) { type Flags int const ( + // FlagMessageOnly defines to log only the message. FlagMessageOnly = 0 - FlagDate = 1 << iota + // FlagDate defines to log the date. + FlagDate = 1 << iota + // FlagTime defines to log the time. FlagTime + // FlagStandard defines to log with the standard format. FlagStandard = FlagDate | FlagTime ) @@ -64,7 +75,9 @@ func CloseOpenLogFiles() { defer openLogFilesMutex.Unlock() for _, logFile := range openLogFiles { - logFile.Close() + if err := logFile.Close(); err != nil { + panic(err) + } } openLogFiles = nil @@ -274,15 +287,15 @@ func (h *spawningHandler) Handle(ctx context.Context, record slog.Record) (err e } if h.flags&FlagDate != 0 { - fmt.Fprint(writer, record.Time.Format("2006/01/02")) - fmt.Fprint(writer, " ") + _, _ = fmt.Fprint(writer, record.Time.Format("2006/01/02")) + _, _ = fmt.Fprint(writer, " ") } if h.flags&FlagTime != 0 { - fmt.Fprint(writer, record.Time.Format("15:04:05")) - fmt.Fprint(writer, " ") + _, _ = fmt.Fprint(writer, record.Time.Format("15:04:05")) + _, _ = fmt.Fprint(writer, " ") } - fmt.Fprintln(writer, record.Message) + _, _ = fmt.Fprintln(writer, record.Message) return nil } @@ -306,13 +319,15 @@ func (h *spawningHandler) WithAttrs(attributes []slog.Attr) slog.Handler { logFilePath := spawner.FilePath(h.attributes) writer, err := newLogWriter(h.writer, logFilePath) if err != nil { - fmt.Fprintf(h.writer, "ERROR: cannot create new handler: %s\n", err.Error()) + _, _ = fmt.Fprintf(h.writer, "ERROR: cannot create new handler: %s\n", err.Error()) continue } logMessage := fmt.Sprintf("Spawning new log file at %s", logFilePath) - h.Handle(context.Background(), slog.NewRecord(time.Now(), slog.LevelInfo, logMessage, 0)) + if err := h.Handle(context.Background(), slog.NewRecord(time.Now(), slog.LevelInfo, logMessage, 0)); err != nil { + panic(err) + } newHandler.writer = writer newHandler.logFileSpawners = slices.Delete(newHandler.logFileSpawners, i, i+1) // The currently triggered log file spawner must not be part of the new handler as it would trigger again and again. diff --git a/model/llm/llm.go b/model/llm/llm.go index 4bd52fee..e522eb23 100644 --- a/model/llm/llm.go +++ b/model/llm/llm.go @@ -265,13 +265,11 @@ var _ model.CapabilityRepairCode = (*Model)(nil) // RepairCode queries the model to repair a source code with compilation error. func (m *Model) RepairCode(ctx model.Context) (assessment metrics.Assessments, err error) { - codeRepairArguments, ok := ctx.Arguments.(*evaluatetask.TaskArgumentsCodeRepair) + codeRepairArguments, ok := ctx.Arguments.(*evaluatetask.ArgumentsCodeRepair) if !ok { return nil, pkgerrors.Errorf("unexpected type %#v", ctx.Arguments) } - assessment = map[metrics.AssessmentKey]uint64{} - data, err := os.ReadFile(filepath.Join(ctx.RepositoryPath, ctx.FilePath)) if err != nil { return nil, pkgerrors.WithStack(err) @@ -320,7 +318,7 @@ var _ model.CapabilityTranspile = (*Model)(nil) // Transpile queries the model to transpile source code to another language. func (m *Model) Transpile(ctx model.Context) (assessment metrics.Assessments, err error) { - transpileArguments, ok := ctx.Arguments.(*evaluatetask.TaskArgumentsTranspile) + transpileArguments, ok := ctx.Arguments.(*evaluatetask.ArgumentsTranspile) if !ok { return nil, pkgerrors.Errorf("unexpected type %#v", ctx.Arguments) } diff --git a/model/llm/llm_test.go b/model/llm/llm_test.go index 6963282b..f0cd951b 100644 --- a/model/llm/llm_test.go +++ b/model/llm/llm_test.go @@ -165,7 +165,7 @@ func TestModelRepairSourceCodeFile(t *testing.T) { RepositoryPath: repositoryPath, FilePath: tc.SourceFilePath, - Arguments: &evaluatetask.TaskArgumentsCodeRepair{ + Arguments: &evaluatetask.ArgumentsCodeRepair{ Mistakes: tc.Mistakes, }, @@ -510,7 +510,7 @@ func TestModelTranspile(t *testing.T) { RepositoryPath: repositoryPath, FilePath: tc.StubFilePath, - Arguments: &evaluatetask.TaskArgumentsTranspile{ + Arguments: &evaluatetask.ArgumentsTranspile{ OriginLanguage: tc.OriginLanguage, OriginFilePath: tc.OriginFilePath, }, diff --git a/model/symflower/symflower.go b/model/symflower/symflower.go index ca1f2a74..07cd0e29 100644 --- a/model/symflower/symflower.go +++ b/model/symflower/symflower.go @@ -53,7 +53,7 @@ func (m *Model) MetaInformation() (metaInformation *model.MetaInformation) { var _ model.CapabilityWriteTests = (*Model)(nil) -// generateTestsForFile generates test files for the given implementation file in a repository. +// WriteTests generates test files for the given implementation file in a repository. func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, err error) { ctxWithTimeout, cancel := context.WithTimeout(context.Background(), m.symbolicExecutionTimeout) defer cancel() diff --git a/model/symflower/symflower_test.go b/model/symflower/symflower_test.go index 95980998..75f7dd90 100644 --- a/model/symflower/symflower_test.go +++ b/model/symflower/symflower_test.go @@ -170,31 +170,31 @@ func TestExtractGeneratedFilePaths(t *testing.T) { validate(t, &testCase{ Name: "Full output", - Output: `2024/04/25 20:13:49 Evaluating model "symflower/symbolic-execution" using language "java" and repository "java/plain" -2024/04/25 20:13:49 $ symflower unit-tests --code-disable-fetch-dependencies --workspace /tmp/eval-dev-quality1527239031/plain src/main/java/com/eval/Plain.java -Analyzing workspace /tmp/eval-dev-quality1527239031/plain/ -Search for Java files -Load dependency stdlib:@dev -Found 1 Java files -Found 0 problems in Java files -src/main/java/com/eval/Plain.java: found 1 symbols -src/main/java/com/eval/Plain.java: com.eval.Plain.plain: computing test cases -src/main/java/com/eval/Plain.java: com.eval.Plain.plain: computed 1 unit tests -src/main/java/com/eval/Plain.java: com.eval.Plain.plain: found 0 problems -Symflower's table driven test style is not supported, switching to basic style -src/main/java/com/eval/Plain.java: generated unit test file src/test/java/com/eval/PlainSymflowerTest.java -src/test/java/com/eval/Foo.java: generated unit test file src/test/java/com/eval/Foo.java -src/test/java/com/eval/Bar.java: generated unit test file src/test/java/com/eval/Bar.java -src/test/java/com/eval/FooBar.java: generated unit test file src/test/java/com/eval/FooBar.java -Analyzed 1 out of 1 source files -Had 0 errors that block a full analysis -Generated 1 test -Found 0 potential problems -Give us your feedback and let us know how we can improve Symflower at hello@symflower.com or https://github.com/symflower/symflower. Thanks so much for youhelp! -2024/04/25 20:13:52 $ symflower test --language java --workspace /tmp/eval-dev-quality1527239031/plain -Total coverage 100.000000% -Give us your feedback and let us know how we can improve Symflower at hello@symflower.com or https://github.com/symflower/symflower. Thanks so much for youhelp! -2024/04/25 20:13:58 Evaluated model "symflower/symbolic-execution" using language "java" and repository "java/plain": encountered 0 problems: []`, + Output: ` + 2024/04/25 20:13:49 Evaluating model "symflower/symbolic-execution" using language "java" and repository "java/plain" + 2024/04/25 20:13:49 $ symflower unit-tests --code-disable-fetch-dependencies --workspace /tmp/eval-dev-quality1527239031/plain src/main/java/com/eval/Plain.java + Analyzing workspace /tmp/eval-dev-quality1527239031/plain/ + Search for Java files + Load dependency stdlib:@dev + Found 1 Java files + Found 0 problems in Java files + src/main/java/com/eval/Plain.java: found 1 symbols + src/main/java/com/eval/Plain.java: com.eval.Plain.plain: computing test cases + src/main/java/com/eval/Plain.java: com.eval.Plain.plain: computed 1 unit tests + src/main/java/com/eval/Plain.java: com.eval.Plain.plain: found 0 problems + Symflower's table driven test style is not supported, switching to basic style + src/main/java/com/eval/Plain.java: generated unit test file src/test/java/com/eval/PlainSymflowerTest.java + src/test/java/com/eval/Foo.java: generated unit test file src/test/java/com/eval/Foo.java + src/test/java/com/eval/Bar.java: generated unit test file src/test/java/com/eval/Bar.java + src/test/java/com/eval/FooBar.java: generated unit test file src/test/java/com/eval/FooBar.java + Analyzed 1 out of 1 source files + Had 0 errors that block a full analysis + Generated 1 test + Found 0 potential problems + 2024/04/25 20:13:52 $ symflower test --language java --workspace /tmp/eval-dev-quality1527239031/plain + Total coverage 100.000000% + 2024/04/25 20:13:58 Evaluated model "symflower/symbolic-execution" using language "java" and repository "java/plain": encountered 0 problems: [] + `, ExpectedFilePaths: []string{ "src/test/java/com/eval/PlainSymflowerTest.java", diff --git a/provider/openai-api/query.go b/provider/openai-api/query.go index 8411fea0..5d2e1ee6 100644 --- a/provider/openai-api/query.go +++ b/provider/openai-api/query.go @@ -8,7 +8,7 @@ import ( "github.com/sashabaranov/go-openai" ) -// QueryOpenAIModel queries an OpenAI API model. +// QueryOpenAIAPIModel queries an OpenAI API model. func QueryOpenAIAPIModel(ctx context.Context, client *openai.Client, modelIdentifier string, promptText string) (response string, err error) { apiResponse, err := client.CreateChatCompletion( ctx, diff --git a/provider/openrouter/openrouter.go b/provider/openrouter/openrouter.go index 7c4e4e2f..8f6277e8 100644 --- a/provider/openrouter/openrouter.go +++ b/provider/openrouter/openrouter.go @@ -96,7 +96,11 @@ func (p *Provider) fetchModels() (models ModelsList, err error) { if err != nil { return pkgerrors.WithStack(err) } - defer response.Body.Close() + defer func() { + if err := response.Body.Close(); err != nil { + panic(err) + } + }() if response.StatusCode != http.StatusOK { return pkgerrors.Errorf("received status code %d when querying provider models", response.StatusCode) diff --git a/scripts/ollama-models/main.go b/scripts/ollama-models/main.go index 967a60c9..13876913 100644 --- a/scripts/ollama-models/main.go +++ b/scripts/ollama-models/main.go @@ -13,6 +13,7 @@ import ( "strings" ) +// JSONModels holds the collection of the models. type JSONModels struct { Models []struct { Name string `json:"name"` @@ -21,16 +22,19 @@ type JSONModels struct { } `json:"models"` } +// FullOllamaModels holds a full Ollama model collection. type FullOllamaModels struct { Models []Model `json:"models"` } +// Model holds a model. type Model struct { Name string `json:"name"` Description string `json:"description"` Tags []Tag `json:"tags"` } +// Tag holds a tag. type Tag struct { Name string `json:"name"` Size float64 `json:"size"` @@ -55,7 +59,7 @@ func main() { tagsBody := OnPage("https://ollama.com/library/" + model.Name + "/tags") - split := strings.Split(stripHtmlRegex(tagsBody), " ") + split := strings.Split(stripHTMLRegex(tagsBody), " ") for _, tag := range model.Tags { textSize := "" @@ -100,32 +104,45 @@ func main() { if err != nil { log.Fatalf("failed creating file: %s", err) } - defer file.Close() + defer func() { + if err := file.Close(); err != nil { + panic(err) + } + }() csvwriter := csv.NewWriter(file) - csvwriter.Write([]string{"model", "size (GB)"}) + if err := csvwriter.Write([]string{"model", "size (GB)"}); err != nil { + panic(err) + } for _, m := range fullModels.Models { for _, t := range m.Tags { - csvwriter.Write([]string{m.Name + "/" + t.Name, fmt.Sprintf("%.2f", t.Size)}) + if err := csvwriter.Write([]string{m.Name + "/" + t.Name, fmt.Sprintf("%.2f", t.Size)}); err != nil { + panic(err) + } } } csvwriter.Flush() } +// OnPage returns the body of an URL. func OnPage(link string) string { res, err := http.Get(link) if err != nil { log.Fatal(err) } content, err := io.ReadAll(res.Body) - res.Body.Close() + defer func() { + if err := res.Body.Close(); err != nil { + panic(err) + } + }() if err != nil { log.Fatal(err) } return string(content) } -func stripHtmlRegex(input string) string { +func stripHTMLRegex(input string) string { input = regexp.MustCompile(`<.*?>`).ReplaceAllString(input, "") input = strings.ReplaceAll(input, ">", "") input = strings.ReplaceAll(input, "<", "") diff --git a/scripts/openrouter-models/main.go b/scripts/openrouter-models/main.go index ced994e9..3fa07c9b 100644 --- a/scripts/openrouter-models/main.go +++ b/scripts/openrouter-models/main.go @@ -94,12 +94,20 @@ func main() { if err != nil { panic(err) } - defer csvFile.Close() + defer func() { + if err := csvFile.Close(); err != nil { + panic(err) + } + }() csvWriter := csv.NewWriter(csvFile) defer csvWriter.Flush() - csvWriter.Write([]string{"model"}) + if err := csvWriter.Write([]string{"model"}); err != nil { + panic(err) + } for _, model := range modelNames { - csvWriter.Write([]string{model}) + if err := csvWriter.Write([]string{model}); err != nil { + panic(err) + } } } diff --git a/scripts/reliability/main.go b/scripts/reliability/main.go index 02b0d882..e42289d4 100644 --- a/scripts/reliability/main.go +++ b/scripts/reliability/main.go @@ -29,7 +29,9 @@ func main() { if err != nil { panic(err) } - os.Stderr.WriteString(fmt.Sprintf("Loaded CSV file with %d records\n", len(records))) + if _, err := os.Stderr.WriteString(fmt.Sprintf("Loaded CSV file with %d records\n", len(records))); err != nil { + panic(err) + } // Collect all results and also the maximum scores. scoresPerModelPerRun := map[string][]float64{} diff --git a/util/exec.go b/util/exec.go index 9b8ffee8..62811016 100644 --- a/util/exec.go +++ b/util/exec.go @@ -172,6 +172,6 @@ func (p *Parallel) Execute(f func()) { } // Wait waits until all executions are done. -func (l *Parallel) Wait() { - l.wg.Wait() +func (p *Parallel) Wait() { + p.wg.Wait() }