Skip to content

Commit

Permalink
Task for code transpilation, so models can transpile Go code to Java and back
Browse files Browse the repository at this point in the history

Closes #201
  • Loading branch information
ruiAzevedo19 committed Jul 22, 2024
1 parent 640337f commit 5a439a6
Show file tree
Hide file tree
Showing 17 changed files with 1,025 additions and 34 deletions.
1 change: 1 addition & 0 deletions .mockery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ packages:
Model:
CapabilityWriteTests:
CapabilityRepairCode:
CapabilityTranspile:
github.com/symflower/eval-dev-quality/provider:
interfaces:
Loader:
Expand Down
24 changes: 12 additions & 12 deletions cmd/eval-dev-quality/cmd/report_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,24 @@ import (
)

var claudeEvaluationCSVFileContent = bytesutil.StringTrimIndentations(`
openrouter/anthropic/claude-2.0,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1
openrouter/anthropic/claude-2.0,golang,golang/plain,write-tests,2,2,2,2,2,2,2,2,2,2
openrouter/anthropic/claude-2.0,java,java/light,write-tests,3,3,3,3,3,3,3,3,3,3
openrouter/anthropic/claude-2.0,java,java/plain,write-tests,4,4,4,4,4,4,4,4,4,4
openrouter/anthropic/claude-2.0,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1,1
openrouter/anthropic/claude-2.0,golang,golang/plain,write-tests,2,2,2,2,2,2,2,2,2,2,2
openrouter/anthropic/claude-2.0,java,java/light,write-tests,3,3,3,3,3,3,3,3,3,3,3
openrouter/anthropic/claude-2.0,java,java/plain,write-tests,4,4,4,4,4,4,4,4,4,4,4
`)

var gemmaEvaluationCSVFileContent = bytesutil.StringTrimIndentations(`
openrouter/google/gemma-7b-it,golang,golang/light,write-tests,5,5,5,5,5,5,5,5,5,5
openrouter/google/gemma-7b-it,golang,golang/plain,write-tests,6,6,6,6,6,6,6,6,6,6
openrouter/google/gemma-7b-it,java,java/light,write-tests,7,7,7,7,7,7,7,7,7,7
openrouter/google/gemma-7b-it,java,java/plain,write-tests,8,8,8,8,8,8,8,8,8,8
openrouter/google/gemma-7b-it,golang,golang/light,write-tests,5,5,5,5,5,5,5,5,5,5,5
openrouter/google/gemma-7b-it,golang,golang/plain,write-tests,6,6,6,6,6,6,6,6,6,6,6
openrouter/google/gemma-7b-it,java,java/light,write-tests,7,7,7,7,7,7,7,7,7,7,7
openrouter/google/gemma-7b-it,java,java/plain,write-tests,8,8,8,8,8,8,8,8,8,8,8
`)

var gpt4EvaluationCSVFileContent = bytesutil.StringTrimIndentations(`
openrouter/openai/gpt-4,golang,golang/light,write-tests,9,9,9,9,9,9,9,9,9,9
openrouter/openai/gpt-4,golang,golang/plain,write-tests,10,10,10,10,10,10,10,10,10,10
openrouter/openai/gpt-4,java,java/light,write-tests,11,11,11,11,11,11,11,11,11,11
openrouter/openai/gpt-4,java,java/plain,write-tests,12,12,12,12,12,12,12,12,12,12
openrouter/openai/gpt-4,golang,golang/light,write-tests,9,9,9,9,9,9,9,9,9,9,9
openrouter/openai/gpt-4,golang,golang/plain,write-tests,10,10,10,10,10,10,10,10,10,10,10
openrouter/openai/gpt-4,java,java/light,write-tests,11,11,11,11,11,11,11,11,11,11,11
openrouter/openai/gpt-4,java,java/plain,write-tests,12,12,12,12,12,12,12,12,12,12,12
`)

func TestReportExecute(t *testing.T) {
Expand Down
4 changes: 4 additions & 0 deletions evaluate/metrics/assessment.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ var (
// AssessmentKeyCoverage counts execution coverage objects.
AssessmentKeyCoverage = RegisterAssessmentKey("coverage", 10)

// AssessmentKeyTestsPassing holds the percentage of passing tests.
AssessmentKeyTestsPassing = RegisterAssessmentKey("tests-passing", 10)

// AssessmentKeyResponseCharacterCount counts the number of characters of a response.
AssessmentKeyResponseCharacterCount = RegisterAssessmentKey("response-character-count", 0)
// AssessmentKeyGenerateTestsForFileCharacterCount counts the number of characters of a generated test file.
Expand Down Expand Up @@ -167,6 +170,7 @@ func CombineWithSymflowerFixAssessments(model Assessments, fixed Assessments) (c
combined[AssessmentKeyResponseNoError] = model[AssessmentKeyResponseNoError]
combined[AssessmentKeyResponseNoExcess] = model[AssessmentKeyResponseNoExcess]
combined[AssessmentKeyResponseWithCode] = model[AssessmentKeyResponseWithCode]
combined[AssessmentKeyTestsPassing] = fixed[AssessmentKeyTestsPassing]

return combined
}
7 changes: 5 additions & 2 deletions evaluate/metrics/assessment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func TestAssessmentString(t *testing.T) {

Assessment: NewAssessments(),

ExpectedString: "score=0, coverage=0, files-executed=0, files-executed-maximum-reachable=0, generate-tests-for-file-character-count=0, processing-time=0, response-character-count=0, response-no-error=0, response-no-excess=0, response-with-code=0",
ExpectedString: "score=0, coverage=0, files-executed=0, files-executed-maximum-reachable=0, generate-tests-for-file-character-count=0, processing-time=0, response-character-count=0, response-no-error=0, response-no-excess=0, response-with-code=0, tests-passing=0",
})

validate(t, &testCase{
Expand All @@ -153,9 +153,10 @@ func TestAssessmentString(t *testing.T) {
AssessmentKeyResponseNoExcess: 4,
AssessmentKeyResponseWithCode: 5,
AssessmentKeyProcessingTime: 200,
AssessmentKeyTestsPassing: 70,
},

ExpectedString: "score=15, coverage=1, files-executed=2, files-executed-maximum-reachable=2, generate-tests-for-file-character-count=50, processing-time=200, response-character-count=100, response-no-error=3, response-no-excess=4, response-with-code=5",
ExpectedString: "score=85, coverage=1, files-executed=2, files-executed-maximum-reachable=2, generate-tests-for-file-character-count=50, processing-time=200, response-character-count=100, response-no-error=3, response-no-excess=4, response-with-code=5, tests-passing=70",
})
}

Expand Down Expand Up @@ -310,6 +311,7 @@ func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
AssessmentKeyProcessingTime: uint64(100),
AssessmentKeyCoverage: 10,
AssessmentKeyResponseNoError: 1,
AssessmentKeyTestsPassing: 100,
},

ExpectedAssessments: Assessments{
Expand All @@ -321,6 +323,7 @@ func TestCombineModelAndSymflowerFixAssessments(t *testing.T) {
AssessmentKeyResponseNoError: 0,
AssessmentKeyResponseWithCode: 1,
AssessmentKeyResponseNoExcess: 1,
AssessmentKeyTestsPassing: 100,
},
})
}
38 changes: 19 additions & 19 deletions evaluate/report/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestNewEvaluationFile(t *testing.T) {
require.NoError(t, err)

expectedEvaluationFileContent := bytesutil.StringTrimIndentations(`
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
`)

assert.Equal(t, expectedEvaluationFileContent, string(actualEvaluationFileContent))
Expand Down Expand Up @@ -66,8 +66,8 @@ func TestWriteEvaluationRecord(t *testing.T) {
},

ExpectedCSV: `
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
mocked-model,golang,golang/plain,write-tests,0,0,0,0,0,0,0,0,0,0
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
mocked-model,golang,golang/plain,write-tests,0,0,0,0,0,0,0,0,0,0,0
`,
})
validate(t, &testCase{
Expand All @@ -89,9 +89,9 @@ func TestWriteEvaluationRecord(t *testing.T) {
},

ExpectedCSV: `
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
mocked-model,golang,golang/plain,write-tests,2,0,1,1,0,0,0,1,0,0
mocked-model,golang,golang/plain,write-tests-symflower-fix,12,10,1,1,0,0,0,1,0,0
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
mocked-model,golang,golang/plain,write-tests,2,0,1,1,0,0,0,1,0,0,0
mocked-model,golang,golang/plain,write-tests-symflower-fix,12,10,1,1,0,0,0,1,0,0,0
`,
})
}
Expand Down Expand Up @@ -224,37 +224,37 @@ func TestEvaluationFileWriteLines(t *testing.T) {
Name: "No records",

ExpectedEvaluationFile: `
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
`,
})
validate(t, &testCase{
Name: "Single record",

RawRecords: [][]string{
[]string{"modelA", "golang", "golang/light", "write-tests", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"},
[]string{"modelA", "golang", "golang/light", "write-tests", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"},
},

ExpectedEvaluationFile: `
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
modelA,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
modelA,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1,1
`,
})
validate(t, &testCase{
Name: "Multiple records",

RawRecords: [][]string{
[]string{"modelA", "golang", "golang/light", "write-tests", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"},
[]string{"modelA", "golang", "golang/plain", "write-tests", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2"},
[]string{"modelA", "java", "java/light", "write-tests", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3"},
[]string{"modelA", "java", "java/plain", "write-tests", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4"},
[]string{"modelA", "golang", "golang/light", "write-tests", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"},
[]string{"modelA", "golang", "golang/plain", "write-tests", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2"},
[]string{"modelA", "java", "java/light", "write-tests", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3"},
[]string{"modelA", "java", "java/plain", "write-tests", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4", "4"},
},

ExpectedEvaluationFile: `
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
modelA,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1
modelA,golang,golang/plain,write-tests,2,2,2,2,2,2,2,2,2,2
modelA,java,java/light,write-tests,3,3,3,3,3,3,3,3,3,3
modelA,java,java/plain,write-tests,4,4,4,4,4,4,4,4,4,4
model-id,language,repository,task,score,coverage,files-executed,files-executed-maximum-reachable,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code,tests-passing
modelA,golang,golang/light,write-tests,1,1,1,1,1,1,1,1,1,1,1
modelA,golang,golang/plain,write-tests,2,2,2,2,2,2,2,2,2,2,2
modelA,java,java/light,write-tests,3,3,3,3,3,3,3,3,3,3,3
modelA,java,java/plain,write-tests,4,4,4,4,4,4,4,4,4,4,4
`,
})
}
Expand Down
2 changes: 1 addition & 1 deletion evaluate/task/task-code-repair.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func validateCodeRepairRepository(logger *log.Logger, repositoryPath string, lan
var packagePaths []string
var otherFiles []string
for _, file := range files {
if file.Name() == "repository.json" {
if file.Name() == "repository.json" || file.Name() == ".git" || file.Name() == "target" {
continue
} else if file.IsDir() {
packagePaths = append(packagePaths, filepath.Join(repositoryPath, file.Name()))
Expand Down
10 changes: 10 additions & 0 deletions evaluate/task/task-code-repair_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,11 @@ func TestValidateCodeRepairRepository(t *testing.T) {
validate(t, &tasktesting.TestCaseValidateRepository{
Name: "Well-formed",

Before: func(repositoryPath string) {
require.NoError(t, osutil.MkdirAll(filepath.Join(repositoryPath, ".git")))
require.NoError(t, os.WriteFile(filepath.Join(repositoryPath, ".git", "index"), []byte(`content`), 0700))
},

TestdataPath: filepath.Join("..", "..", "testdata"),
RepositoryPath: filepath.Join("golang", "mistakes"),
Language: &golang.Language{},
Expand Down Expand Up @@ -464,6 +469,11 @@ func TestValidateCodeRepairRepository(t *testing.T) {
validate(t, &tasktesting.TestCaseValidateRepository{
Name: "Well-formed",

Before: func(repositoryPath string) {
require.NoError(t, osutil.MkdirAll(filepath.Join(repositoryPath, "target")))
require.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "target", "someClass.class"), []byte(`content`), 0700))
},

TestdataPath: filepath.Join("..", "..", "testdata"),
RepositoryPath: filepath.Join("java", "mistakes"),
Language: &java.Language{},
Expand Down
Loading

0 comments on commit 5a439a6

Please sign in to comment.