Skip to content

Commit

Permalink
Remove the logic to write the "models-summed.csv" and "<language>-summed.csv" files, since they will be handled afterwards with tooling
Browse files Browse the repository at this point in the history

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 12, 2024
1 parent aa44c7a commit 72ab41c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 1,038 deletions.
4 changes: 0 additions & 4 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,6 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
return nil
})

if err := report.WriteCSVs(command.ResultPath); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

return nil
}

Expand Down
176 changes: 11 additions & 165 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,38 +246,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -363,54 +331,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(139))
assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyResponseCharacterCount], uint64(139))
},
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "java-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(278))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(278))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 40,
metrics.AssessmentKeyFilesExecuted: 4,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 4,
metrics.AssessmentKeyResponseNoError: 4,
metrics.AssessmentKeyResponseNoExcess: 4,
metrics.AssessmentKeyResponseWithCode: 4,
},
}, []uint64{56})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(786))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(786))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
Expand Down Expand Up @@ -485,38 +405,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -567,38 +455,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyFilesExecutedMaximumReachable: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -665,9 +521,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "preloading model")
assert.Contains(t, data, "unloading model")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -713,9 +567,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, fmt.Sprintf(`Evaluation score for "custom-ollama/%s"`, providertesting.OllamaTestModel))
assert.Contains(t, data, "response-no-error=2")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -815,9 +667,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "Run 2/3")
assert.Contains(t, data, "Run 3/3")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 3, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
Expand Down Expand Up @@ -850,12 +700,10 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand All @@ -873,12 +721,10 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory-0", "categories.svg"): nil,
filepath.Join("result-directory-0", "evaluation.csv"): nil,
filepath.Join("result-directory-0", "evaluation.log"): nil,
filepath.Join("result-directory-0", "golang-summed.csv"): nil,
filepath.Join("result-directory-0", "models-summed.csv"): nil,
filepath.Join("result-directory-0", "README.md"): nil,
filepath.Join("result-directory-0", "categories.svg"): nil,
filepath.Join("result-directory-0", "evaluation.csv"): nil,
filepath.Join("result-directory-0", "evaluation.log"): nil,
filepath.Join("result-directory-0", "README.md"): nil,
filepath.Join("result-directory-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand Down
Loading

0 comments on commit 72ab41c

Please sign in to comment.