Skip to content

Commit

Permalink
Remove the logic to write the "models-summed.csv" and "<language>-summed.csv" files, since they will be handled afterwards with tooling
Browse files Browse the repository at this point in the history

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 10, 2024
1 parent 2e18845 commit 4062f93
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 1,008 deletions.
4 changes: 0 additions & 4 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,6 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
return nil
})

if err := report.WriteCSVs(command.ResultPath); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

return nil
}

Expand Down
167 changes: 11 additions & 156 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,36 +241,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -351,51 +321,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(139))
assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyResponseCharacterCount], uint64(139))
},
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "java-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(278))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(278))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 40,
metrics.AssessmentKeyFilesExecuted: 4,
metrics.AssessmentKeyResponseNoError: 4,
metrics.AssessmentKeyResponseNoExcess: 4,
metrics.AssessmentKeyResponseWithCode: 4,
},
}, []uint64{56})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(786))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(786))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
Expand Down Expand Up @@ -467,36 +392,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -545,36 +440,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "models-summed.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
metrics.AssessmentKeyCoverage: 20,
metrics.AssessmentKeyFilesExecuted: 2,
metrics.AssessmentKeyResponseNoError: 2,
metrics.AssessmentKeyResponseNoExcess: 2,
metrics.AssessmentKeyResponseWithCode: 2,
},
}, []uint64{28})
// Assert non-deterministic behavior.
assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount], uint64(508))
assert.Equal(t, actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount], uint64(508))
},
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
Expand Down Expand Up @@ -641,9 +506,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "preloading model")
assert.Contains(t, data, "unloading model")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -689,9 +552,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, fmt.Sprintf(`Evaluation score for "custom-ollama/%s"`, providertesting.OllamaTestModel))
assert.Contains(t, data, "response-no-error=2")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -784,9 +645,7 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "Run 2/3")
assert.Contains(t, data, "Run 3/3")
},
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 3, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
Expand Down Expand Up @@ -819,12 +678,10 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "golang-summed.csv"): nil,
filepath.Join("result-directory", "models-summed.csv"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand All @@ -842,12 +699,10 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory-0", "categories.svg"): nil,
filepath.Join("result-directory-0", "evaluation.csv"): nil,
filepath.Join("result-directory-0", "evaluation.log"): nil,
filepath.Join("result-directory-0", "golang-summed.csv"): nil,
filepath.Join("result-directory-0", "models-summed.csv"): nil,
filepath.Join("result-directory-0", "README.md"): nil,
filepath.Join("result-directory-0", "categories.svg"): nil,
filepath.Join("result-directory-0", "evaluation.csv"): nil,
filepath.Join("result-directory-0", "evaluation.log"): nil,
filepath.Join("result-directory-0", "README.md"): nil,
filepath.Join("result-directory-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand Down
Loading

0 comments on commit 4062f93

Please sign in to comment.