Skip to content

Commit

Permalink
Remove the logic to write the markdown report, since it will be handled afterwards with tooling
Browse files Browse the repository at this point in the history

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 10, 2024
1 parent 4062f93 commit b54fad5
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 532 deletions.
16 changes: 0 additions & 16 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/symflower/eval-dev-quality/evaluate"
"github.com/symflower/eval-dev-quality/evaluate/metrics"
"github.com/symflower/eval-dev-quality/evaluate/report"
"github.com/symflower/eval-dev-quality/language"
_ "github.com/symflower/eval-dev-quality/language/golang" // Register language.
_ "github.com/symflower/eval-dev-quality/language/java" // Register language.
Expand Down Expand Up @@ -413,22 +412,7 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
}

assessments, totalScore := evaluate.Evaluate(evaluationContext)

assessmentsPerModel := assessments.CollapseByModel()
if err := (report.Markdown{
DateTime: command.timestamp,
Version: evaluate.Version,

CSVPath: "./evaluation.csv",
LogPath: "./evaluation.log",
ModelLogsPath: ".",
SVGPath: "./categories.svg",

AssessmentPerModel: assessmentsPerModel,
TotalScore: totalScore,
}).WriteToFile(filepath.Join(command.ResultPath, "README.md")); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

_ = assessmentsPerModel.WalkByScore(func(model model.Model, assessment metrics.Assessments, score uint64) (err error) {
command.logger.Printf("Evaluation score for %q (%q): %s", model.ID(), assessment.Category(totalScore).ID, assessment)
Expand Down
82 changes: 0 additions & 82 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,6 @@ import (
toolstesting "github.com/symflower/eval-dev-quality/tools/testing"
)

// validateReportLinks asserts that the Markdown report in "data" links to the
// category chart, the CSV export and the evaluation log, and that it contains
// one relative directory link for every entry of "modelLogNames".
func validateReportLinks(t *testing.T, data string, modelLogNames []string) {
	// Links to the report artifacts that are always expected.
	fixedLinks := []string{
		"](./categories.svg)",
		"](./evaluation.csv)",
		"](./evaluation.log)",
	}
	for _, link := range fixedLinks {
		assert.Contains(t, data, link)
	}

	// Each model log is expected to be linked as a directory relative to the report.
	for i := range modelLogNames {
		modelLink := fmt.Sprintf("](./%s/)", modelLogNames[i])
		assert.Contains(t, data, modelLink)
	}
}

// validateSVGContent asserts that the SVG in "data" contains a text element
// ending with the name of every given category, as well as a text element
// ending with "maxModelCount" (the axis label for the maximal model count).
func validateSVGContent(t *testing.T, data string, categories []*metrics.AssessmentCategory, maxModelCount uint) {
	for i := range categories {
		categoryLabel := fmt.Sprintf("%s</text>", categories[i].Name)
		assert.Contains(t, data, categoryLabel)
	}

	axisLabel := fmt.Sprintf("%d</text>", maxModelCount)
	assert.Contains(t, data, axisLabel)
}

func atoiUint64(t *testing.T, s string) uint64 {
value, err := strconv.ParseUint(s, 10, 64)
assert.NoErrorf(t, err, "parsing unsigned integer from: %q", s)
Expand Down Expand Up @@ -212,9 +194,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
Expand All @@ -241,9 +220,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -273,9 +249,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
Expand Down Expand Up @@ -322,9 +295,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyResponseCharacterCount], uint64(139))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "coverage objects: [{")
},
Expand Down Expand Up @@ -363,9 +333,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
Expand All @@ -392,9 +359,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand All @@ -411,9 +375,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
},
filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
Expand All @@ -440,9 +401,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
},
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
},
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -497,7 +455,6 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): func(t *testing.T, filePath, data string) {
// Since the model is non-deterministic, we can only assert that the model did at least not error.
Expand All @@ -506,7 +463,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "preloading model")
assert.Contains(t, data, "unloading model")
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -545,14 +501,12 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): func(t *testing.T, filePath, data string) {
// Since the model is non-deterministic, we can only assert that the model did at least not error.
assert.Contains(t, data, fmt.Sprintf(`Evaluation score for "custom-ollama/%s"`, providertesting.OllamaTestModel))
assert.Contains(t, data, "response-no-error=2")
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
},
})
Expand Down Expand Up @@ -587,7 +541,6 @@ func TestEvaluateExecute(t *testing.T) {
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
metrics.Assessments{
Expand Down Expand Up @@ -645,46 +598,13 @@ func TestEvaluateExecute(t *testing.T) {
assert.Contains(t, data, "Run 2/3")
assert.Contains(t, data, "Run 3/3")
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
assert.Equal(t, 3, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
},
},
})
})

// This case checks a beautiful bug where the Markdown export crashed when the current working directory contained a "README.md" file. This is not the case during the tests (where the current working directory is the directory of this file), but it certainly caused problems when our binary was executed from the repository root (which of course contains a "README.md"). Therefore, we sadly have to modify the current working directory right within the tests of this case to reproduce the problem and fix it forever.
validate(t, &testCase{
Name: "Current work directory contains a README.md",

Before: func(t *testing.T, logger *log.Logger, resultPath string) {
if err := os.Remove("README.md"); err != nil {
if osutil.IsWindows() {
require.Contains(t, err.Error(), "The system cannot find the file specified")
} else {
require.Contains(t, err.Error(), "no such file or directory")
}
}
require.NoError(t, os.WriteFile("README.md", []byte(""), 0644))
},
After: func(t *testing.T, logger *log.Logger, resultPath string) {
require.NoError(t, os.Remove("README.md"))
},

Arguments: []string{
"--language", "golang",
"--model", "symflower/symbolic-execution",
"--repository", filepath.Join("golang", "plain"),
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory", "categories.svg"): nil,
filepath.Join("result-directory", "evaluation.csv"): nil,
filepath.Join("result-directory", "evaluation.log"): nil,
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
validate(t, &testCase{
Name: "Don't overwrite results path if it already exists",

Expand All @@ -699,10 +619,8 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
filepath.Join("result-directory-0", "categories.svg"): nil,
filepath.Join("result-directory-0", "evaluation.csv"): nil,
filepath.Join("result-directory-0", "evaluation.log"): nil,
filepath.Join("result-directory-0", "README.md"): nil,
filepath.Join("result-directory-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
},
})
Expand Down
Loading

0 comments on commit b54fad5

Please sign in to comment.