diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go
index 20b1a9b3..09ea35e3 100644
--- a/cmd/eval-dev-quality/cmd/evaluate.go
+++ b/cmd/eval-dev-quality/cmd/evaluate.go
@@ -19,7 +19,6 @@ import (
 	"github.com/symflower/eval-dev-quality/evaluate"
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
-	"github.com/symflower/eval-dev-quality/evaluate/report"
 	"github.com/symflower/eval-dev-quality/language"
 	_ "github.com/symflower/eval-dev-quality/language/golang" // Register language.
 	_ "github.com/symflower/eval-dev-quality/language/java"   // Register language.
@@ -413,22 +412,8 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
 	}
 
 	assessments, totalScore := evaluate.Evaluate(evaluationContext)
 	assessmentsPerModel := assessments.CollapseByModel()
-	if err := (report.Markdown{
-		DateTime: command.timestamp,
-		Version:  evaluate.Version,
-
-		CSVPath:       "./evaluation.csv",
-		LogPath:       "./evaluation.log",
-		ModelLogsPath: ".",
-		SVGPath:       "./categories.svg",
-
-		AssessmentPerModel: assessmentsPerModel,
-		TotalScore:         totalScore,
-	}).WriteToFile(filepath.Join(command.ResultPath, "README.md")); err != nil {
-		command.logger.Panicf("ERROR: %s", err)
-	}
 
 	_ = assessmentsPerModel.WalkByScore(func(model model.Model, assessment metrics.Assessments, score uint64) (err error) {
 		command.logger.Printf("Evaluation score for %q (%q): %s", model.ID(), assessment.Category(totalScore).ID, assessment)
diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go
index 6ba093d3..6a1c5a05 100644
--- a/cmd/eval-dev-quality/cmd/evaluate_test.go
+++ b/cmd/eval-dev-quality/cmd/evaluate_test.go
@@ -27,24 +27,6 @@ import (
 	toolstesting "github.com/symflower/eval-dev-quality/tools/testing"
 )
 
-// validateReportLinks checks if the Markdown report data contains all the links to other relevant report files.
-func validateReportLinks(t *testing.T, data string, modelLogNames []string) {
-	assert.Contains(t, data, "](./categories.svg)")
-	assert.Contains(t, data, "](./evaluation.csv)")
-	assert.Contains(t, data, "](./evaluation.log)")
-	for _, m := range modelLogNames {
-		assert.Contains(t, data, fmt.Sprintf("](./%s/)", m))
-	}
-}
-
-// validateSVGContent checks if the SVG data contains all given categories and an axis label for the maximal model count.
-func validateSVGContent(t *testing.T, data string, categories []*metrics.AssessmentCategory, maxModelCount uint) {
-	for _, category := range categories {
-		assert.Contains(t, data, fmt.Sprintf("%s", category.Name))
-	}
-	assert.Contains(t, data, fmt.Sprintf("%d", maxModelCount))
-}
-
 func atoiUint64(t *testing.T, s string) uint64 {
 	value, err := strconv.ParseUint(s, 10, 64)
 	assert.NoErrorf(t, err, "parsing unsigned integer from: %q", s)
@@ -212,9 +194,6 @@ func TestEvaluateExecute(t *testing.T) {
 				assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
-					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
-				},
 				filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
 					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
 						metrics.Assessments{
@@ -241,9 +220,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
 				},
 				filepath.Join("result-directory", "evaluation.log"): nil,
-				filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
-					validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
-				},
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
 			},
 		})
@@ -273,9 +249,6 @@ func TestEvaluateExecute(t *testing.T) {
 				assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
-					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
-				},
 				filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
 					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
 						metrics.Assessments{
@@ -322,9 +295,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Equal(t, actualAssessments[3][metrics.AssessmentKeyResponseCharacterCount], uint64(139))
 				},
 				filepath.Join("result-directory", "evaluation.log"): nil,
-				filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
-					validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
-				},
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
 					assert.Contains(t, data, "coverage objects: [{")
 				},
@@ -363,9 +333,6 @@ func TestEvaluateExecute(t *testing.T) {
 				assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
-					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
-				},
 				filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
 					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
 						metrics.Assessments{
@@ -392,9 +359,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
 				},
 				filepath.Join("result-directory", "evaluation.log"): nil,
-				filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
-					validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
-				},
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
 			},
 		})
@@ -411,9 +375,6 @@ func TestEvaluateExecute(t *testing.T) {
 				assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): func(t *testing.T, filePath, data string) {
-					validateSVGContent(t, data, []*metrics.AssessmentCategory{metrics.AssessmentCategoryCodeNoExcess}, 1)
-				},
 				filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
 					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
 						metrics.Assessments{
@@ -440,9 +401,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Equal(t, actualAssessments[1][metrics.AssessmentKeyResponseCharacterCount], uint64(254))
 				},
 				filepath.Join("result-directory", "evaluation.log"): nil,
-				filepath.Join("result-directory", "README.md"): func(t *testing.T, filePath, data string) {
-					validateReportLinks(t, data, []string{"symflower_symbolic-execution"})
-				},
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
 			},
 		})
@@ -497,7 +455,6 @@ func TestEvaluateExecute(t *testing.T) {
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): nil,
 				filepath.Join("result-directory", "evaluation.csv"): nil,
 				filepath.Join("result-directory", "evaluation.log"): func(t *testing.T, filePath, data string) {
 					// Since the model is non-deterministic, we can only assert that the model did at least not error.
@@ -506,7 +463,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Contains(t, data, "preloading model")
 					assert.Contains(t, data, "unloading model")
 				},
-				filepath.Join("result-directory", "README.md"): nil,
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
 			},
 		})
@@ -545,14 +501,12 @@ func TestEvaluateExecute(t *testing.T) {
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): nil,
 				filepath.Join("result-directory", "evaluation.csv"): nil,
 				filepath.Join("result-directory", "evaluation.log"): func(t *testing.T, filePath, data string) {
 					// Since the model is non-deterministic, we can only assert that the model did at least not error.
 					assert.Contains(t, data, fmt.Sprintf(`Evaluation score for "custom-ollama/%s"`, providertesting.OllamaTestModel))
 					assert.Contains(t, data, "response-no-error=2")
 				},
-				filepath.Join("result-directory", "README.md"): nil,
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+model.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain.log"): nil,
 			},
 		})
@@ -587,7 +541,6 @@ func TestEvaluateExecute(t *testing.T) {
 				assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
 			},
 			ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-				filepath.Join("result-directory", "categories.svg"): nil,
 				filepath.Join("result-directory", "evaluation.csv"): func(t *testing.T, filePath, data string) {
 					actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{
 						metrics.Assessments{
@@ -645,7 +598,6 @@ func TestEvaluateExecute(t *testing.T) {
 					assert.Contains(t, data, "Run 2/3")
 					assert.Contains(t, data, "Run 3/3")
 				},
-				filepath.Join("result-directory", "README.md"): nil,
 				filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) {
 					assert.Equal(t, 3, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`))
 				},
@@ -653,38 +605,6 @@ func TestEvaluateExecute(t *testing.T) {
 		})
 	})
 
-	// This case checks a beautiful bug where the Markdown export crashed when the current working directory contained a README.md file. While this is not the case during the tests (as the current work directory is the directory of this file), it certainly caused problems when our binary was executed from the repository root (which of course contained a README.md). Therefore, we sadly have to modify the current work directory right within the tests of this case to reproduce the problem and fix it forever.
-	validate(t, &testCase{
-		Name: "Current work directory contains a README.md",
-
-		Before: func(t *testing.T, logger *log.Logger, resultPath string) {
-			if err := os.Remove("README.md"); err != nil {
-				if osutil.IsWindows() {
-					require.Contains(t, err.Error(), "The system cannot find the file specified")
-				} else {
-					require.Contains(t, err.Error(), "no such file or directory")
-				}
-			}
-			require.NoError(t, os.WriteFile("README.md", []byte(""), 0644))
-		},
-		After: func(t *testing.T, logger *log.Logger, resultPath string) {
-			require.NoError(t, os.Remove("README.md"))
-		},
-
-		Arguments: []string{
-			"--language", "golang",
-			"--model", "symflower/symbolic-execution",
-			"--repository", filepath.Join("golang", "plain"),
-		},
-
-		ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-			filepath.Join("result-directory", "categories.svg"): nil,
-			filepath.Join("result-directory", "evaluation.csv"): nil,
-			filepath.Join("result-directory", "evaluation.log"): nil,
-			filepath.Join("result-directory", "README.md"): nil,
-			filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
-		},
-	})
 
 	validate(t, &testCase{
 		Name: "Don't overwrite results path if it already exists",
@@ -699,10 +619,8 @@ func TestEvaluateExecute(t *testing.T) {
 		},
 
 		ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
-			filepath.Join("result-directory-0", "categories.svg"): nil,
 			filepath.Join("result-directory-0", "evaluation.csv"): nil,
 			filepath.Join("result-directory-0", "evaluation.log"): nil,
-			filepath.Join("result-directory-0", "README.md"): nil,
 			filepath.Join("result-directory-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): nil,
 		},
 	})
diff --git a/evaluate/report/markdown.go b/evaluate/report/markdown.go
deleted file mode 100644
index 4cea0549..00000000
--- a/evaluate/report/markdown.go
+++ /dev/null
@@ -1,214 +0,0 @@
-package report
-
-import (
-	"errors"
-	"io"
-	"os"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"text/template"
-	"time"
-
-	pkgerrors "github.com/pkg/errors"
-	"github.com/wcharczuk/go-chart/v2"
-	"github.com/zimmski/osutil"
-	"github.com/zimmski/osutil/bytesutil"
-
-	"github.com/symflower/eval-dev-quality/evaluate/metrics"
-	"github.com/symflower/eval-dev-quality/model"
-)
-
-// Markdown holds the values for exporting a Markdown report.
-type Markdown struct {
-	// DateTime holds the timestamp of the evaluation.
-	DateTime time.Time
-	// Version holds the version of the evaluation tool.
-	Version string
-
-	// CSVPath holds the path of detailed CSV results.
-	CSVPath string
-	// LogPath holds the path of detailed logs.
-	LogPath string
-	// ModelLogsPath holds the path of the model logs.
-	ModelLogsPath string
-	// SVGPath holds the path of the charted results.
-	SVGPath string
-
-	// AssessmentPerModel holds a collection of assessments per model.
-	AssessmentPerModel AssessmentPerModel
-	// TotalScore holds the total reachable score per task.
-	TotalScore uint64
-}
-
-// markdownTemplateContext holds the template for a Markdown report.
-type markdownTemplateContext struct {
-	Markdown
-
-	Categories        []*metrics.AssessmentCategory
-	ModelsPerCategory map[*metrics.AssessmentCategory][]string
-}
-
-// ModelLogName formats a model name to match the logging structure.
-func (c markdownTemplateContext) ModelLogName(modelName string) string {
-	modelPath := filepath.Join(c.ModelLogsPath, model.CleanModelNameForFileSystem(modelName)) + string(os.PathSeparator)
-	if !filepath.IsAbs(modelPath) {
-		// Ensure we reference the models relative to the Markdown file itself.
-		modelPath = "." + string(os.PathSeparator) + modelPath
-	}
-
-	if osutil.IsWindows() {
-		// Markdown should be able to handle "/" for file paths.
-		modelPath = strings.ReplaceAll(modelPath, "\\", "/")
-	}
-
-	return modelPath
-}
-
-// markdownTemplate holds the template for a Markdown report.
-var markdownTemplate = template.Must(template.New("template-report").Parse(bytesutil.StringTrimIndentations(`
-	# Evaluation from {{.DateTime.Format "2006-01-02 15:04:05"}}
-
-	![Bar chart that categorizes all evaluated models.]({{.SVGPath}})
-
-	This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version {{.Version}}` + "`" + `.
-
-	## Results
-
-	> Keep in mind that LLMs are nondeterministic. The following results just reflect a current snapshot.
-
-	The results of all models have been divided into the following categories:
-	{{ range $category := .Categories -}}
-	- {{ $category.Name }}: {{ $category.Description }}
-	{{ end }}
-	The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here]({{.LogPath}}). Detailed scoring can be found [here]({{.CSVPath}}).
-
-	{{ range $category := .Categories -}}
-	{{ with $modelNames := index $.ModelsPerCategory $category -}}
-	### Result category "{{ $category.Name }}"
-
-	{{ $category.Description }}
-
-	{{ range $modelName := $modelNames -}}
-	- [` + "`" + `{{ $modelName }}` + "`" + `]({{ $.ModelLogName $modelName }})
-	{{ end }}
-	{{ end }}
-	{{- end -}}
-`)))
-
-// barChartModelsPerCategoriesSVG generates a bar chart showing models per category and writes it out as an SVG.
-func barChartModelsPerCategoriesSVG(writer io.Writer, categories []*metrics.AssessmentCategory, modelsPerCategory map[*metrics.AssessmentCategory][]string) (err error) {
-	bars := make([]chart.Value, 0, len(categories))
-	maxCount := 0
-	for _, category := range categories {
-		count := len(modelsPerCategory[category])
-		if count > maxCount {
-			maxCount = count
-		}
-		if count == 0 {
-			continue
-		}
-
-		bars = append(bars, chart.Value{
-			Label: category.Name,
-			Value: float64(count),
-		})
-	}
-
-	ticks := make([]chart.Tick, maxCount+1)
-	for i := range ticks {
-		ticks[i] = chart.Tick{
-			Value: float64(i),
-			Label: strconv.Itoa(i),
-		}
-	}
-
-	graph := chart.BarChart{
-		Title: "Models per Category",
-		Bars:  bars,
-		YAxis: chart.YAxis{
-			Ticks: ticks,
-		},
-
-		Background: chart.Style{
-			Padding: chart.Box{
-				Top:    60,
-				Bottom: 40,
-			},
-		},
-		Height:   300,
-		Width:    (len(bars) + 2) * 60,
-		BarWidth: 60,
-	}
-
-	if err := graph.Render(chart.SVG, writer); err != nil {
-		return pkgerrors.WithStack(err)
-	}
-
-	return nil
-}
-
-// format formats the markdown values in the template to the given writer.
-func (m Markdown) format(writer io.Writer, markdownFileDirectoryPath string) (err error) {
-	templateContext := markdownTemplateContext{
-		Markdown:   m,
-		Categories: metrics.AllAssessmentCategories,
-	}
-	templateContext.ModelsPerCategory = make(map[*metrics.AssessmentCategory][]string, len(metrics.AllAssessmentCategories))
-	for model, assessment := range m.AssessmentPerModel {
-		category := assessment.Category(m.TotalScore)
-		templateContext.ModelsPerCategory[category] = append(templateContext.ModelsPerCategory[category], model.ID())
-	}
-
-	svgFile, err := os.Create(filepath.Join(markdownFileDirectoryPath, m.SVGPath))
-	if err != nil {
-		return pkgerrors.WithStack(err)
-	}
-	defer func() {
-		if e := svgFile.Close(); e != nil {
-			e = pkgerrors.WithStack(e)
-			if err == nil {
-				err = e
-			} else {
-				err = errors.Join(err, e)
-			}
-		}
-	}()
-
-	if len(templateContext.AssessmentPerModel) > 0 {
-		if err := barChartModelsPerCategoriesSVG(svgFile, metrics.AllAssessmentCategories, templateContext.ModelsPerCategory); err != nil {
-			return pkgerrors.WithStack(err)
-		}
-	}
-
-	if err := markdownTemplate.Execute(writer, templateContext); err != nil {
-		return pkgerrors.WithStack(err)
-	}
-
-	return nil
-}
-
-// WriteToFile renders the Markdown to the given file.
-func (m Markdown) WriteToFile(path string) (err error) {
-	if err = os.MkdirAll(filepath.Dir(path), 0755); err != nil {
-		return pkgerrors.WithStack(err)
-	}
-
-	file, err := os.Create(path)
-	if err != nil {
-		return pkgerrors.WithStack(err)
-	}
-	defer func() {
-		if e := file.Close(); e != nil {
-			e = pkgerrors.WithStack(e)
-			if err == nil {
-				err = e
-			} else {
-				err = errors.Join(err, e)
-			}
-		}
-	}()
-
-	if err := m.format(file, filepath.Dir(path)); err != nil {
-		return pkgerrors.WithStack(err)
-	}
-
-	return nil
-}
diff --git a/evaluate/report/markdown_test.go b/evaluate/report/markdown_test.go
deleted file mode 100644
index fd03c128..00000000
--- a/evaluate/report/markdown_test.go
+++ /dev/null
@@ -1,220 +0,0 @@
-package report
-
-import (
-	"bytes"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-	"time"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/zimmski/osutil"
-	"github.com/zimmski/osutil/bytesutil"
-
-	"github.com/symflower/eval-dev-quality/evaluate/metrics"
-	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
-)
-
-func TestMarkdownWriteToFile(t *testing.T) {
-	type testCase struct {
-		Name string
-
-		Markdown Markdown
-
-		ExpectedReport  string
-		ExpectedSVGFile string
-		ExpectedError   error
-	}
-
-	validate := func(t *testing.T, tc *testCase) {
-		t.Run(tc.Name, func(t *testing.T) {
-			temporaryDirectory := t.TempDir()
-			markdownFilePath := filepath.Join(temporaryDirectory, "REPORT.md")
-
-			actualError := tc.Markdown.WriteToFile(markdownFilePath)
-			assert.Equal(t, tc.ExpectedError, actualError)
-
-			actualReport, err := os.ReadFile(markdownFilePath)
-			assert.NoError(t, err)
-			assert.Equalf(t, bytesutil.StringTrimIndentations(tc.ExpectedReport), string(actualReport), "Full output:\n%s", actualReport)
-
-			actualSVGContent, err := os.ReadFile(filepath.Join(temporaryDirectory, tc.Markdown.SVGPath))
-			assert.NoError(t, err)
-			expectedSVGContent, err := os.ReadFile(tc.ExpectedSVGFile)
-			require.NoError(t, err)
-			expectedSVGContentCleaned := string(expectedSVGContent)
-			if osutil.IsWindows() {
-				expectedSVGContentCleaned = strings.ReplaceAll(expectedSVGContentCleaned, "\r", "")
-			}
-			assert.Equal(t, expectedSVGContentCleaned, string(actualSVGContent))
-		})
-	}
-
-	testTimeString := "2000-01-01 00:00:00"
-	testTime, err := time.Parse(time.DateTime, testTimeString)
-	require.NoError(t, err)
-
-	validate(t, &testCase{
-		Name: "No Models",
-
-		Markdown: Markdown{
-			DateTime: testTime,
-			Version:  "1234",
-
-			CSVPath:       "./file.csv",
-			LogPath:       "./file.log",
-			ModelLogsPath: ".",
-			SVGPath:       "./file.svg",
-		},
-
-		ExpectedReport: `
-			# Evaluation from 2000-01-01 00:00:00
-
-			![Bar chart that categorizes all evaluated models.](./file.svg)
-
-			This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
-
-			## Results
-
-			> Keep in mind that LLMs are nondeterministic. The following results just reflect a current snapshot.
-
-			The results of all models have been divided into the following categories:
-			- category unknown: Models in this category could not be categorized.
-			- response error: Models in this category encountered an error.
-			- no code: Models in this category produced no code.
-			- invalid code: Models in this category produced invalid code.
-			- executable code: Models in this category produced executable code.
-			- statement coverage reached: Models in this category produced code that reached full statement coverage.
-			- no excess response: Models in this category did not respond with more content than requested.
-
-			The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here](./file.log). Detailed scoring can be found [here](./file.csv).
-		`,
-		ExpectedSVGFile: "testdata/empty.svg",
-	})
-
-	validate(t, &testCase{
-		Name: "Simple Models",
-
-		Markdown: Markdown{
-			DateTime: testTime,
-			Version:  "1234",
-
-			CSVPath:       "./file.csv",
-			LogPath:       "./file.log",
-			ModelLogsPath: ".",
-			SVGPath:       "./file.svg",
-
-			TotalScore: 1,
-			AssessmentPerModel: AssessmentPerModel{
-				modeltesting.NewMockCapabilityWriteTestsNamed(t, "model/response/error"): metrics.NewAssessments(),
-				modeltesting.NewMockCapabilityWriteTestsNamed(t, "model/no/code"): metrics.Assessments{
-					metrics.AssessmentKeyResponseNoError: 1,
-				},
-			},
-		},
-
-		ExpectedReport: `
-			# Evaluation from 2000-01-01 00:00:00
-
-			![Bar chart that categorizes all evaluated models.](./file.svg)
-
-			This report was generated by [DevQualityEval benchmark](https://github.com/symflower/eval-dev-quality) in ` + "`" + `version 1234` + "`" + `.
-
-			## Results
-
-			> Keep in mind that LLMs are nondeterministic. The following results just reflect a current snapshot.
-
-			The results of all models have been divided into the following categories:
-			- category unknown: Models in this category could not be categorized.
-			- response error: Models in this category encountered an error.
-			- no code: Models in this category produced no code.
-			- invalid code: Models in this category produced invalid code.
-			- executable code: Models in this category produced executable code.
-			- statement coverage reached: Models in this category produced code that reached full statement coverage.
-			- no excess response: Models in this category did not respond with more content than requested.
-
-			The following sections list all models with their categories. The complete log of the evaluation with all outputs can be found [here](./file.log). Detailed scoring can be found [here](./file.csv).
-
-			### Result category "response error"
-
-			Models in this category encountered an error.
-
-			- [` + "`model/response/error`" + `](./model_response_error/)
-
-			### Result category "no code"
-
-			Models in this category produced no code.
-
-			- [` + "`model/no/code`" + `](./model_no_code/)
-		`,
-		ExpectedSVGFile: "testdata/two_models.svg",
-	})
-}
-
-func TestBarChartModelsPerCategoriesSVG(t *testing.T) {
-	type testCase struct {
-		Name string
-
-		Categories        []*metrics.AssessmentCategory
-		ModelsPerCategory map[*metrics.AssessmentCategory]uint
-
-		ExpectedFile  string
-		ExpectedError error
-	}
-
-	validate := func(t *testing.T, tc *testCase) {
-		t.Run(tc.Name, func(t *testing.T) {
-			var actualSVGContent bytes.Buffer
-
-			dummyModelsPerCategory := make(map[*metrics.AssessmentCategory][]string)
-			for category, count := range tc.ModelsPerCategory {
-				dummyModelsPerCategory[category] = make([]string, count)
-			}
-
-			actualError := barChartModelsPerCategoriesSVG(&actualSVGContent, tc.Categories, dummyModelsPerCategory)
-			assert.Equal(t, tc.ExpectedError, actualError)
-
-			expectedSVGContent, err := os.ReadFile(tc.ExpectedFile)
-			require.NoError(t, err)
-			expectedSVGContentCleaned := string(expectedSVGContent)
-			if osutil.IsWindows() {
-				expectedSVGContentCleaned = strings.ReplaceAll(expectedSVGContentCleaned, "\r", "")
-			}
-			assert.Equal(t, expectedSVGContentCleaned, actualSVGContent.String())
-		})
-	}
-
-	validate(t, &testCase{
-		Name: "Two Categories",
-
-		Categories: []*metrics.AssessmentCategory{
-			metrics.AssessmentCategoryResponseError,
-			metrics.AssessmentCategoryResponseNoCode,
-		},
-		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
-			metrics.AssessmentCategoryResponseError:  1,
-			metrics.AssessmentCategoryResponseNoCode: 3,
-		},
-
-		ExpectedFile: "testdata/two_categories.svg",
-	})
-
-	validate(t, &testCase{
-		Name: "All Categories",
-
-		Categories: metrics.AllAssessmentCategories,
-		ModelsPerCategory: map[*metrics.AssessmentCategory]uint{
-			metrics.AssessmentCategoryResponseError:                1,
-			metrics.AssessmentCategoryResponseNoCode:               2,
-			metrics.AssessmentCategoryCodeInvalid:                  3,
-			metrics.AssessmentCategoryCodeExecuted:                 4,
-			metrics.AssessmentCategoryCodeCoverageStatementReached: 5,
-			metrics.AssessmentCategoryCodeNoExcess:                 6,
-		},
-
-		ExpectedFile: "testdata/all_categories.svg",
-	})
-}