Skip to content

Commit

Permalink
Generate the "models-summed.csv" and "language-summed.csv" files based on the "evaluation.csv" file
Browse files Browse the repository at this point in the history

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 8, 2024
1 parent 8c9af43 commit be6a6ef
Show file tree
Hide file tree
Showing 5 changed files with 861 additions and 252 deletions.
29 changes: 1 addition & 28 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
return nil
})

if err := writeCSVs(command.ResultPath, assessments); err != nil {
if err := report.WriteCSVs(command.ResultPath); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

Expand Down Expand Up @@ -636,30 +636,3 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {

return nil
}

// writeCSVs writes the various CSV reports to disk.
// It stores "models-summed.csv" (summary per model) and one "<language-id>-summed.csv" (summary per model for that language) inside "resultPath".
// The first summary that cannot be created or written aborts the run and its wrapped error is returned.
func writeCSVs(resultPath string, assessments *report.AssessmentStore) (err error) {
	// writeSummary renders a per-model collection as CSV and stores it as "fileName" inside "resultPath".
	writeSummary := func(fileName string, perModel report.AssessmentPerModel) error {
		content, err := report.GenerateCSV(perModel)
		if err != nil {
			return pkgerrors.Wrap(err, "could not create "+fileName+" summary")
		}

		if err := os.WriteFile(filepath.Join(resultPath, fileName), []byte(content), 0644); err != nil {
			return pkgerrors.Wrap(err, "could not write "+fileName+" summary")
		}

		return nil
	}

	// Write the "models-summed.csv" containing the summary per model.
	if err := writeSummary("models-summed.csv", assessments.CollapseByModel()); err != nil {
		return err
	}

	// Write the individual "language-summed.csv" containing the summary per model per language.
	for language, perModel := range assessments.CollapseByLanguage() {
		if err := writeSummary(language.ID()+"-summed.csv", perModel); err != nil {
			return err
		}
	}

	return nil
}
23 changes: 0 additions & 23 deletions evaluate/report/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ import (
"github.com/symflower/eval-dev-quality/task"
)

// AssessmentPerLanguagePerModel holds a collection of assessments per language and model.
// The outer key is the language, the value is the per-model collection for that language.
type AssessmentPerLanguagePerModel map[language.Language]AssessmentPerModel

// AssessmentPerModel holds a collection of assessments per model.
// Each model is mapped to a single "metrics.Assessments" value.
type AssessmentPerModel map[model.Model]metrics.Assessments

Expand Down Expand Up @@ -133,23 +130,3 @@ func (a *AssessmentStore) CollapseByModel() AssessmentPerModel {

return perModel
}

// CollapseByLanguage returns all assessments aggregated per language and model.
// Repository paths and task identifiers are ignored, i.e. every assessment visited for the same language and model is added into one entry.
func (a *AssessmentStore) CollapseByLanguage() AssessmentPerLanguagePerModel {
	collapsed := AssessmentPerLanguagePerModel{}

	// The callback below always returns nil, the result of "Walk" is deliberately discarded.
	_ = a.Walk(func(m model.Model, l language.Language, _ string, _ task.Identifier, assessment metrics.Assessments) error {
		perModel, ok := collapsed[l]
		if !ok {
			perModel = AssessmentPerModel{}
			collapsed[l] = perModel
		}

		if _, ok := perModel[m]; !ok {
			perModel[m] = metrics.NewAssessments()
		}
		perModel[m].Add(assessment)

		return nil
	})

	return collapsed
}
123 changes: 0 additions & 123 deletions evaluate/report/collection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,129 +388,6 @@ func TestAssessmentCollapseByModel(t *testing.T) {
})
}

// TestAssessmentCollapseByLanguage checks that "CollapseByLanguage" adds up the assessments of all repositories per language and per model.
func TestAssessmentCollapseByLanguage(t *testing.T) {
	type testCase struct {
		Name string

		Assessments metricstesting.AssessmentTuples

		ExpectedAssessmentPerLanguagePerModel AssessmentPerLanguagePerModel
	}

	modelA := modeltesting.NewMockCapabilityWriteTestsNamed(t, "some-model-a")
	modelB := modeltesting.NewMockCapabilityWriteTestsNamed(t, "some-model-b")
	languageA := languagetesting.NewMockLanguageNamed(t, "some-language-a")
	languageB := languagetesting.NewMockLanguageNamed(t, "some-language-b")

	testCases := []*testCase{
		{
			Name: "Collapse",

			// Two models times two languages times two repositories, each with a distinct value.
			Assessments: metricstesting.AssessmentTuples{
				&metricstesting.AssessmentTuple{
					Model:          modelA,
					Language:       languageA,
					RepositoryPath: "some-repository-a",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 1,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelA,
					Language:       languageA,
					RepositoryPath: "some-repository-b",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 2,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelA,
					Language:       languageB,
					RepositoryPath: "some-repository-a",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 3,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelA,
					Language:       languageB,
					RepositoryPath: "some-repository-b",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 4,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelB,
					Language:       languageA,
					RepositoryPath: "some-repository-a",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 5,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelB,
					Language:       languageA,
					RepositoryPath: "some-repository-b",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 6,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelB,
					Language:       languageB,
					RepositoryPath: "some-repository-a",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 7,
					},
				},
				&metricstesting.AssessmentTuple{
					Model:          modelB,
					Language:       languageB,
					RepositoryPath: "some-repository-b",
					Task:           evaluatetask.IdentifierWriteTests,
					Assessment: metrics.Assessments{
						metrics.AssessmentKeyResponseNoExcess: 8,
					},
				},
			},

			ExpectedAssessmentPerLanguagePerModel: AssessmentPerLanguagePerModel{
				languageA: AssessmentPerModel{
					modelA: {
						metrics.AssessmentKeyResponseNoExcess: 3, // 1 + 2
					},
					modelB: {
						metrics.AssessmentKeyResponseNoExcess: 11, // 5 + 6
					},
				},
				languageB: AssessmentPerModel{
					modelA: {
						metrics.AssessmentKeyResponseNoExcess: 7, // 3 + 4
					},
					modelB: {
						metrics.AssessmentKeyResponseNoExcess: 15, // 7 + 8
					},
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			store := assessmentTuplesToStore(tc.Assessments)

			collapsed := store.CollapseByLanguage()

			assert.Equal(t, tc.ExpectedAssessmentPerLanguagePerModel, collapsed)
		})
	}
}

func assessmentTuplesToStore(at metricstesting.AssessmentTuples) (store *AssessmentStore) {
store = NewAssessmentStore()
for _, a := range at {
Expand Down
Loading

0 comments on commit be6a6ef

Please sign in to comment.