Skip to content

Commit

Permalink
Generate the "models-summed.csv" and "language-summed.csv" files base…
Browse files Browse the repository at this point in the history
…d on the "evaluation.csv" file

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 4, 2024
1 parent fc5a8af commit efbd939
Show file tree
Hide file tree
Showing 3 changed files with 888 additions and 79 deletions.
29 changes: 1 addition & 28 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
return nil
})

if err := writeCSVs(command.ResultPath, assessments); err != nil {
if err := report.WriteCSVs(command.ResultPath); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

Expand Down Expand Up @@ -626,30 +626,3 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) {

return nil
}

// writeCSVs writes the summed CSV reports of the given assessments into the result path.
// It creates one "models-summed.csv" file and one "<language-id>-summed.csv" file per language, and stops at the first report that cannot be created or written.
func writeCSVs(resultPath string, assessments *report.AssessmentStore) (err error) {
	// The "models-summed.csv" file contains the summary per model.
	modelsCSV, err := report.GenerateCSV(assessments.CollapseByModel())
	if err != nil {
		return pkgerrors.Wrap(err, "could not create models-summed.csv summary")
	}
	err = os.WriteFile(filepath.Join(resultPath, "models-summed.csv"), []byte(modelsCSV), 0644)
	if err != nil {
		return pkgerrors.Wrap(err, "could not write models-summed.csv summary")
	}

	// Every "<language-id>-summed.csv" file contains the summary per model for a single language.
	for language, models := range assessments.CollapseByLanguage() {
		fileName := language.ID() + "-summed.csv"

		languageCSV, err := report.GenerateCSV(models)
		if err != nil {
			return pkgerrors.Wrap(err, "could not create "+fileName+" summary")
		}
		err = os.WriteFile(filepath.Join(resultPath, fileName), []byte(languageCSV), 0644)
		if err != nil {
			return pkgerrors.Wrap(err, "could not write "+fileName+" summary")
		}
	}

	return nil
}
265 changes: 243 additions & 22 deletions evaluate/report/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/csv"
"os"
"path/filepath"
"reflect"
"slices"
"strconv"
"strings"
Expand All @@ -26,50 +27,109 @@ type CSVFormatter interface {
Rows() (rows [][]string)
}

// GenerateCSV returns the whole CSV as string.
func GenerateCSV(formatter CSVFormatter) (csvData string, err error) {
var out strings.Builder
csv := csv.NewWriter(&out)
// EvaluationRecord holds a line of the evaluation CSV.
type EvaluationRecord struct {
// ModelID holds the model id.
ModelID string
// ModelName holds the model name.
ModelName string
// ModelCost holds the model cost.
ModelCost float64

if err := csv.Write(formatter.Header()); err != nil {
return "", pkgerrors.WithStack(err)
}
// LanguageID holds the language id.
LanguageID string

for _, row := range formatter.Rows() {
if err := csv.Write(row); err != nil {
return "", pkgerrors.WithStack(err)
// Assessments holds the assessments of an entry.
Assessments metrics.Assessments
}

// Clone returns a new evaluation record that holds the same model and language information as the receiver.
// The assessments of the clone are the result of merging the receiver's assessments with "nil".
func (e *EvaluationRecord) Clone() (clone *EvaluationRecord) {
	// The result is deliberately not named "new" as that would shadow the builtin function of the same name.
	return &EvaluationRecord{
		ModelID:   e.ModelID,
		ModelName: e.ModelName,
		ModelCost: e.ModelCost,

		LanguageID: e.LanguageID,

		// NOTE(review): presumably merging with "nil" yields a separate copy of the assessments, confirm against "metrics.Merge".
		Assessments: metrics.Merge(e.Assessments, nil),
	}
}

// EvaluationRecords holds all the evaluation records, i.e. a list of pointers to the individual records.
// The "GroupBy..." methods of this type combine the records per model and per language.
type EvaluationRecords []*EvaluationRecord

// EvaluationRecordsPerModel holds the collection of evaluation records per model.
// The key is the model ID and the value is the single evaluation record that represents this model.
type EvaluationRecordsPerModel map[string]*EvaluationRecord

// GroupByModel groups the evaluation records by model.
func (e EvaluationRecords) GroupByModel() EvaluationRecordsPerModel {
perModel := map[string]*EvaluationRecord{}

for _, record := range e {
_, ok := perModel[record.ModelID]
if !ok {
perModel[record.ModelID] = record.Clone()
} else {
r := perModel[record.ModelID]
r.Assessments = metrics.Merge(r.Assessments, record.Assessments)
}
}

csv.Flush()

return out.String(), nil
return perModel
}

// Header returns the header description as a CSV row.
func (a AssessmentPerModel) Header() (header []string) {
func (EvaluationRecordsPerModel) Header() (header []string) {
return append([]string{"model-id", "model-name", "cost", "score"}, metrics.AllAssessmentKeysStrings...)
}

// Rows returns all data as CSV rows.
func (a AssessmentPerModel) Rows() (rows [][]string) {
models := maps.Keys(a)
slices.SortStableFunc(models, func(a, b model.Model) int {
return cmp.Compare(a.ID(), b.ID())
func (e EvaluationRecordsPerModel) Rows() (rows [][]string) {
models := maps.Keys(e)
slices.SortStableFunc(models, func(a, b string) int {
return cmp.Compare(a, b)
})

for _, model := range models {
metrics := a[model].StringCSV()
score := a[model].Score()
cost := model.Cost()
record := e[model]
metrics := record.Assessments.StringCSV()
score := record.Assessments.Score()
modelCost := record.ModelCost

row := append([]string{model.ID(), model.Name(), strconv.FormatFloat(cost, 'f', -1, 64), strconv.FormatUint(uint64(score), 10)}, metrics...)
row := append([]string{record.ModelID, record.ModelName, strconv.FormatFloat(modelCost, 'f', -1, 64), strconv.FormatUint(uint64(score), 10)}, metrics...)
rows = append(rows, row)
}

return rows
}

// EvaluationRecordsPerLanguagePerModel holds the collection of evaluation records per language and, within each language, per model.
// The key is the language ID and the value is the per-model collection of that language.
type EvaluationRecordsPerLanguagePerModel map[string]EvaluationRecordsPerModel

// GroupByLanguageAndModel groups the evaluation records by language and, within each language, by model.
// Records that share the same language ID and model ID are combined into a single record whose assessments are the merge of the assessments of all these records.
// The returned collection only contains clones, so grouping never writes to the records of the receiver.
func (e EvaluationRecords) GroupByLanguageAndModel() EvaluationRecordsPerLanguagePerModel {
	perLanguageAndModel := map[string]EvaluationRecordsPerModel{}

	for _, record := range e {
		perModel, ok := perLanguageAndModel[record.LanguageID]
		if !ok {
			perModel = EvaluationRecordsPerModel{}
			perLanguageAndModel[record.LanguageID] = perModel
		}

		if existing, ok := perModel[record.ModelID]; ok {
			existing.Assessments = metrics.Merge(existing.Assessments, record.Assessments)
		} else {
			// Always store a clone, also for the first record of a language, because storing the record itself would make later merges overwrite the assessments of the caller's record.
			perModel[record.ModelID] = record.Clone()
		}
	}

	return perLanguageAndModel
}

// EvaluationHeader returns the CSV header for the evaluation CSV.
func EvaluationHeader() (header []string) {
return append([]string{"model-id", "model-name", "cost", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
Expand Down Expand Up @@ -121,3 +181,164 @@ func WriteEvaluationRecord(resultPath string, model model.Model, language langua

return nil
}

// loadEvaluationRecords reads the "evaluation.csv" file of the given result path and returns its lines as evaluation records.
// The first line of the file must be equal to the header returned by "EvaluationHeader", otherwise an error is returned.
// Any error while reading the header line is reported with the message "file is empty".
func loadEvaluationRecords(resultPath string) (evaluationRecords EvaluationRecords, err error) {
	file, err := os.Open(filepath.Join(resultPath, "evaluation.csv"))
	if err != nil {
		return nil, pkgerrors.WithStack(err)
	}
	defer file.Close()

	csvReader := csv.NewReader(file)

	// The header line must match the header that is written for the evaluation CSV.
	header, err := csvReader.Read()
	if err != nil {
		return nil, pkgerrors.Wrap(err, "file is empty")
	}
	if !reflect.DeepEqual(header, EvaluationHeader()) {
		return nil, pkgerrors.WithStack(pkgerrors.Errorf("expected header %+v\nfound header %+v", EvaluationHeader(), header))
	}

	// All remaining lines are the raw records.
	rawRecords, err := csvReader.ReadAll()
	if err != nil {
		return nil, pkgerrors.WithStack(err)
	}

	// Turn every raw record into an evaluation record, and give up on the first record that cannot be converted.
	evaluationRecords = EvaluationRecords{}
	for _, rawRecord := range rawRecords {
		converted, err := convertRawRecordToEvaluationRecord(rawRecord)
		if err != nil {
			return nil, err
		}
		evaluationRecords = append(evaluationRecords, converted)
	}

	return evaluationRecords, nil
}

// convertRawRecordToEvaluationRecord converts a raw CSV record into an evaluation record.
// The columns are addressed by position: column 0 is the model ID, column 1 the model name, column 2 the model cost and column 3 the language ID.
// Columns 7 to 14 are parsed as unsigned integers and stored as the assessments for coverage, files executed, generate-tests-for-file character count, processing time, response character count, response no error, response no excess and response with code, in that order.
// An error is returned if the record has fewer than 15 columns or if one of the numeric columns cannot be parsed.
func convertRawRecordToEvaluationRecord(raw []string) (record *EvaluationRecord, err error) {
	// The last column that is read is index 14, so shorter records are rejected instead of running into an out-of-range panic.
	const minimumColumnCount = 15
	if len(raw) < minimumColumnCount {
		return nil, pkgerrors.Errorf("expected at least %d columns in evaluation record but found %d", minimumColumnCount, len(raw))
	}

	modelCost, err := strconv.ParseFloat(raw[2], 64)
	if err != nil {
		return nil, pkgerrors.WithStack(err)
	}

	// parseCount parses the given column as an unsigned integer and remembers the first parsing error, so that the error has to be checked only once after all columns are parsed.
	var parseErr error
	parseCount := func(column int) uint64 {
		if parseErr != nil {
			return 0
		}

		value, err := strconv.ParseUint(raw[column], 10, 64)
		if err != nil {
			parseErr = pkgerrors.WithStack(err)
		}

		return value
	}

	// NOTE(review): the column positions presumably mirror the order of "metrics.AllAssessmentKeysStrings" in the evaluation header, confirm when assessment keys are added or reordered.
	assessments := metrics.NewAssessments()
	assessments[metrics.AssessmentKeyCoverage] = parseCount(7)
	assessments[metrics.AssessmentKeyFilesExecuted] = parseCount(8)
	assessments[metrics.AssessmentKeyGenerateTestsForFileCharacterCount] = parseCount(9)
	assessments[metrics.AssessmentKeyProcessingTime] = parseCount(10)
	assessments[metrics.AssessmentKeyResponseCharacterCount] = parseCount(11)
	assessments[metrics.AssessmentKeyResponseNoError] = parseCount(12)
	assessments[metrics.AssessmentKeyResponseNoExcess] = parseCount(13)
	assessments[metrics.AssessmentKeyResponseWithCode] = parseCount(14)
	if parseErr != nil {
		return nil, parseErr
	}

	return &EvaluationRecord{
		ModelID:   raw[0],
		ModelName: raw[1],
		ModelCost: modelCost,

		LanguageID: raw[3],

		Assessments: assessments,
	}, nil
}

// generateCSV returns the whole CSV as string.
// The header of the formatter is written as the first line, followed by one line per row of the formatter.
// An error is returned if the header or a row cannot be written, or if flushing the CSV writer fails.
func generateCSV(formatter CSVFormatter) (csvData string, err error) {
	var out strings.Builder
	// The writer is deliberately not named "csv" as that would shadow the "encoding/csv" package for the rest of the function.
	writer := csv.NewWriter(&out)

	if err := writer.Write(formatter.Header()); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	// "WriteAll" writes every row and then flushes, returning the flush error which a plain "Flush" call would not report.
	if err := writer.WriteAll(formatter.Rows()); err != nil {
		return "", pkgerrors.WithStack(err)
	}

	return out.String(), nil
}

// WriteCSVs writes the various CSV reports to disk.
// The reports are computed from the evaluation records of the "evaluation.csv" file in the result path and are written into the same directory: one "models-summed.csv" file, and one "<language-id>-summed.csv" file per language found in the records.
func WriteCSVs(resultPath string) (err error) {
	records, err := loadEvaluationRecords(resultPath)
	if err != nil {
		return err
	}

	// The "models-summed.csv" file contains the summary per model.
	modelsCSV, err := generateCSV(records.GroupByModel())
	if err != nil {
		return pkgerrors.Wrap(err, "could not create models-summed.csv summary")
	}
	err = os.WriteFile(filepath.Join(resultPath, "models-summed.csv"), []byte(modelsCSV), 0644)
	if err != nil {
		return pkgerrors.Wrap(err, "could not write models-summed.csv summary")
	}

	// Every "<language-id>-summed.csv" file contains the summary per model for a single language.
	for language, perModel := range records.GroupByLanguageAndModel() {
		fileName := language + "-summed.csv"

		languageCSV, err := generateCSV(perModel)
		if err != nil {
			return pkgerrors.Wrap(err, "could not create "+fileName+" summary")
		}
		err = os.WriteFile(filepath.Join(resultPath, fileName), []byte(languageCSV), 0644)
		if err != nil {
			return pkgerrors.Wrap(err, "could not write "+fileName+" summary")
		}
	}

	return nil
}
Loading

0 comments on commit efbd939

Please sign in to comment.