Skip to content

Commit

Permalink
Store models cost information along with the corresponding scoring in…
Browse files Browse the repository at this point in the history
… a CSV file, so it can be used for data visualization

Part of #296
  • Loading branch information
ruiAzevedo19 committed Jul 30, 2024
1 parent 74f81a9 commit c6ddd10
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 0 deletions.
23 changes: 23 additions & 0 deletions cmd/eval-dev-quality/cmd/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/symflower/eval-dev-quality/evaluate"
"github.com/symflower/eval-dev-quality/evaluate/report"
"github.com/symflower/eval-dev-quality/log"
"github.com/symflower/eval-dev-quality/provider/openrouter"
"github.com/symflower/eval-dev-quality/util"
)

Expand Down Expand Up @@ -80,6 +81,28 @@ func (command *Report) Execute(args []string) (err error) {
command.logger.Panicf("ERROR: %s", err)
}

// Create a CSV file that holds the models total scores along with the corresponding model costs.
var costsCSVFile *os.File
if costsCSVFile, err = util.CreateFileIfNotExists(filepath.Join(command.ResultPath, "costs.csv")); err != nil {
command.logger.Panicf("ERROR: %s", err)
}
defer costsCSVFile.Close()

modelsWithScores, err := report.NewModelsWithScores(records)
if err != nil {
command.logger.Panicf("ERROR: %s", err)
}
provider := openrouter.NewProvider().(*openrouter.Provider)
modelCosts, err := provider.ModelsCosts()
if err != nil {
command.logger.Panicf("ERROR: %s", err)
}
modelsWithScoresAndCosts := modelsWithScores.ModelsWithScoresAndCosts(modelCosts)
report.SortEvaluationRecords(modelsWithScoresAndCosts)
if err = report.WriteCostsCSV(costsCSVFile, modelsWithScoresAndCosts); err != nil {
command.logger.Panicf("ERROR: %s", err)
}

// Write markdown reports.
assessmentsPerModel, err := report.RecordsToAssessmentsPerModel(records)
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions cmd/eval-dev-quality/cmd/report_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func TestReportExecute(t *testing.T) {
expectedContent := fmt.Sprintf("%s\n%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent)
assert.Equal(t, expectedContent, data)
},
filepath.Join("result-directory", "costs.csv"): nil,
},
})
validate(t, &testCase{
Expand Down Expand Up @@ -213,6 +214,7 @@ func TestReportExecute(t *testing.T) {
expectedContent := fmt.Sprintf("%s\n%s%s%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent, gemmaEvaluationCSVFileContent, gpt4EvaluationCSVFileContent)
assert.Equal(t, expectedContent, data)
},
filepath.Join("result-directory", "costs.csv"): nil,
},
})
validate(t, &testCase{
Expand Down Expand Up @@ -253,6 +255,7 @@ func TestReportExecute(t *testing.T) {
expectedContent := fmt.Sprintf("%s\n%s%s%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent, gemmaEvaluationCSVFileContent, gpt4EvaluationCSVFileContent)
assert.Equal(t, expectedContent, data)
},
filepath.Join("result-directory", "costs.csv"): nil,
},
})
}
Expand Down
53 changes: 53 additions & 0 deletions evaluate/report/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,59 @@ func assessmentFromRecord(assessmentFields []string) (assessments metrics.Assess
return assessments, nil
}

// ModelsWithScores maps each unique model to the sum of the scores of all its evaluation records.
type ModelsWithScores map[string]uint64

// NewModelsWithScores returns all unique models with summed scores.
//
// The model is read from the first field of a record and its score, an unsigned base-10 integer, from the fifth field. Scores of records that share the same model are added up.
// An error is returned, instead of panicking, if a record has fewer than five fields, and also if a score cannot be parsed. In both cases no models are returned.
func NewModelsWithScores(records [][]string) (modelsWithScores ModelsWithScores, err error) {
	const (
		modelColumn = 0
		scoreColumn = 4
	)

	modelsWithScores = ModelsWithScores{}
	for i, record := range records {
		// Guard the index accesses below, a truncated record must not crash the whole report.
		if len(record) <= scoreColumn {
			return nil, pkgerrors.Errorf("record %d has %d fields but at least %d are required", i, len(record), scoreColumn+1)
		}

		score, parseErr := strconv.ParseUint(record[scoreColumn], 10, 64)
		if parseErr != nil {
			return nil, pkgerrors.WithStack(parseErr)
		}

		// A missing key reads as zero, so no separate "first occurrence" case is needed.
		modelsWithScores[record[modelColumn]] += score
	}

	return modelsWithScores, nil
}

// ModelsWithScoresAndCosts returns a list of records with each model's cost and scoring information.
//
// Every record has the form "model, cost, score". The cost is looked up in "modelsWithCosts" and formatted with the minimal number of digits; a model without cost information gets the cost "0".
// The records are ordered by model so the result does not depend on the random iteration order of maps.
func (r ModelsWithScores) ModelsWithScoresAndCosts(modelsWithCosts map[string]float64) (records [][]string) {
	models := make([]string, 0, len(r))
	for model := range r {
		models = append(models, model)
	}
	sort.Strings(models)

	records = make([][]string, 0, len(models))
	for _, model := range models {
		records = append(records, []string{
			model,
			// A missing entry reads as 0 which is formatted as "0".
			strconv.FormatFloat(modelsWithCosts[model], 'f', -1, 64),
			strconv.FormatUint(r[model], 10),
		})
	}

	return records
}

// WriteCostsCSV writes a CSV file with each model's cost and score information.
//
// The header "model-id,model-cost,score" is written first, followed by the given records in their given order. All data is flushed to "writer" before the function returns, and an error that occurs while writing or flushing is returned.
func WriteCostsCSV(writer io.Writer, records [][]string) (err error) {
	// Do not name the variable "csv", that would shadow the package for the rest of the function.
	csvWriter := csv.NewWriter(writer)

	if err = csvWriter.Write([]string{"model-id", "model-cost", "score"}); err != nil {
		return pkgerrors.WithStack(err)
	}
	// "WriteAll" flushes the writer and reports the flush error, so no additional "Flush" call is necessary.
	if err = csvWriter.WriteAll(records); err != nil {
		return pkgerrors.WithStack(err)
	}

	return nil
}

// SortEvaluationRecords sorts the evaluation records.
func SortEvaluationRecords(records [][]string) {
sort.Slice(records, func(i, j int) bool {
Expand Down
138 changes: 138 additions & 0 deletions evaluate/report/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,3 +480,141 @@ func TestRecordsToAssessmentsPerModel(t *testing.T) {
},
})
}

// TestNewModelsWithScores checks that scores of evaluation records are summed up per model.
func TestNewModelsWithScores(t *testing.T) {
	type testCase struct {
		Name string

		Records [][]string

		ExpectedModelsWithScores ModelsWithScores
	}

	testCases := []*testCase{
		{
			Name: "Single record",

			Records: [][]string{
				{"modelA", "languageB", "repositoryA", "taskA", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
			},

			ExpectedModelsWithScores: ModelsWithScores{
				"modelA": 0,
			},
		},
		{
			Name: "Multiple records",

			Records: [][]string{
				{"modelA", "languageB", "repositoryA", "taskA", "10", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
				{"modelA", "languageB", "repositoryA", "taskA", "20", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
				{"modelA", "languageB", "repositoryA", "taskA", "30", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
				{"modelB", "languageB", "repositoryA", "taskA", "40", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
				{"modelB", "languageB", "repositoryA", "taskA", "50", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
				{"modelC", "languageB", "repositoryA", "taskA", "60", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
			},

			ExpectedModelsWithScores: ModelsWithScores{
				"modelA": 60,
				"modelB": 90,
				"modelC": 60,
			},
		},
	}

	// Every test case runs as its own sub-test named after the case.
	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			summedScores, err := NewModelsWithScores(tc.Records)
			require.NoError(t, err)

			assert.Equal(t, tc.ExpectedModelsWithScores, summedScores)
		})
	}
}

// TestModelsWithScoresAndCosts checks that scores are combined with the matching costs, and that models without cost information receive the cost "0".
func TestModelsWithScoresAndCosts(t *testing.T) {
	type testCase struct {
		Name string

		ModelsWithScores ModelsWithScores

		ModelsWithCosts map[string]float64

		ExpectedRecords [][]string
	}

	testCases := []*testCase{
		{
			Name: "Model without costs information",

			ModelsWithScores: ModelsWithScores{
				"modelA": 60,
			},

			ExpectedRecords: [][]string{
				{"modelA", "0", "60"},
			},
		},
		{
			Name: "Single model",

			ModelsWithScores: ModelsWithScores{
				"modelA": 60,
			},

			ModelsWithCosts: map[string]float64{
				"modelA": 0.001,
			},

			ExpectedRecords: [][]string{
				{"modelA", "0.001", "60"},
			},
		},
		{
			Name: "Multiple models",

			ModelsWithScores: ModelsWithScores{
				"modelA": 10,
				"modelB": 20,
				"modelC": 30,
				"modelD": 40,
			},

			ModelsWithCosts: map[string]float64{
				"modelA": 0.001,
				"modelD": 0.002,
			},

			ExpectedRecords: [][]string{
				{"modelA", "0.001", "10"},
				{"modelB", "0", "20"},
				{"modelC", "0", "30"},
				{"modelD", "0.002", "40"},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.Name, func(t *testing.T) {
			records := tc.ModelsWithScores.ModelsWithScoresAndCosts(tc.ModelsWithCosts)

			// The comparison ignores the order of the records.
			assert.ElementsMatch(t, tc.ExpectedRecords, records)
		})
	}
}

// TestWriteCostsCSV checks that the header and all records are written as CSV, and that writing does not fail.
func TestWriteCostsCSV(t *testing.T) {
	var file strings.Builder
	err := WriteCostsCSV(&file, [][]string{
		{"modelA", "0.001", "10"},
		{"modelB", "0", "20"},
		{"modelC", "0", "30"},
		{"modelD", "0.002", "40"},
	})
	// A failed write must fail the test right away instead of surfacing as a confusing content mismatch.
	require.NoError(t, err)

	expectedFileContent := bytesutil.StringTrimIndentations(`
		model-id,model-cost,score
		modelA,0.001,10
		modelB,0,20
		modelC,0,30
		modelD,0.002,40
	`)

	assert.Equal(t, expectedFileContent, file.String())
}
34 changes: 34 additions & 0 deletions provider/openrouter/openrouter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -146,6 +147,39 @@ func (p *Provider) fetchModels() (models ModelsList, err error) {
return models, nil
}

// ModelsCosts returns the costs information for the models.
//
// The models are obtained through "fetchModels". For every model the "prompt", "completion", "request" and "image" pricing fields are parsed as decimal numbers, ignoring surrounding whitespace, and added up into a single cost that is stored under the model's ID.
// If fetching the models fails or a pricing field cannot be parsed, no costs are returned. A parsing error names the offending model and pricing field.
func (p *Provider) ModelsCosts() (modelsCosts map[string]float64, err error) {
	models, err := p.fetchModels()
	if err != nil {
		return nil, pkgerrors.WithStack(err)
	}

	modelsCosts = make(map[string]float64, len(models.Models))
	for _, model := range models.Models {
		// The order of the components is the order in which the prices are added up.
		components := [...]struct {
			name  string
			value string
		}{
			{name: "prompt", value: model.Pricing.Prompt},
			{name: "completion", value: model.Pricing.Completion},
			{name: "request", value: model.Pricing.Request},
			{name: "image", value: model.Pricing.Image},
		}

		var cost float64
		for _, component := range components {
			price, parseErr := strconv.ParseFloat(strings.TrimSpace(component.value), 64)
			if parseErr != nil {
				return nil, pkgerrors.Wrapf(parseErr, "parsing %s price %q of model %q", component.name, component.value, model.ID)
			}
			cost += price
		}

		modelsCosts[model.ID] = cost
	}

	return modelsCosts, nil
}

var _ provider.InjectToken = (*Provider)(nil)

// SetToken sets a potential token to be used in case the provider needs to authenticate a remote API.
Expand Down
11 changes: 11 additions & 0 deletions provider/openrouter/openrouter_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package openrouter

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -15,3 +16,13 @@ func TestProviderModels(t *testing.T) {
require.NoError(t, err)
assert.NotEmpty(t, models)
}

// TestProviderModelsCosts checks that the provider returns a non-empty set of model costs without an error.
func TestProviderModelsCosts(t *testing.T) {
	openRouter := NewProvider().(*Provider)

	costs, err := openRouter.ModelsCosts()
	// Print the costs for manual inspection, before the assertions can stop the test.
	fmt.Println(costs)

	require.NoError(t, err)
	assert.NotEmpty(t, costs)
}

0 comments on commit c6ddd10

Please sign in to comment.