Store models cost information along with the corresponding scoring in…

… a CSV file, so it can be used for data visualization Part of #296
symflower · Jul 30, 2024 · c6ddd10 · c6ddd10
1 parent 74f81a9
commit c6ddd10
Show file tree

Hide file tree

Showing 6 changed files with 262 additions and 0 deletions.
diff --git a/cmd/eval-dev-quality/cmd/report.go b/cmd/eval-dev-quality/cmd/report.go
@@ -12,6 +12,7 @@ import (
 	"github.com/symflower/eval-dev-quality/evaluate"
 	"github.com/symflower/eval-dev-quality/evaluate/report"
 	"github.com/symflower/eval-dev-quality/log"
+	"github.com/symflower/eval-dev-quality/provider/openrouter"
 	"github.com/symflower/eval-dev-quality/util"
 )
 
@@ -80,6 +81,28 @@ func (command *Report) Execute(args []string) (err error) {
 		command.logger.Panicf("ERROR: %s", err)
 	}
 
+	// Create a CSV file that holds the models total scores along with the corresponding model costs.
+	var costsCSVFile *os.File
+	if costsCSVFile, err = util.CreateFileIfNotExists(filepath.Join(command.ResultPath, "costs.csv")); err != nil {
+		command.logger.Panicf("ERROR: %s", err)
+	}
+	defer costsCSVFile.Close()
+
+	modelsWithScores, err := report.NewModelsWithScores(records)
+	if err != nil {
+		command.logger.Panicf("ERROR: %s", err)
+	}
+	provider := openrouter.NewProvider().(*openrouter.Provider)
+	modelCosts, err := provider.ModelsCosts()
+	if err != nil {
+		command.logger.Panicf("ERROR: %s", err)
+	}
+	modelsWithScoresAndCosts := modelsWithScores.ModelsWithScoresAndCosts(modelCosts)
+	report.SortEvaluationRecords(modelsWithScoresAndCosts)
+	if err = report.WriteCostsCSV(costsCSVFile, modelsWithScoresAndCosts); err != nil {
+		command.logger.Panicf("ERROR: %s", err)
+	}
+
 	// Write markdown reports.
 	assessmentsPerModel, err := report.RecordsToAssessmentsPerModel(records)
 	if err != nil {

diff --git a/cmd/eval-dev-quality/cmd/report_test.go b/cmd/eval-dev-quality/cmd/report_test.go
@@ -170,6 +170,7 @@ func TestReportExecute(t *testing.T) {
 				expectedContent := fmt.Sprintf("%s\n%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent)
 				assert.Equal(t, expectedContent, data)
 			},
+			filepath.Join("result-directory", "costs.csv"): nil,
 		},
 	})
 	validate(t, &testCase{
@@ -213,6 +214,7 @@ func TestReportExecute(t *testing.T) {
 				expectedContent := fmt.Sprintf("%s\n%s%s%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent, gemmaEvaluationCSVFileContent, gpt4EvaluationCSVFileContent)
 				assert.Equal(t, expectedContent, data)
 			},
+			filepath.Join("result-directory", "costs.csv"): nil,
 		},
 	})
 	validate(t, &testCase{
@@ -253,6 +255,7 @@ func TestReportExecute(t *testing.T) {
 				expectedContent := fmt.Sprintf("%s\n%s%s%s", strings.Join(report.EvaluationHeader(), ","), claudeEvaluationCSVFileContent, gemmaEvaluationCSVFileContent, gpt4EvaluationCSVFileContent)
 				assert.Equal(t, expectedContent, data)
 			},
+			filepath.Join("result-directory", "costs.csv"): nil,
 		},
 	})
 }

diff --git a/evaluate/report/csv.go b/evaluate/report/csv.go
@@ -140,6 +140,59 @@ func assessmentFromRecord(assessmentFields []string) (assessments metrics.Assess
 	return assessments, nil
 }
 
+// ModelsWithScores holds all unique models with summed scores.
+type ModelsWithScores map[string]uint64
+
+// NewModelsWithScores returns all unique models with summed scores.
+func NewModelsWithScores(records [][]string) (modelsWithScores ModelsWithScores, err error) {
+	modelsWithScores = map[string]uint64{}
+
+	for _, record := range records {
+		model := record[0]
+		modelScore, err := strconv.ParseUint(record[4], 10, 64)
+		if err != nil {
+			return nil, pkgerrors.WithStack(err)
+		}
+		if _, ok := modelsWithScores[model]; !ok {
+			modelsWithScores[model] = modelScore
+		} else {
+			modelsWithScores[model] += modelScore
+		}
+	}
+
+	return modelsWithScores, nil
+}
+
+// ModelsWithScoresAndCosts returns a list of records with each model's cost and scoring information.
+func (r ModelsWithScores) ModelsWithScoresAndCosts(modelsWithCosts map[string]float64) (records [][]string) {
+	records = [][]string{}
+	for model, score := range r {
+		if _, ok := modelsWithCosts[model]; !ok {
+			records = append(records, []string{model, "0", strconv.FormatUint(score, 10)})
+		} else {
+			records = append(records, []string{model, strconv.FormatFloat(modelsWithCosts[model], 'f', -1, 64), strconv.FormatUint(score, 10)})
+		}
+	}
+
+	return records
+}
+
+// WriteCostsCSV writes a CSV file with each model's cost and score information.
+func WriteCostsCSV(writer io.Writer, records [][]string) (err error) {
+	csv := csv.NewWriter(writer)
+
+	costsHeader := []string{"model-id", "model-cost", "score"}
+	if err := csv.Write(costsHeader); err != nil {
+		return pkgerrors.WithStack(err)
+	}
+	if err := csv.WriteAll(records); err != nil {
+		return pkgerrors.WithStack(err)
+	}
+	csv.Flush()
+
+	return nil
+}
+
 // SortEvaluationRecords sorts the evaluation records.
 func SortEvaluationRecords(records [][]string) {
 	sort.Slice(records, func(i, j int) bool {

diff --git a/evaluate/report/csv_test.go b/evaluate/report/csv_test.go
@@ -480,3 +480,141 @@ func TestRecordsToAssessmentsPerModel(t *testing.T) {
 		},
 	})
 }
+
+func TestNewModelsWithScores(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		Records [][]string
+
+		ExpectedModelsWithScores ModelsWithScores
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			actualModelsWithScores, actualErr := NewModelsWithScores(tc.Records)
+			require.NoError(t, actualErr)
+
+			assert.Equal(t, tc.ExpectedModelsWithScores, actualModelsWithScores)
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Single record",
+
+		Records: [][]string{
+			[]string{"modelA", "languageB", "repositoryA", "taskA", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+		},
+
+		ExpectedModelsWithScores: ModelsWithScores{
+			"modelA": 0,
+		},
+	})
+	validate(t, &testCase{
+		Name: "Multiple records",
+
+		Records: [][]string{
+			[]string{"modelA", "languageB", "repositoryA", "taskA", "10", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+			[]string{"modelA", "languageB", "repositoryA", "taskA", "20", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+			[]string{"modelA", "languageB", "repositoryA", "taskA", "30", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+			[]string{"modelB", "languageB", "repositoryA", "taskA", "40", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+			[]string{"modelB", "languageB", "repositoryA", "taskA", "50", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+			[]string{"modelC", "languageB", "repositoryA", "taskA", "60", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"},
+		},
+
+		ExpectedModelsWithScores: ModelsWithScores{
+			"modelA": 60,
+			"modelB": 90,
+			"modelC": 60,
+		},
+	})
+}
+
+func TestModelsWithScoresAndCosts(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		ModelsWithScores ModelsWithScores
+
+		ModelsWithCosts map[string]float64
+
+		ExpectedRecords [][]string
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			actualRecords := tc.ModelsWithScores.ModelsWithScoresAndCosts(tc.ModelsWithCosts)
+
+			assert.ElementsMatch(t, tc.ExpectedRecords, actualRecords)
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "Model without costs information",
+
+		ModelsWithScores: ModelsWithScores{
+			"modelA": 60,
+		},
+
+		ExpectedRecords: [][]string{
+			[]string{"modelA", "0", "60"},
+		},
+	})
+	validate(t, &testCase{
+		Name: "Single model",
+
+		ModelsWithScores: ModelsWithScores{
+			"modelA": 60,
+		},
+
+		ModelsWithCosts: map[string]float64{
+			"modelA": 0.001,
+		},
+
+		ExpectedRecords: [][]string{
+			[]string{"modelA", "0.001", "60"},
+		},
+	})
+	validate(t, &testCase{
+		Name: "Multiple models",
+
+		ModelsWithScores: ModelsWithScores{
+			"modelA": 10,
+			"modelB": 20,
+			"modelC": 30,
+			"modelD": 40,
+		},
+
+		ModelsWithCosts: map[string]float64{
+			"modelA": 0.001,
+			"modelD": 0.002,
+		},
+
+		ExpectedRecords: [][]string{
+			[]string{"modelA", "0.001", "10"},
+			[]string{"modelB", "0", "20"},
+			[]string{"modelC", "0", "30"},
+			[]string{"modelD", "0.002", "40"},
+		},
+	})
+}
+
+func TestWriteCostsCSV(t *testing.T) {
+	var file strings.Builder
+	WriteCostsCSV(&file, [][]string{
+		[]string{"modelA", "0.001", "10"},
+		[]string{"modelB", "0", "20"},
+		[]string{"modelC", "0", "30"},
+		[]string{"modelD", "0.002", "40"},
+	})
+
+	expectedFileContent := bytesutil.StringTrimIndentations(`
+		model-id,model-cost,score
+		modelA,0.001,10
+		modelB,0,20
+		modelC,0,30
+		modelD,0.002,40
+	`)
+
+	assert.Equal(t, expectedFileContent, file.String())
+}
diff --git a/provider/openrouter/openrouter.go b/provider/openrouter/openrouter.go
@@ -7,6 +7,7 @@ import (
 	"io"
 	"net/http"
 	"net/url"
+	"strconv"
 	"strings"
 	"time"
 
@@ -146,6 +147,39 @@ func (p *Provider) fetchModels() (models ModelsList, err error) {
 	return models, nil
 }
 
+// ModelsCosts returns the costs information for the models.
+func (p *Provider) ModelsCosts() (modelsCosts map[string]float64, err error) {
+	modelsCosts = map[string]float64{}
+
+	models, err := p.fetchModels()
+	if err != nil {
+		return nil, pkgerrors.WithStack(err)
+	}
+
+	for _, model := range models.Models {
+		prompt, err := strconv.ParseFloat(strings.TrimSpace(model.Pricing.Prompt), 64)
+		if err != nil {
+			return nil, pkgerrors.WithStack(err)
+		}
+		completion, err := strconv.ParseFloat(strings.TrimSpace(model.Pricing.Completion), 64)
+		if err != nil {
+			return nil, pkgerrors.WithStack(err)
+		}
+		request, err := strconv.ParseFloat(strings.TrimSpace(model.Pricing.Request), 64)
+		if err != nil {
+			return nil, pkgerrors.WithStack(err)
+		}
+		image, err := strconv.ParseFloat(strings.TrimSpace(model.Pricing.Image), 64)
+		if err != nil {
+			return nil, pkgerrors.WithStack(err)
+		}
+
+		modelsCosts[model.ID] = prompt + completion + request + image
+	}
+
+	return modelsCosts, nil
+}
+
 var _ provider.InjectToken = (*Provider)(nil)
 
 // SetToken sets a potential token to be used in case the provider needs to authenticate a remote API.

diff --git a/provider/openrouter/openrouter_test.go b/provider/openrouter/openrouter_test.go
@@ -1,6 +1,7 @@
 package openrouter
 
 import (
+	"fmt"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -15,3 +16,13 @@ func TestProviderModels(t *testing.T) {
 	require.NoError(t, err)
 	assert.NotEmpty(t, models)
 }
+
+func TestProviderModelsCosts(t *testing.T) {
+	provider := NewProvider().(*Provider)
+
+	modelsCosts, err := provider.ModelsCosts()
+	fmt.Println(modelsCosts)
+
+	require.NoError(t, err)
+	assert.NotEmpty(t, modelsCosts)
+}