Skip to content

Commit

Permalink
Remove all occurrences of model costs and human-readable names, since…
Browse files Browse the repository at this point in the history
… they will be handled afterwards with tooling

Part of #237
  • Loading branch information
ruiAzevedo19 committed Jul 12, 2024
1 parent 16d55ab commit aa44c7a
Show file tree
Hide file tree
Showing 12 changed files with 73 additions and 293 deletions.
5 changes: 2 additions & 3 deletions cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,7 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
command.logger.Panicf("ERROR: unknown custom provider %q for model %q", providerID, model)
}

modelName := strings.Split(model, provider.ProviderModelSeparator)
modelProvider.AddModel(llm.NewNamedModelWithCost(modelProvider, model, modelName[len(modelName)-1], 0))
modelProvider.AddModel(llm.NewModel(modelProvider, model))
}
}

Expand Down Expand Up @@ -433,7 +432,7 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err
}

_ = assessmentsPerModel.WalkByScore(func(model model.Model, assessment metrics.Assessments, score uint64) (err error) {
command.logger.Printf("Evaluation score for %q (%q): cost=%.2f, %s", model.ID(), assessment.Category(totalScore).ID, model.Cost(), assessment)
command.logger.Printf("Evaluation score for %q (%q): %s", model.ID(), assessment.Category(totalScore).ID, assessment)

return nil
})
Expand Down
4 changes: 2 additions & 2 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ var extractMetricsLogsMatch = extractMetricsMatch(regexp.MustCompile(`score=(\d+

// extractMetricsCSVMatch is a regular expression to extract metrics from CSV rows.
// REMARK The cost is not matched as a group since it is just a model property that we carry along for informational purposes.
var extractMetricsCSVMatch = extractMetricsMatch(regexp.MustCompile(`(?:\d+(?:\.\d+)?,)?(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)`))
var extractMetricsCSVMatch = extractMetricsMatch(regexp.MustCompile(`(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+),(\d+)`))

// extractMetrics extracts multiple assessment metrics from the given string according to a given regular expression.
func extractMetrics(t *testing.T, regex extractMetricsMatch, data string) (assessments []metrics.Assessments, scores []uint64) {
Expand Down Expand Up @@ -532,7 +532,7 @@ func TestEvaluateExecute(t *testing.T) {
},

ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) {
assert.Regexp(t, `Evaluation score for "symflower/symbolic-execution" \("code-no-excess"\): cost=0.00, score=28, coverage=20, files-executed=2, files-executed-maximum-reachable=2, generate-tests-for-file-character-count=508, processing-time=\d+, response-character-count=508, response-no-error=2, response-no-excess=2, response-with-code=2`, output)
assert.Regexp(t, `Evaluation score for "symflower/symbolic-execution" \("code-no-excess"\): score=28, coverage=20, files-executed=2, files-executed-maximum-reachable=2, generate-tests-for-file-character-count=508, processing-time=\d+, response-character-count=508, response-no-error=2, response-no-excess=2, response-with-code=2`, output)
assert.Equal(t, 1, strings.Count(output, "Evaluation score for"))
},
ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){
Expand Down
18 changes: 9 additions & 9 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ func TestEvaluate(t *testing.T) {

{
languageGolang := &golang.Language{}
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, "empty-response-model", "Empty Response Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, "empty-response-model")
repositoryPath := filepath.Join("golang", "plain")

validate(t, &testCase{
Expand Down Expand Up @@ -443,7 +443,7 @@ func TestEvaluate(t *testing.T) {
{
languageGolang := &golang.Language{}
mockedModelID := "mocked-generation-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Mocked Generation Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

validate(t, &testCase{
Name: "Problems of previous runs shouldn't cancel successive runs",
Expand Down Expand Up @@ -544,7 +544,7 @@ func TestEvaluate(t *testing.T) {
{
languageGolang := &golang.Language{}
mockedModelID := "mocked-generation-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Mocked Generation Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

validate(t, &testCase{
Name: "Solving basic checks once is enough",
Expand Down Expand Up @@ -644,7 +644,7 @@ func TestEvaluate(t *testing.T) {
{
languageGolang := &golang.Language{}
mockedModelID := "mocked-generation-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Mocked Generation Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

validate(t, &testCase{
Name: "Never solving basic checks leads to exclusion",
Expand Down Expand Up @@ -714,7 +714,7 @@ func TestEvaluate(t *testing.T) {
{
languageGolang := &golang.Language{}
mockedModelID := "mocked-generation-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Mocked Generation Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

repositoryPath := filepath.Join("golang", "plain")
validate(t, &testCase{
Expand Down Expand Up @@ -785,7 +785,7 @@ func TestEvaluate(t *testing.T) {
{
languageGolang := &golang.Language{}
mockedModelID := "mocked-generation-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Mocked Generation Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

repositoryPath := filepath.Join("golang", "plain")
validate(t, &testCase{
Expand Down Expand Up @@ -864,7 +864,7 @@ func TestEvaluate(t *testing.T) {
// Setup provider and model mocking.
languageGolang := &golang.Language{}
mockedModelID := "testing-provider/testing-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Testing Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
mockedProviderID := "testing-provider"
mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel})
mockedLoader := providertesting.NewMockLoader(t)
Expand Down Expand Up @@ -949,7 +949,7 @@ func TestEvaluate(t *testing.T) {
// Setup provider and model mocking.
languageGolang := &golang.Language{}
mockedModelID := "testing-provider/testing-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Testing Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)
mockedProviderID := "testing-provider"
mockedProvider := providertesting.NewMockProviderNamedWithModels(t, mockedProviderID, []model.Model{mockedModel})
mockedLoader := providertesting.NewMockLoader(t)
Expand Down Expand Up @@ -1033,7 +1033,7 @@ func TestEvaluate(t *testing.T) {
// Setup provider and model mocking.
languageGolang := &golang.Language{}
mockedModelID := "testing-provider/testing-model"
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamedWithCost(t, mockedModelID, "Testing Model", 0.0001)
mockedModel := modeltesting.NewMockCapabilityWriteTestsNamed(t, mockedModelID)

repositoryPath := filepath.Join("golang", "plain")

Expand Down
25 changes: 7 additions & 18 deletions evaluate/report/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (e *EvaluationFile) WriteEvaluationRecord(model model.Model, language langu

for _, task := range tasks {
assessment := assessmentsPerTask[task]
row := append([]string{model.ID(), model.Name(), strconv.FormatFloat(model.Cost(), 'f', -1, 64), language.ID(), repositoryName, string(task), strconv.FormatUint(uint64(assessment.Score()), 10)}, assessment.StringCSV()...)
row := append([]string{model.ID(), language.ID(), repositoryName, string(task), strconv.FormatUint(uint64(assessment.Score()), 10)}, assessment.StringCSV()...)
csv.Write(row)
}
csv.Flush()
Expand All @@ -70,17 +70,13 @@ func (e *EvaluationFile) WriteEvaluationRecord(model model.Model, language langu

// evaluationHeader returns the CSV header for the evaluation CSV.
func evaluationHeader() (header []string) {
return append([]string{"model-id", "model-name", "cost", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
return append([]string{"model-id", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
}

// EvaluationRecord holds a line of the evaluation CSV.
type EvaluationRecord struct {
// ModelID holds the model id.
ModelID string
// ModelName holds the model name.
ModelName string
// ModelCost holds the model cost.
ModelCost float64

// LanguageID holds the language id.
LanguageID string
Expand All @@ -94,8 +90,6 @@ func (e *EvaluationRecord) Clone() (new *EvaluationRecord) {
new = &EvaluationRecord{}

new.ModelID = e.ModelID
new.ModelName = e.ModelName
new.ModelCost = e.ModelCost
new.LanguageID = e.LanguageID
new.Assessments = metrics.Merge(e.Assessments, nil)

Expand Down Expand Up @@ -127,7 +121,7 @@ func (e EvaluationRecords) GroupByModel() EvaluationRecordsPerModel {

// Header returns the header description as a CSV row.
func (EvaluationRecordsPerModel) Header() (header []string) {
return append([]string{"model-id", "model-name", "cost", "score"}, metrics.AllAssessmentKeysStrings...)
return append([]string{"model-id", "score"}, metrics.AllAssessmentKeysStrings...)
}

// Rows returns all data as CSV rows.
Expand All @@ -141,9 +135,8 @@ func (e EvaluationRecordsPerModel) Rows() (rows [][]string) {
record := e[model]
metrics := record.Assessments.StringCSV()
score := record.Assessments.Score()
modelCost := record.ModelCost

row := append([]string{record.ModelID, record.ModelName, strconv.FormatFloat(modelCost, 'f', -1, 64), strconv.FormatUint(uint64(score), 10)}, metrics...)
row := append([]string{record.ModelID, strconv.FormatUint(uint64(score), 10)}, metrics...)
rows = append(rows, row)
}

Expand Down Expand Up @@ -217,15 +210,13 @@ func convertRawRecordToEvaluationRecord(raw []string) (record *EvaluationRecord,
assessments := metrics.NewAssessments()

modelID := raw[0]
modelName := raw[1]
modelCost, err := strconv.ParseFloat(raw[2], 64)
if err != nil {
return nil, pkgerrors.WithStack(err)
}

languageID := raw[3]
languageID := raw[1]

rawMetrics := raw[7:]
rawMetrics := raw[5:]
for i, assessementKey := range metrics.AllAssessmentKeysStrings {
metric, err := strconv.ParseUint(rawMetrics[i], 10, 64)
if err != nil {
Expand All @@ -236,9 +227,7 @@ func convertRawRecordToEvaluationRecord(raw []string) (record *EvaluationRecord,
}

return &EvaluationRecord{
ModelID: modelID,
ModelName: modelName,
ModelCost: modelCost,
ModelID: modelID,

LanguageID: languageID,

Expand Down
Loading

0 comments on commit aa44c7a

Please sign in to comment.