Skip to content

Commit

Permalink
Extract model names, to obtain a human-readable name for each model
Browse files Browse the repository at this point in the history
Closes #206
  • Loading branch information
ruiAzevedo19 committed Jun 26, 2024
1 parent 0af4eab commit 8b70010
Show file tree
Hide file tree
Showing 10 changed files with 64 additions and 26 deletions.
3 changes: 2 additions & 1 deletion cmd/eval-dev-quality/cmd/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.
command.logger.Panicf("ERROR: unknown custom provider %q for model %q", providerID, model)
}

modelProvider.AddModel(llm.NewModel(modelProvider, model))
modelName := strings.Split(model, provider.ProviderModelSeparator)
modelProvider.AddModel(llm.NewNamedModelWithCost(modelProvider, model, modelName[len(modelName)-1], 0))
}
}

Expand Down
8 changes: 4 additions & 4 deletions evaluate/report/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func GenerateCSV(formatter CSVFormatter) (csvData string, err error) {

// Header returns the header description as a CSV row.
func (a *AssessmentStore) Header() (header []string) {
return append([]string{"model", "cost", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
return append([]string{"model-id", "model-name", "cost", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
}

// Rows returns all data as CSV rows.
Expand All @@ -56,7 +56,7 @@ func (a *AssessmentStore) Rows() (rows [][]string) {
score := a.Score()
cost := m.Cost()

row := append([]string{m.ID(), strconv.FormatFloat(cost, 'f', -1, 64), l.ID(), r, string(t), strconv.FormatUint(uint64(score), 10)}, metrics...)
row := append([]string{m.ID(), m.Name(), strconv.FormatFloat(cost, 'f', -1, 64), l.ID(), r, string(t), strconv.FormatUint(uint64(score), 10)}, metrics...)
rows = append(rows, row)

return nil
Expand All @@ -67,7 +67,7 @@ func (a *AssessmentStore) Rows() (rows [][]string) {

// Header returns the header description as a CSV row.
func (a AssessmentPerModel) Header() (header []string) {
return append([]string{"model", "cost", "score"}, metrics.AllAssessmentKeysStrings...)
return append([]string{"model-id", "model-name", "cost", "score"}, metrics.AllAssessmentKeysStrings...)
}

// Rows returns all data as CSV rows.
Expand All @@ -82,7 +82,7 @@ func (a AssessmentPerModel) Rows() (rows [][]string) {
score := a[model].Score()
cost := model.Cost()

row := append([]string{model.ID(), strconv.FormatFloat(cost, 'f', -1, 64), strconv.FormatUint(uint64(score), 10)}, metrics...)
row := append([]string{model.ID(), model.Name(), strconv.FormatFloat(cost, 'f', -1, 64), strconv.FormatUint(uint64(score), 10)}, metrics...)
rows = append(rows, row)
}

Expand Down
32 changes: 16 additions & 16 deletions evaluate/report/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)

Assessments: metricstesting.AssessmentTuples{
&metricstesting.AssessmentTuple{
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model", 0),
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model", "Some Model", 0),
Language: languagetesting.NewMockLanguageNamed(t, "some-language"),
RepositoryPath: "some-repository",
Task: evaluatetask.IdentifierWriteTests,
Expand All @@ -47,16 +47,16 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)
},

ExpectedString: `
model,cost,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model,0,some-language,some-repository,write-tests,0,0,0,0,0,0,0,0,0
model-id,model-name,cost,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model,Some Model,0,some-language,some-repository,write-tests,0,0,0,0,0,0,0,0,0
`,
})
validate(t, &testCase{
Name: "Multiple Models",

Assessments: metricstesting.AssessmentTuples{
&metricstesting.AssessmentTuple{
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model-a", 0.0001),
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model-a", "Some Model A", 0.0001),
Language: languagetesting.NewMockLanguageNamed(t, "some-language"),
RepositoryPath: "some-repository",
Task: evaluatetask.IdentifierWriteTests,
Expand All @@ -72,7 +72,7 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)
},
},
&metricstesting.AssessmentTuple{
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model-b", 0.0005),
Model: modeltesting.NewMockModelNamedWithCosts(t, "some-model-b", "Some Model B", 0.0005),
Language: languagetesting.NewMockLanguageNamed(t, "some-language"),
RepositoryPath: "some-repository",
Task: evaluatetask.IdentifierWriteTests,
Expand All @@ -90,9 +90,9 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)
},

ExpectedString: `
model,cost,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model-a,0.0001,some-language,some-repository,write-tests,15,1,2,50,200,100,3,4,5
some-model-b,0.0005,some-language,some-repository,write-tests,15,1,2,100,300,200,3,4,5
model-id,model-name,cost,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model-a,Some Model A,0.0001,some-language,some-repository,write-tests,15,1,2,50,200,100,3,4,5
some-model-b,Some Model B,0.0005,some-language,some-repository,write-tests,15,1,2,100,300,200,3,4,5
`,
})
}
Expand All @@ -119,19 +119,19 @@ func TestGenerateCSVForAssessmentPerModel(t *testing.T) {
Name: "Single Empty Model",

Assessments: AssessmentPerModel{
modeltesting.NewMockModelNamedWithCosts(t, "some-model", 0): {},
modeltesting.NewMockModelNamedWithCosts(t, "some-model", "Some Model", 0): {},
},

ExpectedString: `
model,cost,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model,0,0,0,0,0,0,0,0,0,0
model-id,model-name,cost,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model,Some Model,0,0,0,0,0,0,0,0,0,0
`,
})
validate(t, &testCase{
Name: "Multiple Models",

Assessments: AssessmentPerModel{
modeltesting.NewMockModelNamedWithCosts(t, "some-model-a", 0.0001): {
modeltesting.NewMockModelNamedWithCosts(t, "some-model-a", "Some Model A", 0.0001): {
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 50,
metrics.AssessmentKeyResponseCharacterCount: 100,
metrics.AssessmentKeyCoverage: 1,
Expand All @@ -141,7 +141,7 @@ func TestGenerateCSVForAssessmentPerModel(t *testing.T) {
metrics.AssessmentKeyResponseWithCode: 5,
metrics.AssessmentKeyProcessingTime: 200,
},
modeltesting.NewMockModelNamedWithCosts(t, "some-model-b", 0.0005): {
modeltesting.NewMockModelNamedWithCosts(t, "some-model-b", "Some Model B", 0.0005): {
metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 100,
metrics.AssessmentKeyResponseCharacterCount: 200,
metrics.AssessmentKeyCoverage: 1,
Expand All @@ -154,9 +154,9 @@ func TestGenerateCSVForAssessmentPerModel(t *testing.T) {
},

ExpectedString: `
model,cost,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model-a,0.0001,15,1,2,50,200,100,3,4,5
some-model-b,0.0005,15,1,2,100,300,200,3,4,5
model-id,model-name,cost,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
some-model-a,Some Model A,0.0001,15,1,2,50,200,100,3,4,5
some-model-b,Some Model B,0.0005,15,1,2,100,300,200,3,4,5
`,
})
}
12 changes: 10 additions & 2 deletions model/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type Model struct {
provider provider.Query
// model holds the identifier for the LLM model.
model string
// name holds the name for the LLM model.
name string

// queryAttempts holds the number of query attempts to perform when a model request errors in the process of solving a task.
queryAttempts uint
Expand All @@ -46,11 +48,12 @@ func NewModel(provider provider.Query, modelIdentifier string) *Model {
}
}

// NewModelWithCost returns an LLM model corresponding to the given identifier which is queried via the given provider, and with pricing information.
func NewModelWithCost(provider provider.Query, modelIdentifier string, cost float64) *Model {
// NewNamedModelWithCost returns an LLM model corresponding to the given identifier which is queried via the given provider, and with name and pricing information.
func NewNamedModelWithCost(provider provider.Query, modelIdentifier string, name string, cost float64) *Model {
return &Model{
provider: provider,
model: modelIdentifier,
name: name,

queryAttempts: 1,

Expand Down Expand Up @@ -135,6 +138,11 @@ func (m *Model) ID() (id string) {
return m.model
}

// Name returns the human-readable name of this model.
// It returns the model's "name" field as-is, which is distinct from the identifier returned by "ID".
func (m *Model) Name() (name string) {
return m.name
}

// IsTaskSupported returns whether the model supports the given task or not.
func (m *Model) IsTaskSupported(taskIdentifier task.Identifier) (isSupported bool) {
switch taskIdentifier {
Expand Down
2 changes: 2 additions & 0 deletions model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
type Model interface {
// ID returns the unique ID of this model.
ID() (id string)
// Name returns the human-readable name of this model.
Name() (name string)

// IsTaskSupported returns whether the model supports the given task or not.
IsTaskSupported(taskIdentifier task.Identifier) (isSupported bool)
Expand Down
5 changes: 5 additions & 0 deletions model/symflower/symflower.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ func (m *Model) ID() (id string) {
return "symflower" + provider.ProviderModelSeparator + "symbolic-execution"
}

// Name returns the human-readable name of this model.
// The name is the fixed string "Symbolic Execution" and does not depend on any model state.
func (m *Model) Name() (name string) {
return "Symbolic Execution"
}

// IsTaskSupported returns whether the model supports the given task or not.
func (m *Model) IsTaskSupported(taskIdentifier task.Identifier) (isSupported bool) {
switch taskIdentifier {
Expand Down
18 changes: 18 additions & 0 deletions model/testing/Model_mock_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion model/testing/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ func NewMockModelNamed(t *testing.T, id string) *MockModel {
}

// NewMockModelNamedWithCosts returns a new named mocked model with cost information.
func NewMockModelNamedWithCosts(t *testing.T, id string, cost float64) *MockModel {
func NewMockModelNamedWithCosts(t *testing.T, id string, name string, cost float64) *MockModel {
m := NewMockModel(t)
m.On("ID").Return(id).Maybe()
m.On("Name").Return(name).Maybe()
m.On("Cost").Return(cost).Maybe()

return m
Expand Down
2 changes: 1 addition & 1 deletion provider/ollama/ollama.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (p *Provider) Models() (models []model.Model, err error) {

models = make([]model.Model, len(ms))
for i, modelName := range ms {
models[i] = llm.NewModel(p, p.ID()+provider.ProviderModelSeparator+modelName)
models[i] = llm.NewNamedModelWithCost(p, p.ID()+provider.ProviderModelSeparator+modelName, modelName, 0)
}

return models, nil
Expand Down
5 changes: 4 additions & 1 deletion provider/openrouter/openrouter.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ type ModelsList struct {
type Model struct {
// ID holds the model id, mapped to the "id" JSON field.
ID string `json:"id"`
// Name holds the model name, mapped to the "name" JSON field.
Name string `json:"name"`

// Pricing holds the pricing information of a model, mapped to the "pricing" JSON field.
Pricing Pricing `json:"pricing"`
}
Expand Down Expand Up @@ -93,7 +96,7 @@ func (p *Provider) Models() (models []model.Model, err error) {
if err != nil {
return nil, err
}
models[i] = llm.NewModelWithCost(p, p.ID()+provider.ProviderModelSeparator+model.ID, cost)
models[i] = llm.NewNamedModelWithCost(p, p.ID()+provider.ProviderModelSeparator+model.ID, model.Name, cost)
}

return models, nil
Expand Down

0 comments on commit 8b70010

Please sign in to comment.