Skip to content

Commit

Permalink
Log model responses as artifact in separate file
Browse files Browse the repository at this point in the history
Part of #204
  • Loading branch information
ahumenberger committed Jul 19, 2024
1 parent de6e56d commit 9adadfe
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 16 deletions.
2 changes: 2 additions & 0 deletions cmd/eval-dev-quality/cmd/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ func TestEvaluateExecute(t *testing.T) {
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "evaluation.log"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
},
ExpectedOutputValidate: func(t *testing.T, output, resultPath string) {
assert.Contains(t, output, `Starting services for provider "ollama"`)
Expand Down Expand Up @@ -598,6 +599,7 @@ func TestEvaluateExecute(t *testing.T) {
},
filepath.Join("result-directory", "README.md"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "evaluation.log"): nil,
filepath.Join("result-directory", string(evaluatetask.IdentifierWriteTests), "custom-ollama_"+log.CleanModelNameForFileSystem(providertesting.OllamaTestModel), "golang", "golang", "plain", "response-1.log"): nil,
},
})
}
Expand Down
4 changes: 4 additions & 0 deletions evaluate/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
logger.Printf("Run %d/%d", rl+1, ctx.Runs)
}

logger := logger.With(log.AttributeKeyRun, rl+1)

for _, language := range ctx.Languages {
logger := logger.With(log.AttributeKeyLanguage, language.ID())

Expand Down Expand Up @@ -214,6 +216,8 @@ func Evaluate(ctx *Context) (assessments *report.AssessmentStore, totalScore uin
logger.Printf("Run %d/%d", rl+1, ctx.Runs)
}

logger := logger.With(log.AttributeKeyRun, rl+1)

for _, language := range ctx.Languages {
languageID := language.ID()
logger := logger.With(log.AttributeKeyLanguage, languageID)
Expand Down
2 changes: 2 additions & 0 deletions evaluate/evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ func TestEvaluate(t *testing.T) {
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "Attempt 1/3: "+ErrEmptyResponseFromModel.Error())
},
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
"evaluation.csv": nil,
},
})
Expand Down Expand Up @@ -421,6 +422,7 @@ func TestEvaluate(t *testing.T) {
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "evaluation.log"): func(t *testing.T, filePath, data string) {
assert.Contains(t, data, "DONE 0 tests, 1 error")
},
filepath.Join(string(evaluatetask.IdentifierWriteTests), log.CleanModelNameForFileSystem(mockedModelID), "golang", "golang", "plain", "response-1.log"): nil,
"evaluation.csv": nil,
},
})
Expand Down
91 changes: 79 additions & 12 deletions log/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,20 @@ import (
type AttributeKey string

const (
AttributeKeyLanguage AttributeKey = "Language"
AttributeKeyArtifact AttributeKey = "Artifact"
AttributeKeyLanguage = "Language"
AttributeKeyModel = "Model"
AttributeKeyRepository = "Repository"
AttributeKeyResultPath = "ResultPath"
AttributeKeyRun = "Run"
AttributeKeyTask = "Task"
)

// Attribute returns a logging attribute.
func Attribute(key AttributeKey, value any) (attribute slog.Attr) {
return slog.Any(string(key), value)
}

// Flags defines how log messages should be printed.
type Flags int

Expand Down Expand Up @@ -104,6 +111,11 @@ func (l *Logger) Printf(format string, args ...any) {
l.Logger.Info(fmt.Sprintf(format, args...))
}

// PrintWith logs the given message at the "info" level.
func (l *Logger) PrintWith(message string, args ...any) {
l.Logger.Info(message, args...)
}

// Panicf is equivalent to "Printf" followed by a panic.
func (l *Logger) Panicf(format string, args ...any) {
message := fmt.Sprintf(format, args...)
Expand Down Expand Up @@ -169,19 +181,29 @@ func STDOUT() (logger *Logger) {

// newLogWriter returns a logger that writes to a file and to the parent logger at the same time.
func newLogWriter(parent io.Writer, filePath string) (writer io.Writer, err error) {
file, err := openLogFile(filePath)
if err != nil {
return nil, err
}
addOpenLogFile(file)

writer = io.MultiWriter(parent, file)

return writer, nil
}

// openLogFile opens the given file and creates it if necessary.
func openLogFile(filePath string) (file *os.File, err error) {
if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
return nil, pkgerrors.WithStack(err)
}

file, err := os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
file, err = os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0644)
if err != nil {
return nil, pkgerrors.WithStack(err)
}
addOpenLogFile(file)

writer = io.MultiWriter(parent, file)

return writer, nil
return file, nil
}

// spawningHandler is a structural logging handler which spawns a new log file if one of the given log file spawners triggers.
Expand Down Expand Up @@ -231,17 +253,36 @@ func (h *spawningHandler) Enabled(ctx context.Context, level slog.Level) bool {
}

// Handle handles the Record.
func (h *spawningHandler) Handle(ctx context.Context, record slog.Record) error {
func (h *spawningHandler) Handle(ctx context.Context, record slog.Record) (err error) {
writer := h.writer
attributes := maps.Clone(h.attributes)
record.Attrs(func(attribute slog.Attr) bool {
attributes[AttributeKey(attribute.Key)] = attribute.Value.String()

return true
})
for _, spawner := range artifactLogFileSpawners {
if !spawner.NeedsSpawn(attributes) {
continue
}

logFilePath := spawner.FilePath(attributes)
writer, err = newLogWriter(writer, logFilePath)
if err != nil {
return err
}
}

if h.flags&FlagDate != 0 {
fmt.Fprint(h.writer, record.Time.Format("2006/01/02"))
fmt.Fprint(h.writer, " ")
fmt.Fprint(writer, record.Time.Format("2006/01/02"))
fmt.Fprint(writer, " ")
}
if h.flags&FlagTime != 0 {
fmt.Fprint(h.writer, record.Time.Format("15:04:05"))
fmt.Fprint(h.writer, " ")
fmt.Fprint(writer, record.Time.Format("15:04:05"))
fmt.Fprint(writer, " ")
}

fmt.Fprintln(h.writer, record.Message)
fmt.Fprintln(writer, record.Message)

return nil
}
Expand Down Expand Up @@ -338,6 +379,32 @@ var defaultLogFileSpawners = []logFileSpawner{
},
}

var artifactLogFileSpawners = []logFileSpawner{
logFileSpawner{
NeededAttributes: []AttributeKey{
AttributeKeyResultPath,

AttributeKeyArtifact,
AttributeKeyLanguage,
AttributeKeyModel,
AttributeKeyRepository,
AttributeKeyRun,
AttributeKeyTask,
},
FilePath: func(attributes map[AttributeKey]string) string {
resultPath := attributes[AttributeKeyResultPath]
modelID := attributes[AttributeKeyModel]
languageID := attributes[AttributeKeyLanguage]
repositoryName := attributes[AttributeKeyRepository]
taskIdentifier := attributes[AttributeKeyTask]
run := attributes[AttributeKeyRun]
artifact := attributes[AttributeKeyArtifact]

return filepath.Join(resultPath, taskIdentifier, CleanModelNameForFileSystem(modelID), languageID, repositoryName, fmt.Sprintf("%s-%s.log", artifact, run))
},
},
}

// logFileSpawner defines when a new log file is spawned.
type logFileSpawner struct {
// NeededAttributes holds the list of attributes that need to be set in order to spawn a new log file.
Expand Down
39 changes: 39 additions & 0 deletions log/logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,45 @@ func TestLoggerWith(t *testing.T) {
filepath.Join("taskA", "modelA", "languageA", "repositoryB", "evaluation.log"): "",
},
})

t.Run("Artifacts", func(t *testing.T) {
validate(t, &testCase{
Name: "Response",

Do: func(logger *Logger, temporaryPath string) {
logger = logger.With(AttributeKeyResultPath, temporaryPath)
logger = logger.With(AttributeKeyLanguage, "languageA")
logger = logger.With(AttributeKeyModel, "modelA")
logger = logger.With(AttributeKeyRepository, "repositoryA")
logger = logger.With(AttributeKeyTask, "taskA")
logger = logger.With(AttributeKeyRun, "1")

logger.PrintWith("Artifact content", Attribute(AttributeKeyArtifact, "response"))
logger.Print("No artifact content")
},

ExpectedLogOutput: `
Spawning new log file at $TEMPORARY_PATH/evaluation.log
Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA/evaluation.log
Artifact content
No artifact content
`,
ExpectedFiles: map[string]string{
"evaluation.log": `
Spawning new log file at $TEMPORARY_PATH/taskA/modelA/languageA/repositoryA/evaluation.log
Artifact content
No artifact content
`,
filepath.Join("taskA", "modelA", "languageA", "repositoryA", "evaluation.log"): `
Artifact content
No artifact content
`,
filepath.Join("taskA", "modelA", "languageA", "repositoryA", "response-1.log"): `
Artifact content
`,
},
})
})
}

func TestCleanModelNameForFileSystem(t *testing.T) {
Expand Down
9 changes: 5 additions & 4 deletions model/llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package llm

import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
Expand Down Expand Up @@ -170,17 +171,17 @@ func (m *Model) WriteTests(ctx model.Context) (assessment metrics.Assessments, e
return assessment, nil
}

func (m *Model) query(log *log.Logger, request string) (response string, duration time.Duration, err error) {
func (m *Model) query(logger *log.Logger, request string) (response string, duration time.Duration, err error) {
if err := retry.Do(
func() error {
log.Printf("Querying model %q with:\n%s", m.ID(), string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
logger.Printf("Querying model %q with:\n%s", m.ID(), string(bytesutil.PrefixLines([]byte(request), []byte("\t"))))
start := time.Now()
response, err = m.provider.Query(context.Background(), m.model, request)
if err != nil {
return err
}
duration = time.Since(start)
log.Printf("Model %q responded (%d ms) with:\n%s", m.ID(), duration.Milliseconds(), string(bytesutil.PrefixLines([]byte(response), []byte("\t"))))
logger.PrintWith(fmt.Sprintf("Model %q responded (%d ms) with:\n%s", m.ID(), duration.Milliseconds(), string(bytesutil.PrefixLines([]byte(response), []byte("\t")))), log.Attribute(log.AttributeKeyArtifact, "response"))

return nil
},
Expand All @@ -189,7 +190,7 @@ func (m *Model) query(log *log.Logger, request string) (response string, duratio
retry.DelayType(retry.BackOffDelay),
retry.LastErrorOnly(true),
retry.OnRetry(func(n uint, err error) {
log.Printf("Attempt %d/%d: %s", n+1, m.queryAttempts, err)
logger.Printf("Attempt %d/%d: %s", n+1, m.queryAttempts, err)
}),
); err != nil {
return "", 0, err
Expand Down

0 comments on commit 9adadfe

Please sign in to comment.