diff --git a/cmd/eval-dev-quality/cmd/command.go b/cmd/eval-dev-quality/cmd/command.go index 74808743..676885b7 100644 --- a/cmd/eval-dev-quality/cmd/command.go +++ b/cmd/eval-dev-quality/cmd/command.go @@ -31,6 +31,10 @@ func Execute(logger *log.Logger, arguments []string) { c.SetLogger(logger) } + if c, ok := command.(SetArguments); ok { + c.SetArguments(arguments) + } + return command.Execute(args) } @@ -51,3 +55,9 @@ type SetLogger interface { // SetLogger sets the logger of the command. SetLogger(logger *log.Logger) } + +// SetArguments defines a command that allows setting its arguments. +type SetArguments interface { + // SetArguments sets the command's arguments. + SetArguments(args []string) +} diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index 44c0bc42..d2a252ab 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "os/exec" + "os/user" "path/filepath" "slices" "sort" @@ -82,6 +83,8 @@ type Evaluate struct { // Namespace the namespace under which the kubernetes resources should be created. Namespace string `long:"namespace" description:"The Namespace which should be used for kubernetes resources." default:"eval-dev-quality"` + // args holds the passed arguments, filtered down to the flags available on this command. + args []string // logger holds the logger of the command. logger *log.Logger // timestamp holds the timestamp of the command execution. @@ -95,6 +98,14 @@ func (command *Evaluate) SetLogger(logger *log.Logger) { command.logger = logger } +var _ SetArguments = (*Evaluate)(nil) + +// SetArguments sets the command's arguments. +func (command *Evaluate) SetArguments(args []string) { + availableFlags := util.Flags(command) + command.args = util.FilterArgsKeep(args, availableFlags) +} + // Initialize initializes the command according to the arguments. 
func (command *Evaluate) Initialize(args []string) (evaluationContext *evaluate.Context, cleanup func()) { // Ensure the cleanup always runs in case there is a panic. @@ -448,18 +459,31 @@ func (command *Evaluate) evaluateLocal(evaluationContext *evaluate.Context) (err // evaluateDocker executes the evaluation for each model inside a docker container. func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) { - availableFlags := util.Flags(command) ignoredFlags := []string{ "model", "parallel", "result-path", + "runtime-image", "runtime", } - // Filter all the args to only contain flags which can be used. - args := util.FilterArgsKeep(os.Args[2:], availableFlags) // Filter the args to remove all flags unsuited for running the container. - args = util.FilterArgsRemove(args, ignoredFlags) + args := util.FilterArgsRemove(command.args, ignoredFlags) + + // Get current user for volume ID mapping. + user, err := user.Current() + if err != nil { + return pkgerrors.WithStack(err) + } + + resultPath, err := filepath.Abs(command.ResultPath) + if err != nil { + return err + } + // Set permission 777 so the non-root docker image is able to store its results inside the result path. + if err := os.Chmod(resultPath, 0777); err != nil { + return err + } parallel := util.NewParallel(command.Parallel) @@ -472,22 +496,14 @@ func (command *Evaluate) evaluateDocker(ctx *evaluate.Context) (err error) { continue } - // Create for each model a dedicated subfolder inside the results path. - resultPath, err := filepath.Abs(command.ResultPath) - if err != nil { - return err - } - // Set permission 777 so the non-root docker image is able to store its results inside the result path. - if err := os.Chmod(resultPath, 0777); err != nil { - return err - } - // Commands regarding the docker runtime. 
dockerCommand := []string{ "docker", "run", "-v", // bind volume resultPath + ":/home/ubuntu/evaluation", + "--user", + user.Uid + ":" + user.Gid, "--rm", // automatically remove container after it finished command.RuntimeImage, } @@ -533,6 +549,7 @@ func (command *Evaluate) evaluateKubernetes(ctx *evaluate.Context) (err error) { "model", "parallel", "result-path", + "runtime-image", "runtime", } diff --git a/cmd/eval-dev-quality/cmd/evaluate_test.go b/cmd/eval-dev-quality/cmd/evaluate_test.go index 51a7fb2b..b21696f5 100644 --- a/cmd/eval-dev-quality/cmd/evaluate_test.go +++ b/cmd/eval-dev-quality/cmd/evaluate_test.go @@ -1,9 +1,11 @@ package cmd import ( + "context" "fmt" "net/url" "os" + "os/exec" "path/filepath" "regexp" "sort" @@ -25,6 +27,7 @@ import ( providertesting "github.com/symflower/eval-dev-quality/provider/testing" "github.com/symflower/eval-dev-quality/tools" toolstesting "github.com/symflower/eval-dev-quality/tools/testing" + "github.com/symflower/eval-dev-quality/util" ) // validateReportLinks checks if the Markdown report data contains all the links to other relevant report files. @@ -768,6 +771,243 @@ func TestEvaluateExecute(t *testing.T) { }) }) + t.Run("Runtime", func(t *testing.T) { + // Skip the containerized runtime tests when Docker is unsupported (Windows) or not installed. 
+ if osutil.IsWindows() { + t.Skip("Docker runtime not supported on Windows") + } else if _, err := exec.LookPath("docker"); err != nil { + t.Skip("Docker runtime not found") + } + + // Prerequisite + dockerImage := "" + { + // Get current branch name + _, logger := log.Buffer() + branch, err := util.CommandWithResult(context.Background(), logger, &util.Command{ + Command: []string{ + "git", + "branch", + "--show-current", + }, + }) + assert.NoError(t, err) + + dockerImage = "ghcr.io/symflower/eval-dev-quality:" + strings.TrimSpace(branch) + + // Pull the image + _, err = util.CommandWithResult(context.Background(), logger, &util.Command{ + Command: []string{ + "docker", + "pull", + dockerImage, + }, + }) + assert.NoError(t, err) + } + + validate(t, &testCase{ + Name: "Docker", + + Arguments: []string{ + "--runtime", "docker", + "--model", "symflower/symbolic-execution", + "--model", "symflower/symbolic-execution", + "--model", "symflower/symbolic-execution", + "--testdata", "testdata/", // Our own tests set the "testdata" argument to the temporary directory that they create. This temporary directory does not exist in docker, so set the "testdata" manually here to overrule the testing behavior and use the original one. 
+ "--repository", filepath.Join("golang", "plain"), + "--repository", filepath.Join("java", "plain"), + "--runs=1", + "--parallel=3", + "--runtime-image=" + dockerImage, + }, + + ExpectedOutputValidate: func(t *testing.T, output string, resultPath string) { + actualAssessments := validateMetrics(t, extractMetricsLogsMatch, output, []metrics.Assessments{ + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 40, + metrics.AssessmentKeyFilesExecuted: 4, + metrics.AssessmentKeyResponseNoError: 4, + metrics.AssessmentKeyResponseNoExcess: 4, + metrics.AssessmentKeyResponseWithCode: 4, + }, + }, []uint64{56, 56, 56}) + // Assert non-deterministic behavior. 
+ assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) + assert.Equal(t, uint64(786), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) + assert.Equal(t, uint64(786), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) + assert.Equal(t, 3, strings.Count(output, "Evaluation score for")) + }, + ExpectedResultFiles: map[string]func(t *testing.T, filePath string, data string){ + filepath.Join("result-directory", "evaluation.log"): nil, + + // Parallel run 1 + filepath.Join("result-directory", "symflower", "symbolic-execution", "categories.svg"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.csv"): func(t *testing.T, filePath, data string) { + actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + }, []uint64{14, 14, 14, 14}) + // Assert non-deterministic behavior. 
+ assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution", "evaluation.log"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", "golang-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", "java-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", "models-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", "README.md"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + + // Parallel run 2 + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "categories.svg"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.csv"): func(t *testing.T, filePath, data string) { + actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 
1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + }, []uint64{14, 14, 14, 14}) + // Assert non-deterministic behavior. + assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "evaluation.log"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "golang-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "java-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "models-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", "README.md"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution-0", 
string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + + // Parallel run 3 + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "categories.svg"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.csv"): func(t *testing.T, filePath, data string) { + actualAssessments := validateMetrics(t, extractMetricsCSVMatch, data, []metrics.Assessments{ + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + metrics.Assessments{ + metrics.AssessmentKeyCoverage: 10, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + metrics.AssessmentKeyResponseNoExcess: 1, + metrics.AssessmentKeyResponseWithCode: 1, + }, + }, []uint64{14, 14, 14, 14}) + // Assert non-deterministic behavior. 
+ assert.Greater(t, actualAssessments[0][metrics.AssessmentKeyProcessingTime], uint64(0)) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyGenerateTestsForFileCharacterCount]) + assert.Equal(t, uint64(254), actualAssessments[0][metrics.AssessmentKeyResponseCharacterCount]) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "evaluation.log"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "golang-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "java-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "models-summed.csv"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", "README.md"): nil, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "golang", "golang", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + filepath.Join("result-directory", "symflower", "symbolic-execution-1", string(evaluatetask.IdentifierWriteTests), "symflower_symbolic-execution", "java", "java", "plain.log"): func(t *testing.T, filePath, data string) { + assert.Equal(t, 1, strings.Count(data, `Evaluating model "symflower/symbolic-execution"`)) + }, + }, + }) + }) + // This case checks a beautiful bug where the Markdown export crashed when the current working directory contained a README.md file. While this is not the case during the tests (as the current work directory is the directory of this file), it certainly caused problems when our binary was executed from the repository root (which of course contained a README.md). Therefore, we sadly have to modify the current work directory right within the tests of this case to reproduce the problem and fix it forever. 
validate(t, &testCase{ Name: "Current work directory contains a README.md",