diff --git a/pkg/covermerger/covermerger.go b/pkg/covermerger/covermerger.go
index aa47d83b1ada..b5bc8cbdb33e 100644
--- a/pkg/covermerger/covermerger.go
+++ b/pkg/covermerger/covermerger.go
@@ -7,22 +7,19 @@ import (
 	"encoding/csv"
 	"fmt"
 	"io"
+	"log"
 	"strconv"

 	"golang.org/x/exp/maps"
 )

 const (
-	keyKernelRepo   = "kernel_repo"
-	keyKernelBranch = "kernel_branch"
-	keyKernelCommit = "kernel_commit"
-	keyFilePath     = "file_path"
-	keyStartLine    = "sl"
-	keyStartCol     = "sc"
-	keyEndLine      = "el"
-	keyEndCol       = "ec"
-	keyHitCount     = "hit_count"
-	keyArch         = "arch"
+	KeyKernelRepo   = "kernel_repo"
+	KeyKernelBranch = "kernel_branch"
+	KeyKernelCommit = "kernel_commit"
+	KeyFilePath     = "file_path"
+	KeyStartLine    = "sl"
+	KeyHitCount     = "hit_count"
 )

 type FileRecord map[string]string
@@ -35,9 +32,9 @@ type RepoBranchCommit struct {

 func (fr FileRecord) RepoBranchCommit() RepoBranchCommit {
 	return RepoBranchCommit{
-		fr[keyKernelRepo],
-		fr[keyKernelBranch],
-		fr[keyKernelCommit],
+		fr[KeyKernelRepo],
+		fr[KeyKernelBranch],
+		fr[KeyKernelCommit],
 	}
 }

@@ -48,65 +45,69 @@ type Frame struct {
 	EndCol   int
 }

-func (fr FileRecord) Frame() Frame {
-	f := Frame{}
+func (fr FileRecord) Frame() (*Frame, error) {
+	f := &Frame{}
 	var err error
-	if f.StartCol, err = strconv.Atoi(fr[keyStartCol]); err != nil {
-		panic(fmt.Sprintf("failed to Atoi(%s)", fr[keyStartCol]))
+	if f.StartLine, err = strconv.Atoi(fr[KeyStartLine]); err != nil {
+		return nil, fmt.Errorf("failed to Atoi(%s): %w", fr[KeyStartLine], err)
 	}
-	if f.StartLine, err = strconv.Atoi(fr[keyStartLine]); err != nil {
-		panic(fmt.Sprintf("failed to Atoi(%s)", fr[keyStartLine]))
-	}
-	if f.EndCol, err = strconv.Atoi(fr[keyEndCol]); err != nil {
-		panic(fmt.Sprintf("failed to Atoi(%s)", fr[keyEndCol]))
-	}
-	if f.EndLine, err = strconv.Atoi(fr[keyEndLine]); err != nil {
-		panic(fmt.Sprintf("failed to Atoi(%s)", fr[keyEndLine]))
-	}
-	return f
+	return f, nil
 }

-func (fr FileRecord) HitCount() int {
-	if hitCount, err := strconv.Atoi(fr[keyHitCount]); err != nil {
-		panic(fmt.Sprintf("failed to Atoi(%s)", fr[keyHitCount]))
+func (fr FileRecord) HitCount() (int, error) {
+	if hitCount, err := strconv.Atoi(fr[KeyHitCount]); err != nil {
+		return 0, fmt.Errorf("failed to Atoi(%s): %w", fr[KeyHitCount], err)
 	} else {
-		return hitCount
+		return hitCount, nil
 	}
 }

-func (fr FileRecord) Arch() string {
-	return fr[keyArch]
-}
-
 type MergeResult struct {
 	HitCounts  map[int]int
 	FileExists bool
+	LostFrames map[RepoBranchCommit]int64
 }

 type FileCoverageMerger interface {
-	AddRecord(rbc RepoBranchCommit, arch string, f Frame, hitCount int)
+	AddRecord(rbc RepoBranchCommit, f *Frame, hitCount int)
 	Result() *MergeResult
 }

-func batchFileData(c *Config, targetFilePath string, records FileRecords, base RepoBranchCommit,
+func batchFileData(c *Config, targetFilePath string, records FileRecords, processedFiles map[string]struct{},
 ) (*MergeResult, error) {
+	log.Printf("processing %d records for %s", len(records), targetFilePath)
+	if _, exists := processedFiles[targetFilePath]; exists {
+		return nil, fmt.Errorf("file was already processed, check the input ordering")
+	}
+	processedFiles[targetFilePath] = struct{}{}
 	repoBranchCommitsMap := make(map[RepoBranchCommit]bool)
 	for _, record := range records {
 		repoBranchCommitsMap[record.RepoBranchCommit()] = true
 	}
-	repoBranchCommitsMap[base] = true
+	repoBranchCommitsMap[c.Base] = true
 	repoBranchCommits := maps.Keys(repoBranchCommitsMap)
-	fileVersions, err := getFileVersions(c, targetFilePath, repoBranchCommits)
+	getFiles := getFileVersions
+	if c.getFileVersionsMock != nil {
+		getFiles = c.getFileVersionsMock
+	}
+	fvs, err := getFiles(c, targetFilePath, repoBranchCommits)
 	if err != nil {
 		return nil, fmt.Errorf("failed to getFileVersions: %w", err)
 	}
-	merger := makeFileLineCoverMerger(fileVersions, base)
+	merger := makeFileLineCoverMerger(fvs, c.Base)
 	for _, record := range records {
+		var f *Frame
+		if f, err = record.Frame(); err != nil {
+			return nil, fmt.Errorf("error parsing records: %w", err)
+		}
+		var hitCount int
+		if hitCount, err = record.HitCount(); err != nil {
+			return nil, fmt.Errorf("error parsing records: %w", err)
+		}
 		merger.AddRecord(
 			record.RepoBranchCommit(),
-			record.Arch(),
-			record.Frame(),
-			record.HitCount())
+			f,
+			hitCount)
 	}
 	return merger.Result(), nil
 }
@@ -124,37 +125,76 @@ func makeRecord(fields, schema []string) FileRecord {
 }

 type Config struct {
-	Workdir       string
-	skipRepoClone bool
+	Workdir             string
+	skipRepoClone       bool
+	Base                RepoBranchCommit
+	getFileVersionsMock func(*Config, string, []RepoBranchCommit) (fileVersions, error)
+	repoCache           repoCache
 }

-func AggregateStreamData(c *Config, stream io.Reader, base RepoBranchCommit,
-) (map[string]*MergeResult, error) {
-	stat := make(map[string]*MergeResult)
+func isSchema(fields, schema []string) bool {
+	if len(fields) != len(schema) {
+		return false
+	}
+	for i := 0; i < len(fields); i++ {
+		if fields[i] != schema[i] {
+			return false
+		}
+	}
+	return true
+}
+
+func MergeCSVData(config *Config, reader io.Reader) (map[string]*MergeResult, error) {
 	var schema []string
-	targetFile := ""
-	var records FileRecords
-	csvReader := csv.NewReader(stream)
+	csvReader := csv.NewReader(reader)
 	if fields, err := csvReader.Read(); err != nil {
 		return nil, fmt.Errorf("failed to read schema: %w", err)
 	} else {
 		schema = fields
 	}
-	for {
-		fields, err := csvReader.Read()
-		if err == io.EOF {
-			break
-		}
-		if err != nil {
-			return nil, fmt.Errorf("failed to read CSV line: %w", err)
+	errStdinReadChan := make(chan error, 1)
+	recordsChan := make(chan FileRecord)
+	go func() {
+		defer close(recordsChan)
+		for {
+			fields, err := csvReader.Read()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				errStdinReadChan <- fmt.Errorf("failed to read CSV line: %w", err)
+				return
+			}
+			if isSchema(fields, schema) {
+				// The input may be merged CSV files with multiple schemas.
+				continue
+			}
+			recordsChan <- makeRecord(fields, schema)
 		}
-		record := makeRecord(fields, schema)
-		curTargetFile := record[keyFilePath]
+		errStdinReadChan <- nil
+	}()
+	mergeResult, errMerging := mergeChanData(config, recordsChan)
+	errStdinRead := <-errStdinReadChan
+	if errMerging != nil || errStdinRead != nil {
+		return nil, fmt.Errorf("errors merging stdin data:\nmerger err: %w\nstdin reader err: %w",
+			errMerging, errStdinRead)
+	}
+	return mergeResult, nil
+}
+
+func mergeChanData(c *Config, recordsChan <-chan FileRecord) (map[string]*MergeResult, error) {
+	stat := make(map[string]*MergeResult)
+	targetFile := ""
+	var records []FileRecord
+	processedFiles := map[string]struct{}{}
+	for record := range recordsChan {
+		curTargetFile := record[KeyFilePath]
 		if targetFile == "" {
 			targetFile = curTargetFile
 		}
 		if curTargetFile != targetFile {
-			if stat[targetFile], err = batchFileData(c, targetFile, records, base); err != nil {
+			var err error
+			if stat[targetFile], err = batchFileData(c, targetFile, records, processedFiles); err != nil {
 				return nil, fmt.Errorf("failed to batchFileData(%s): %w", targetFile, err)
 			}
 			records = nil
@@ -164,7 +204,7 @@ func AggregateStreamData(c *Config, stream io.Reader, base RepoBranchCommit,
 	}
 	if records != nil {
 		var err error
-		if stat[targetFile], err = batchFileData(c, targetFile, records, base); err != nil {
+		if stat[targetFile], err = batchFileData(c, targetFile, records, processedFiles); err != nil {
 			return nil, fmt.Errorf("failed to batchFileData(%s): %w", targetFile, err)
 		}
 	}
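[Reviewer note] To make the record plumbing above concrete: makeRecord (whose body is unchanged by this patch) pairs a data row with the schema row, so a FileRecord is a plain column-name-to-value map, and the reworked Frame()/HitCount() now return errors instead of panicking. A minimal sketch, assuming makeRecord maps schema[i] to fields[i] as the surrounding code implies; the row values are made up:

    fields := []string{"git://repo", "master", "commit1", "mm/mmap.c", "10", "1"}
    schema := []string{KeyKernelRepo, KeyKernelBranch, KeyKernelCommit, KeyFilePath, KeyStartLine, KeyHitCount}
    record := makeRecord(fields, schema)
    frame, err := record.Frame()       // on success: frame.StartLine == 10, err == nil
    hitCount, err := record.HitCount() // on success: hitCount == 1
    _ = record[KeyFilePath]            // "mm/mmap.c"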
diff --git a/pkg/covermerger/covermerger_test.go b/pkg/covermerger/covermerger_test.go
index 825402aa11f1..5d99e35df984 100644
--- a/pkg/covermerger/covermerger_test.go
+++ b/pkg/covermerger/covermerger_test.go
@@ -119,25 +119,41 @@ samp_time,1,360,arch,b1,ci-mock,git://repo,master,commit2,not_changed.c,func1,4,
 	}
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			aggregation, err := AggregateStreamData(
+			aggregation, err := MergeCSVData(
 				&Config{
 					Workdir:       test.workdir,
 					skipRepoClone: true,
+					Base: RepoBranchCommit{
+						Repo:   test.baseRepo,
+						Branch: test.baseBranch,
+						Commit: test.baseCommit,
+					},
+					getFileVersionsMock: mockGetFileVersions,
 				},
 				strings.NewReader(test.bqTable),
-				RepoBranchCommit{
-					Repo:   test.baseRepo,
-					Branch: test.baseBranch,
-					Commit: test.baseCommit,
-				})
+			)
 			assert.Nil(t, err)
-			var simpleAggregationJSON map[string]*MergeResult
-			assert.Nil(t, json.Unmarshal([]byte(test.simpleAggregation), &simpleAggregationJSON))
-			assert.Equal(t, simpleAggregationJSON, aggregation)
+			var expectedAggregation map[string]*MergeResult
+			assert.Nil(t, json.Unmarshal([]byte(test.simpleAggregation), &expectedAggregation))
+			assert.Equal(t, expectedAggregation, aggregation)
 		})
 	}
 }

+func mockGetFileVersions(c *Config, targetFilePath string, rbcs []RepoBranchCommit,
+) (fileVersions, error) {
+	res := make(fileVersions)
+	for _, rbc := range rbcs {
+		filePath := c.Workdir + "/repos/" + rbc.Commit + "/" + targetFilePath
+		if bytes, err := os.ReadFile(filePath); err == nil {
+			res[rbc] = fileVersion{
+				content: string(bytes),
+			}
+		}
+	}
+	return res, nil
+}
+
 func readFileOrFail(t *testing.T, path string) string {
 	absPath, err := filepath.Abs(path)
 	assert.Nil(t, err)
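[Reviewer note] The rename to expectedAggregation makes the fixture's shape easier to see: simpleAggregation is JSON keyed by file name and decoded into map[string]*MergeResult. A self-contained sketch of the round-trip; the hit-count values are illustrative, not taken from the fixtures:

    package main

    import (
        "encoding/json"
        "fmt"

        "github.com/google/syzkaller/pkg/covermerger"
    )

    func main() {
        expected := map[string]*covermerger.MergeResult{
            "not_changed.c": {
                HitCounts:  map[int]int{4: 2},
                FileExists: true,
            },
        }
        b, _ := json.Marshal(expected)
        fmt.Println(string(b))
        // {"not_changed.c":{"HitCounts":{"4":2},"FileExists":true,"LostFrames":null}}
    }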
diff --git a/pkg/covermerger/deleted_file_merger.go b/pkg/covermerger/deleted_file_merger.go
index 7313c8dae936..32513ecb1d66 100644
--- a/pkg/covermerger/deleted_file_merger.go
+++ b/pkg/covermerger/deleted_file_merger.go
@@ -6,7 +6,7 @@ package covermerger
 type DeletedFileLineMerger struct {
 }

-func (a *DeletedFileLineMerger) AddRecord(RepoBranchCommit, string, Frame, int) {
+func (a *DeletedFileLineMerger) AddRecord(RepoBranchCommit, *Frame, int) {
 }

 func (a *DeletedFileLineMerger) Result() *MergeResult {
diff --git a/pkg/covermerger/file_line_merger.go b/pkg/covermerger/file_line_merger.go
index 2cd2eea78c43..29fd4200b410 100644
--- a/pkg/covermerger/file_line_merger.go
+++ b/pkg/covermerger/file_line_merger.go
@@ -4,12 +4,12 @@
 package covermerger

 func makeFileLineCoverMerger(
-	fileVersions map[RepoBranchCommit]string, base RepoBranchCommit) FileCoverageMerger {
+	fvs fileVersions, base RepoBranchCommit) FileCoverageMerger {
 	baseFile := ""
 	baseFileExists := false
-	for rbc, fileContent := range fileVersions {
+	for rbc, fv := range fvs {
 		if rbc == base {
-			baseFile = fileContent
+			baseFile = fv.content
 			baseFileExists = true
 			break
 		}
@@ -18,33 +18,46 @@ func makeFileLineCoverMerger(
 		return &DeletedFileLineMerger{}
 	}
 	a := &FileLineCoverMerger{
-		rbcToFile: fileVersions,
-		baseFile:  baseFile,
-		hitCounts: make(map[int]int),
-		matchers:  make(map[RepoBranchCommit]*LineToLineMatcher),
+		rbcToFile:  fvs,
+		baseFile:   baseFile,
+		hitCounts:  make(map[int]int),
+		matchers:   make(map[RepoBranchCommit]*LineToLineMatcher),
+		lostFrames: map[RepoBranchCommit]int64{},
 	}
-	for rbc, fileVersion := range fileVersions {
-		a.matchers[rbc] = makeLineToLineMatcher(fileVersion, baseFile)
+	for rbc, fv := range fvs {
+		a.matchers[rbc] = makeLineToLineMatcher(fv.content, baseFile)
 	}
 	return a
 }

 type FileLineCoverMerger struct {
-	rbcToFile map[RepoBranchCommit]string
-	baseFile  string
-	hitCounts map[int]int
-	matchers  map[RepoBranchCommit]*LineToLineMatcher
+	rbcToFile  fileVersions
+	baseFile   string
+	hitCounts  map[int]int
+	matchers   map[RepoBranchCommit]*LineToLineMatcher
+	lostFrames map[RepoBranchCommit]int64
 }

-func (a *FileLineCoverMerger) AddRecord(rbc RepoBranchCommit, arch string, f Frame, hitCount int) {
+func (a *FileLineCoverMerger) AddRecord(rbc RepoBranchCommit, f *Frame, hitCount int) {
+	if a.matchers[rbc] == nil {
+		if hitCount > 0 {
+			a.lostFrames[rbc]++
+		}
+		return
+	}
 	if targetLine := a.matchers[rbc].SameLinePos(f.StartLine); targetLine != -1 {
 		a.hitCounts[f.StartLine] += hitCount
 	}
 }

 func (a *FileLineCoverMerger) Result() *MergeResult {
+	lostFrames := a.lostFrames
+	if len(lostFrames) == 0 {
+		lostFrames = nil
+	}
 	return &MergeResult{
 		HitCounts:  a.hitCounts,
 		FileExists: true,
+		LostFrames: lostFrames,
 	}
 }
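[Reviewer note] The new lost-frames accounting only triggers when no matcher exists for a record's repo/branch/commit, i.e. when that file version could not be fetched, and a record with zero hits is dropped silently. A sketch of the intended behavior; fvs, base, rbcKnown, and rbcMissing are hypothetical, and the matcher is assumed to map line 10 to itself:

    merger := makeFileLineCoverMerger(fvs, base)           // fvs contains base and rbcKnown, but not rbcMissing
    merger.AddRecord(rbcMissing, &Frame{StartLine: 10}, 1) // no matcher: counted as lost
    merger.AddRecord(rbcKnown, &Frame{StartLine: 10}, 2)   // matched: merged into hitCounts
    res := merger.Result()
    // res.LostFrames[rbcMissing] == 1 and res.HitCounts[10] == 2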
diff --git a/pkg/covermerger/repos.go b/pkg/covermerger/repos.go
index 77a2e557e172..2c63e2d5d8a5 100644
--- a/pkg/covermerger/repos.go
+++ b/pkg/covermerger/repos.go
@@ -5,39 +5,85 @@ package covermerger

 import (
 	"fmt"
-	"os"
+	"log"

 	"github.com/google/syzkaller/pkg/vcs"
 	"github.com/google/syzkaller/sys/targets"
 )

+type fileVersion struct {
+	content string
+}
+
+type fileVersions map[RepoBranchCommit]fileVersion
+
 func getFileVersions(c *Config, targetFilePath string, rbcs []RepoBranchCommit,
-) (map[RepoBranchCommit]string, error) {
-	reposPath := c.Workdir + "/repos"
+) (fileVersions, error) {
+	repos, err := cloneRepos(c, rbcs)
+	if err != nil {
+		return nil, fmt.Errorf("failed to clone repos: %w", err)
+	}
+
+	res := make(fileVersions)
 	for _, rbc := range rbcs {
-		commitPath := reposPath + "/" + rbc.Commit
-		if _, err := os.Stat(commitPath); err == nil || c.skipRepoClone {
-			continue
-		}
-		repo, err := vcs.NewRepo(targets.Linux, "none", commitPath)
+		fileBytes, err := repos[rbc].Object(targetFilePath, rbc.Commit)
+		// It is ok if some file doesn't exist; the file sets may differ between revisions.
 		if err != nil {
-			return nil, fmt.Errorf("failed to create new repo at %s: %w", commitPath, err)
+			continue
 		}
-		if _, err = repo.CheckoutCommit(rbc.Repo, rbc.Commit); err != nil {
-			return nil, fmt.Errorf("failed to get commit %s from repo %s to folder %s: %w",
-				rbc.Commit, rbc.Repo, commitPath, err)
+		res[rbc] = fileVersion{
+			content: string(fileBytes),
 		}
 	}
+	return res, nil
+}

-	res := make(map[RepoBranchCommit]string)
+type repoCache struct {
+	cache map[RepoBranchCommit]vcs.Repo
+}
+
+func (rc *repoCache) get(rbc RepoBranchCommit) vcs.Repo {
+	rbc.Commit = ""
+	if repo, ok := rc.cache[rbc]; ok {
+		return repo
+	}
+	return nil
+}
+
+func (rc *repoCache) put(rbc RepoBranchCommit, repo vcs.Repo) {
+	rbc.Commit = ""
+	if rc.cache == nil {
+		rc.cache = map[RepoBranchCommit]vcs.Repo{}
+	}
+	rc.cache[rbc] = repo
+}
+
+func cloneRepos(c *Config, rbcs []RepoBranchCommit) (map[RepoBranchCommit]vcs.Repo, error) {
+	cache := &c.repoCache
+	repos := make(map[RepoBranchCommit]vcs.Repo)
 	for _, rbc := range rbcs {
-		filePath := reposPath + "/" + rbc.Commit + "/" + targetFilePath
-		fileBytes, err := os.ReadFile(filePath)
-		// It is ok if some file doesn't exist. It means we have repo FS diff.
-		if err == nil {
-			res[rbc] = string(fileBytes)
+		repos[rbc] = cache.get(rbc)
+		if repos[rbc] != nil {
+			continue
+		}
+		rbcPath := c.Workdir + "/repos/linux_kernels"
+		repo, err := vcs.NewRepo(targets.Linux, "none", rbcPath)
+		if err != nil {
+			return nil, fmt.Errorf("failed to create/open repo at %s: %w", rbcPath, err)
+		}
+		repos[rbc] = repo
+		cache.put(rbc, repo)
+		if c.skipRepoClone {
+			continue
+		}
+		log.Printf("cloning repo: %s, branch: %s", rbc.Repo, rbc.Branch)
+		if rbc.Branch == "" {
+			panic("repo and branch are needed")
+		}
+		if _, err = repo.CheckoutBranch(rbc.Repo, rbc.Branch); err != nil {
+			return nil, fmt.Errorf("failed to CheckoutBranch(repo %s, branch %s): %w",
+				rbc.Repo, rbc.Branch, err)
 		}
 	}
-
-	return res, nil
+	return repos, nil
 }
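[Reviewer note] The cache key trick above is easy to miss: both get and put zero out rbc.Commit, so every commit from one repo+branch pair reuses the single clone kept under workdir/repos/linux_kernels. A sketch of the assumed behavior; repo stands for any vcs.Repo value already opened elsewhere:

    var rc repoCache
    rc.put(RepoBranchCommit{Repo: "git://repo", Branch: "master", Commit: "c1"}, repo)
    // A different commit on the same repo+branch normalizes to the same key
    // {git://repo, master, ""} and therefore hits the cached clone.
    cached := rc.get(RepoBranchCommit{Repo: "git://repo", Branch: "master", Commit: "c2"})
    // cached == repo; a different repo or branch would miss and return nil.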
diff --git a/tools/syz-bq.sh b/tools/syz-bq.sh
new file mode 100755
index 000000000000..343d6c81e778
--- /dev/null
+++ b/tools/syz-bq.sh
@@ -0,0 +1,134 @@
+#!/usr/bin/env bash
+# Copyright 2024 syzkaller project authors. All rights reserved.
+# Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+set -e # exit on any problem
+set -o pipefail
+
+while getopts w:d:t:n:r:b: option
+do
+  case "${option}"
+  in
+    w)workdir=${OPTARG};;
+    d)duration=${OPTARG};;
+    t)to_date=${OPTARG};;
+    n)namespace=${OPTARG};;
+    r)repo=${OPTARG};;
+    b)branch=${OPTARG};;
+  esac
+done
+
+if [ -z "$workdir" ]
+then
+  echo "-w is required to specify workdir"
+  exit 1
+fi
+if [ -z "$to_date" ]
+then
+  echo "-t is required to specify to_date"
+  exit 1
+fi
+if [ -z "$duration" ]
+then
+  echo "-d is required to specify duration"
+  exit 1
+fi
+if [ -z "$namespace" ]
+then
+  echo "-n is required to specify namespace"
+  exit 1
+fi
+if [ -z "$repo" ]
+then
+  echo "-r is required to specify the merging repo base"
+  exit 1
+fi
+if [ -z "$branch" ]
+then
+  echo "-b is required to specify the merging branch base"
+  exit 1
+fi
+
+# this also lets us check the gcloud credentials early
+echo "making sure spanner table 'files' exists"
+create_table=$( echo -n '
+CREATE TABLE IF NOT EXISTS
+  files (
+    "namespace" text,
+    "repo" text,
+    "commit" text,
+    "filepath" text,
+    "duration" bigint,
+    "dateto" date,
+    "instrumented" bigint,
+    "covered" bigint,
+  PRIMARY KEY
+    (duration, dateto, commit, filepath) );')
+gcloud spanner databases ddl update coverage --instance=syzbot --project=syzkaller \
+  --ddl="$create_table"
+
+echo "Workdir: $workdir"
+base_dir="${workdir}repos/base"
+if [ ! -d $base_dir ]; then
+  echo "base dir doesn't exist, cloning"
+  git clone $repo $base_dir
+fi
+cd $base_dir
+git checkout $branch
+cd -
+
+# get the last merged commit at to_date
+# sometimes many commits share the same timestamp, so their order is not stable
+base_commit=$(git --git-dir=${base_dir}/.git log --date=iso-local --before ${to_date}T23:59:59 --pretty=format:"%cd %H" -1000 | \
+  sort -rn | { head -1; cat >/dev/null; } | rev | cut -d' ' -f1 | rev)
+if [ -z "$base_commit" ]
+then
+  echo FAILED to get the base merging commit.
+  exit 1
+fi
+echo The latest commit as of $to_date is $base_commit.
+
+# rm -rf $base_dir
+# echo Temp dir $base_dir deleted.
+
+from_date=$(date -d "$to_date - $duration days" +%Y-%m-%d)
+sessionID=$(uuidgen)
+gsURI=$(echo gs://syzbot-temp/bq-exports/${sessionID}/*.csv.gz)
+echo fetching data from bigquery
+query=$( echo -n '
+EXPORT DATA
+  OPTIONS (
+    uri = "'$gsURI'",
+    format = "CSV",
+    overwrite = true,
+    header = true,
+    compression = "GZIP")
+AS (
+  SELECT
+    kernel_repo, kernel_branch, kernel_commit, file_path, sl, SUM(hit_count) as hit_count
+  FROM syzkaller.syzbot_coverage.'$namespace'
+  WHERE
+    TIMESTAMP_TRUNC(timestamp, DAY) >= "'$from_date'" AND
+    TIMESTAMP_TRUNC(timestamp, DAY) <= "'$to_date'" AND
+    version = 1
+  GROUP BY file_path, kernel_commit, kernel_repo, kernel_branch, sl
+  ORDER BY file_path
+);
+')
+
+bq query --format=csv --use_legacy_sql=false "$query"
+sessionDir="$workdir/sessions/$sessionID"
+mkdir -p $sessionDir
+gcloud storage cp $gsURI $sessionDir
+cat $sessionDir/*.csv.gz | gunzip | \
+go run ./tools/syz-covermerger/ -workdir $workdir \
+  -repo $repo \
+  -branch $branch \
+  -commit $base_commit \
+  -save-to-spanner true \
+  -namespace $namespace \
+  -duration $duration \
+  -date-to $to_date
+
+echo Cleanup
+rm -rf $sessionDir
diff --git a/tools/syz-coveraggregator/syz_coveraggregator.go b/tools/syz-coveraggregator/syz_coveraggregator.go
deleted file mode 100644
index fda7619bf1de..000000000000
--- a/tools/syz-coveraggregator/syz_coveraggregator.go
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2024 syzkaller project authors. All rights reserved.
-// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"os"
-
-	"github.com/google/syzkaller/pkg/covermerger"
-)
-
-func main() {
-	flagWorkdir := flag.String("workdir", "workdir-cover-aggregation",
-		"[optional] used to clone repos")
-	flagCleanWorkdir := flag.Bool("clean-workdir", false,
-		"[optional] cleans workdir before start")
-	flagRepo := flag.String("repo", "", "[required] repo to be used as an aggregation point")
-	flagBranch := flag.String("branch", "", "[required] branch to be used as an aggregation point")
-	flagCommit := flag.String("commit", "", "[required] commit hash to be used as an aggregation point")
-	flag.Parse()
-
-	config := &covermerger.Config{
-		Workdir: *flagWorkdir,
-	}
-	if *flagRepo == "" || *flagBranch == "" || *flagCommit == "" {
-		log.Print("-repo -branch and -commit flags are required")
-		return
-	}
-	if *flagCleanWorkdir {
-		if err := os.RemoveAll(config.Workdir); err != nil {
-			panic("failed to clean workdir " + config.Workdir)
-		}
-	}
-	mergedCoverage, err := covermerger.AggregateStreamData(
-		config,
-		os.Stdin,
-		covermerger.RepoBranchCommit{
-			Repo:   *flagRepo,
-			Branch: *flagBranch,
-			Commit: *flagCommit,
-		})
-	if err != nil {
-		panic(err)
-	}
-	for fileName, lineStat := range mergedCoverage {
-		totalInstrumentedLines := 0
-		totalCoveredLines := 0
-		if !lineStat.FileExists {
-			continue
-		}
-
-		for _, lineHitCount := range lineStat.HitCounts {
-			totalInstrumentedLines++
-			if lineHitCount > 0 {
-				totalCoveredLines++
-			}
-		}
-		coverage := 0.0
-		if totalInstrumentedLines != 0 {
-			coverage = float64(totalCoveredLines) / float64(totalInstrumentedLines)
-		}
-		fmt.Printf("%s: %.2f%%", fileName, coverage*100)
-	}
-}
diff --git a/tools/syz-covermerger/db.go b/tools/syz-covermerger/db.go
new file mode 100644
index 000000000000..996e7a6e065f
--- /dev/null
+++ b/tools/syz-covermerger/db.go
@@ -0,0 +1,64 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"cloud.google.com/go/civil"
+	"cloud.google.com/go/spanner"
+)
+
+// TODO: move to dashAPI once tested? I'm not sure we'll benefit.
+
+type DBRecord struct {
+	Namespace    string
+	Repo         string
+	Commit       string
+	Duration     int64
+	DateTo       civil.Date
+	FilePath     string
+	Instrumented int64
+	Covered      int64
+}
+
+func saveToSpanner(ctx context.Context, projectID string, coverage map[string]*Coverage,
+	template *DBRecord) {
+	client, err := spanner.NewClient(ctx, "projects/"+projectID+"/instances/syzbot/databases/coverage")
+	if err != nil {
+		panic(fmt.Sprintf("spanner.NewClient() failed: %s", err.Error()))
+	}
+	defer client.Close()
+
+	mutations := []*spanner.Mutation{}
+	for filePath, record := range coverage {
+		var insert *spanner.Mutation
+		if insert, err = spanner.InsertOrUpdateStruct("files", &DBRecord{
+			Namespace:    template.Namespace,
+			Repo:         template.Repo,
+			Commit:       template.Commit,
+			Duration:     template.Duration,
+			DateTo:       template.DateTo,
+			FilePath:     filePath,
+			Instrumented: record.Instrumented,
+			Covered:      record.Covered,
+		}); err != nil {
+			panic(fmt.Sprintf("failed to spanner.InsertOrUpdateStruct(): %s", err.Error()))
+		}
+		mutations = append(mutations, insert)
+		// 80k mutations is a DB limit; 7 fields * 1k records is approx 7k mutations.
+		// Let's keep this value 10x lower than the limit to leave room for index
+		// updates, which are also counted.
+		if len(mutations) > 1000 {
+			if _, err = client.Apply(ctx, mutations); err != nil {
+				panic(fmt.Sprintf("failed to spanner.Apply(inserts): %s", err.Error()))
+			}
+			mutations = nil
+		}
+	}
+	if _, err = client.Apply(ctx, mutations); err != nil {
+		panic(fmt.Sprintf("failed to spanner.Apply(inserts): %s", err.Error()))
+	}
+}
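[Reviewer note] A usage sketch for saveToSpanner: the template carries the per-run dimensions and each coverage entry becomes one row in the files table. All values below are placeholders:

    coverage := map[string]*Coverage{
        "mm/mmap.c": {Instrumented: 100, Covered: 42},
    }
    saveToSpanner(context.Background(), "syzkaller", coverage, &DBRecord{
        Namespace: "upstream",
        Repo:      "git://repo",
        Commit:    "abc123",
        Duration:  30,
        DateTo:    civil.Date{Year: 2024, Month: 3, Day: 31},
    })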
diff --git a/tools/syz-covermerger/syz_covermerger.go b/tools/syz-covermerger/syz_covermerger.go
new file mode 100644
index 000000000000..afa518487fc1
--- /dev/null
+++ b/tools/syz-covermerger/syz_covermerger.go
@@ -0,0 +1,127 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"sort"
+
+	"cloud.google.com/go/civil"
+	"github.com/google/syzkaller/pkg/covermerger"
+	"golang.org/x/exp/maps"
+)
+
+var (
+	flagWorkdir = flag.String("workdir", "workdir-cover-aggregation",
+		"[optional] used to clone repos")
+	flagRepo          = flag.String("repo", "", "[required] repo to be used as an aggregation point")
+	flagBranch        = flag.String("branch", "", "[required] branch to be used as an aggregation point")
+	flagCommit        = flag.String("commit", "", "[required] commit hash to be used as an aggregation point")
+	flagNamespace     = flag.String("namespace", "upstream", "[optional] target namespace")
+	flagDuration      = flag.Int64("duration", 0, "[optional] used to mark DB records")
+	flagDateTo        = flag.String("date-to", "", "[optional] used to mark DB records")
+	flagSaveToSpanner = flag.String("save-to-spanner", "", "[optional] save aggregation to spanner")
+	flagProjectID     = flag.String("project-id", "syzkaller", "[optional] target spanner db project")
+)
+
+func main() {
+	flag.Parse()
+	config := &covermerger.Config{
+		Workdir: *flagWorkdir,
+		Base: covermerger.RepoBranchCommit{
+			Repo:   *flagRepo,
+			Branch: *flagBranch,
+			Commit: *flagCommit,
+		},
+	}
+	mergeResult, err := covermerger.MergeCSVData(config, os.Stdin)
+	if err != nil {
+		panic(err)
+	}
+	printMergeResult(mergeResult)
+	if *flagSaveToSpanner != "" {
+		log.Print("saving to spanner")
+		if *flagDuration == 0 || *flagDateTo == "" {
+			panic("duration and date-to are required to store to DB")
+		}
+		var dateTo civil.Date
+		if dateTo, err = civil.ParseDate(*flagDateTo); err != nil {
+			panic(fmt.Sprintf("failed to parse date-to: %s", err.Error()))
+		}
+		coverage, _, _ := mergeResultsToCoverage(mergeResult)
+		saveToSpanner(context.Background(), *flagProjectID, coverage,
+			&DBRecord{
+				Namespace: *flagNamespace,
+				Repo:      *flagRepo,
+				Commit:    *flagCommit,
+				Duration:  *flagDuration,
+				DateTo:    dateTo,
+			},
+		)
+	}
+}
+
+func printMergeResult(mergeResult map[string]*covermerger.MergeResult) {
+	totalLostFrames := map[covermerger.RepoBranchCommit]int64{}
+	coverage, totalInstrumentedLines, totalCoveredLines := mergeResultsToCoverage(mergeResult)
+	keys := maps.Keys(coverage)
+	sort.Strings(keys)
+	for _, fileName := range keys {
+		lineStat := mergeResult[fileName]
+		for rbc, lostFrames := range lineStat.LostFrames {
+			log.Printf("\t[warn] lost %d frames from rbc(%s, %s, %s)",
+				lostFrames, rbc.Repo, rbc.Branch, rbc.Commit)
+			totalLostFrames[rbc] += lostFrames
+		}
+		printCoverage(fileName, coverage[fileName].Instrumented, coverage[fileName].Covered)
+	}
+	printCoverage("total", totalInstrumentedLines, totalCoveredLines)
+	for rbc, lostFrames := range totalLostFrames {
+		log.Printf("\t[warn] lost %d frames from rbc(%s, %s, %s)",
+			lostFrames, rbc.Repo, rbc.Branch, rbc.Commit)
+	}
+}
+
+func printCoverage(target string, instrumented, covered int64) {
+	coverage := 0.0
+	if instrumented != 0 {
+		coverage = float64(covered) / float64(instrumented)
+	}
+	fmt.Printf("%s,%d,%d,%.2f%%\n",
+		target, instrumented, covered, coverage*100)
+}
+
+type Coverage struct {
+	Instrumented int64
+	Covered      int64
+}
+
+func mergeResultsToCoverage(mergedCoverage map[string]*covermerger.MergeResult,
+) (map[string]*Coverage, int64, int64) {
+	res := make(map[string]*Coverage)
+	var totalInstrumented, totalCovered int64
+	for fileName, lineStat := range mergedCoverage {
+		if !lineStat.FileExists {
+			continue
+		}
+		var instrumented, covered int64
+		for _, lineHitCount := range lineStat.HitCounts {
+			instrumented++
+			if lineHitCount > 0 {
+				covered++
+			}
+		}
+		res[fileName] = &Coverage{
+			Instrumented: instrumented,
+			Covered:      covered,
+		}
+		totalInstrumented += instrumented
+		totalCovered += covered
+	}
+	return res, totalInstrumented, totalCovered
+}
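[Reviewer note] End-to-end, the new entry point can also be driven as a library, which is what syz_covermerger.go above does with os.Stdin. A minimal sketch with placeholder repo coordinates; on first use this clones the kernel tree into the workdir, so it is slow:

    package main

    import (
        "fmt"
        "os"

        "github.com/google/syzkaller/pkg/covermerger"
    )

    func main() {
        config := &covermerger.Config{
            Workdir: "workdir-cover-aggregation",
            Base: covermerger.RepoBranchCommit{
                Repo:   "git://repo",  // placeholder
                Branch: "master",      // placeholder
                Commit: "commit-hash", // placeholder
            },
        }
        mergeResult, err := covermerger.MergeCSVData(config, os.Stdin)
        if err != nil {
            panic(err)
        }
        for file, res := range mergeResult {
            fmt.Printf("%s exists=%v lines=%d\n", file, res.FileExists, len(res.HitCounts))
        }
    }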