Skip to content

Commit

Permalink
feat: Adding functionality for dirhash.
Browse files Browse the repository at this point in the history
Signed-off-by: Matthias Glastra <[email protected]>
  • Loading branch information
matglas committed Aug 23, 2024
1 parent 960e8a1 commit 645f358
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 32 deletions.
39 changes: 30 additions & 9 deletions attestation/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sync"
"time"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/in-toto/go-witness/log"
)
Expand Down Expand Up @@ -83,6 +84,20 @@ func WithWorkingDir(workingDir string) AttestationContextOption {
}
}

// WithDirHashGlob returns an option that sets the glob patterns used to select
// directories that should be recorded as a single directory hash.
//
// Each pattern is compiled with glob.Compile. A pattern that fails to compile
// is logged and left out of the compiled set: storing the nil glob.Glob that
// accompanies a compile error would panic the first time Match is called on
// it. The raw patterns are stored unchanged in ctx.dirHashGlob. An empty
// dirHashGlob leaves the context untouched.
func WithDirHashGlob(dirHashGlob []string) AttestationContextOption {
	return func(ctx *AttestationContext) {
		if len(dirHashGlob) == 0 {
			return
		}

		ctx.dirHashGlob = dirHashGlob

		// Append rather than index so that skipped patterns leave no nil holes.
		compiled := make([]glob.Glob, 0, len(dirHashGlob))
		for _, pattern := range dirHashGlob {
			g, err := glob.Compile(pattern)
			if err != nil {
				log.Errorf("skipping invalid dirhash glob %q: %v", pattern, err)
				continue
			}
			compiled = append(compiled, g)
		}
		ctx.dirHashGlobCompiled = compiled
	}
}

type CompletedAttestor struct {
Attestor Attestor
StartTime time.Time
Expand All @@ -91,15 +106,17 @@ type CompletedAttestor struct {
}

type AttestationContext struct {
ctx context.Context
attestors []Attestor
workingDir string
hashes []cryptoutil.DigestValue
completedAttestors []CompletedAttestor
products map[string]Product
materials map[string]cryptoutil.DigestSet
stepName string
mutex sync.RWMutex
ctx context.Context
attestors []Attestor
workingDir string
dirHashGlob []string
dirHashGlobCompiled []glob.Glob
hashes []cryptoutil.DigestValue
completedAttestors []CompletedAttestor
products map[string]Product
materials map[string]cryptoutil.DigestSet
stepName string
mutex sync.RWMutex
}

type Product struct {
Expand Down Expand Up @@ -208,6 +225,10 @@ func (ctx *AttestationContext) runAttestor(attestor Attestor) {
log.Infof("Finished %v attestor... (%vs)", attestor.Name(), time.Since(startTime).Seconds())
}

// DirHashGlob returns the compiled glob patterns held in the context's
// dirHashGlobCompiled field. The slice is returned as stored, without being
// copied and without taking ctx.mutex.
func (ctx *AttestationContext) DirHashGlob() []glob.Glob {
	return ctx.dirHashGlobCompiled
}

func (ctx *AttestationContext) CompletedAttestors() []CompletedAttestor {
ctx.mutex.RLock()
out := make([]CompletedAttestor, len(ctx.completedAttestors))
Expand Down
31 changes: 25 additions & 6 deletions attestation/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,29 +19,48 @@ import (
"os"
"path/filepath"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/in-toto/go-witness/log"
)

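// RecordArtifacts walks basePath and records the digest of each file using each of the hash functions in hashes.
// If a file already exists in baseArtifacts and the two artifacts are equal, the artifact is omitted from the
// returned map of artifacts. A directory whose path relative to basePath matches one of the dirHashGlob patterns
// is recorded as a single directory digest, keyed by that relative path plus a trailing path separator, and is
// not descended into.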
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}, processWasTraced bool, openedFiles map[string]bool) (map[string]cryptoutil.DigestSet, error) {
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}, processWasTraced bool, openedFiles map[string]bool, dirHashGlob []glob.Glob) (map[string]cryptoutil.DigestSet, error) {
artifacts := make(map[string]cryptoutil.DigestSet)
err := filepath.Walk(basePath, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return err
}

if info.IsDir() {
return nil
}

relPath, err := filepath.Rel(basePath, path)
if err != nil {
return err
}

if info.IsDir() {
dirHashMatch := false
for _, globItem := range dirHashGlob {
if !dirHashMatch && globItem.Match(relPath) {
dirHashMatch = true
}
}

if dirHashMatch {
dir, err := cryptoutil.CalculateDigestSetFromDir(path, hashes)

if err != nil {
return err
}

artifacts[relPath+string(os.PathSeparator)] = dir
return filepath.SkipDir
}

return nil
}

if info.Mode()&fs.ModeSymlink != 0 {
// if this is a symlink, eval the true path and eval any artifacts in the symlink. we record every symlink we've visited to prevent infinite loops
linkedPath, err := filepath.EvalSymlinks(path)
Expand All @@ -57,7 +76,7 @@ func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.Digest
}

visitedSymlinks[linkedPath] = struct{}{}
symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks, processWasTraced, openedFiles)
symlinkedArtifacts, err := RecordArtifacts(linkedPath, baseArtifacts, hashes, visitedSymlinks, processWasTraced, openedFiles, dirHashGlob)
if err != nil {
return err
}
Expand Down
42 changes: 39 additions & 3 deletions attestation/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"path/filepath"
"testing"

"github.com/gobwas/glob"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/stretchr/testify/require"
)
Expand All @@ -38,13 +39,15 @@ func TestBrokenSymlink(t *testing.T) {
symTestDir := filepath.Join(dir, "symTestDir")
require.NoError(t, os.Symlink(testDir, symTestDir))

_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{})
dirHash := make([]glob.Glob, 0)

_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash)
require.NoError(t, err)

// remove the symlinks and make sure we don't get an error back
require.NoError(t, os.RemoveAll(testDir))
require.NoError(t, os.RemoveAll(testFile))
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{})
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash)
require.NoError(t, err)
}

Expand All @@ -57,7 +60,40 @@ func TestSymlinkCycle(t *testing.T) {
symTestDir := filepath.Join(dir, "symTestDir")
require.NoError(t, os.Symlink(dir, symTestDir))

dirHash := make([]glob.Glob, 0)

// if a symlink cycle weren't properly handled this would be an infinite loop
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{})
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHash)
require.NoError(t, err)
}

// TestDirHash checks that a directory matching a dirhash glob is recorded by
// RecordArtifacts as one directory digest under "<relpath><separator>".
func TestDirHash(t *testing.T) {
	dir := t.TempDir()
	testFile := filepath.Join(dir, "testfile")
	require.NoError(t, os.WriteFile(testFile, []byte("some dummy data"), os.ModePerm))
	testDir := filepath.Join(dir, "testdir")
	require.NoError(t, os.Mkdir(testDir, os.ModePerm))
	testFile2 := filepath.Join(testDir, "testfile2")
	require.NoError(t, os.WriteFile(testFile2, []byte("more dummy data"), os.ModePerm))

	// Fail loudly on a bad pattern instead of passing a nil glob downstream.
	dirHashGlobItem, err := glob.Compile("testdir")
	require.NoError(t, err)
	dirHashGlobs := []glob.Glob{dirHashGlobItem}

	artifacts, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{}, false, map[string]bool{}, dirHashGlobs)
	require.NoError(t, err)

	// Below command is example usage on the above created scenario for testdir.
	// find . -type f | cut -c3- | LC_ALL=C sort | xargs -r sha256sum | sha256sum
	const dirHashSha256 = "ba9842eac063209c5f67c5a202b2b3a710f8f845f1d064f54af56763645b895b"

	require.Len(t, artifacts, 2)

	// RecordArtifacts builds the key with os.PathSeparator, so do the same here
	// rather than hard-coding "/".
	dirKey := "testdir" + string(os.PathSeparator)
	require.Contains(t, artifacts, dirKey)

	dirDigestSetMap, err := artifacts[dirKey].ToNameMap()
	require.NoError(t, err)

	// testify's Equal takes (expected, actual).
	require.Equal(t, dirHashSha256, dirDigestSetMap["dirHash"])
}
4 changes: 2 additions & 2 deletions attestation/link/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ func TestAttest(t *testing.T) {
// Setup Materials
m := attestors.NewTestMaterialAttestor()
materials := make(map[string]cryptoutil.DigestSet)
materials["test2"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"}
materials["test1"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"}
materials["test2"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false, DirHash: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"}
materials["test1"] = cryptoutil.DigestSet{{Hash: crypto.SHA256, GitOID: false, DirHash: false}: "a53d0741798b287c6dd7afa64aee473f305e65d3f49463bb9d7408ec3b12bf5f"}
m.SetMaterials(materials)

// Setup CommandRun
Expand Down
2 changes: 1 addition & 1 deletion attestation/material/material.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (a *Attestor) Schema() *jsonschema.Schema {
}

func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {
materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{}, false, map[string]bool{})
materials, err := file.RecordArtifacts(ctx.WorkingDir(), nil, ctx.Hashes(), map[string]struct{}{}, false, map[string]bool{}, ctx.DirHashGlob())
if err != nil {
return err
}
Expand Down
17 changes: 15 additions & 2 deletions attestation/product/product.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"os"
"path/filepath"

"github.com/gabriel-vasile/mimetype"
Expand Down Expand Up @@ -121,11 +122,19 @@ func fromDigestMap(workingDir string, digestMap map[string]cryptoutil.DigestSet)
products := make(map[string]attestation.Product)
for fileName, digestSet := range digestMap {
filePath := filepath.Join(workingDir, fileName)

mimeType, err := getFileContentType(filePath)
if err != nil {
mimeType = "unknown"
}

if mimeType == "application/octet-stream" {
fileInfo, err := os.Stat(filePath)
if err == nil && fileInfo.IsDir() {
mimeType = "text/directory"
}
}

products[fileName] = attestation.Product{
MimeType: mimeType,
Digest: digestSet,
Expand Down Expand Up @@ -199,7 +208,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {
}
}

products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{}, processWasTraced, openedFileSet)
products, err := file.RecordArtifacts(ctx.WorkingDir(), a.baseArtifacts, ctx.Hashes(), map[string]struct{}{}, processWasTraced, openedFileSet, ctx.DirHashGlob())
if err != nil {
return err
}
Expand Down Expand Up @@ -237,7 +246,11 @@ func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
continue
}

subjects[fmt.Sprintf("file:%v", productName)] = product.Digest
subjectType := "file"
if product.MimeType == "text/directory" {
subjectType = "dir"
}
subjects[fmt.Sprintf("%v:%v", subjectType, productName)] = product.Digest
}

return subjects
Expand Down
52 changes: 43 additions & 9 deletions cryptoutil/digestset.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,44 +22,64 @@ import (
"hash"
"io"
"os"

"golang.org/x/mod/sumdb/dirhash"
)

var (
hashNames = map[DigestValue]string{
{
Hash: crypto.SHA256,
GitOID: false,
Hash: crypto.SHA256,
GitOID: false,
DirHash: false,
}: "sha256",
{
Hash: crypto.SHA1,
GitOID: false,
Hash: crypto.SHA1,
GitOID: false,
DirHash: false,
}: "sha1",
{
Hash: crypto.SHA256,
GitOID: true,
Hash: crypto.SHA256,
GitOID: true,
DirHash: false,
}: "gitoid:sha256",
{
Hash: crypto.SHA1,
GitOID: true,
Hash: crypto.SHA1,
GitOID: true,
DirHash: false,
}: "gitoid:sha1",
{
Hash: crypto.SHA256,
GitOID: false,
DirHash: true,
}: "dirHash",
}

hashesByName = map[string]DigestValue{
"sha256": {
crypto.SHA256,
false,
false,
},
"sha1": {
crypto.SHA1,
false,
false,
},
"gitoid:sha256": {
crypto.SHA256,
true,
false,
},
"gitoid:sha1": {
crypto.SHA1,
true,
false,
},
"dirHash": {
crypto.SHA256,
false,
true,
},
}
)
Expand All @@ -72,7 +92,8 @@ func (e ErrUnsupportedHash) Error() string {

type DigestValue struct {
crypto.Hash
GitOID bool
GitOID bool
DirHash bool
}

func (dv DigestValue) New() hash.Hash {
Expand Down Expand Up @@ -203,6 +224,19 @@ func CalculateDigestSetFromFile(path string, hashes []DigestValue) (DigestSet, e
return CalculateDigestSet(file, hashes)
}

// CalculateDigestSetFromDir computes a digest set for the directory dir.
//
// The directory is hashed with dirhash.HashDir, using an empty prefix and the
// DirhHashSha256 hash function, and the resulting string is stored under the
// "dirHash" name before being converted with NewDigestSet. The hashes argument
// is accepted but is not consulted by this function. An error from
// dirhash.HashDir is returned unchanged together with a nil DigestSet.
func CalculateDigestSetFromDir(dir string, hashes []DigestValue) (DigestSet, error) {
	sum, hashErr := dirhash.HashDir(dir, "", DirhHashSha256)
	if hashErr != nil {
		return nil, hashErr
	}

	return NewDigestSet(map[string]string{"dirHash": sum})
}

func (ds DigestSet) MarshalJSON() ([]byte, error) {
nameMap, err := ds.ToNameMap()
if err != nil {
Expand Down
Loading

0 comments on commit 645f358

Please sign in to comment.