From 9a7d8ff7ebd544d04ac4951f67f36e9b32eb770e Mon Sep 17 00:00:00 2001 From: Stefan Hengl Date: Thu, 26 Sep 2024 15:51:25 +0200 Subject: [PATCH] archive: use ModTime as proxy for LatestCommitDate This is motivated by (add link here) We use archive index in our e2e tests. In order to test our latest improvements to ranking, archive index needs to set the latest commit date. Test plan: - new unit test - I checked the tar files downloaded from github have the correct mod time. --- internal/archive/archive.go | 8 +++- internal/archive/e2e_test.go | 78 ++++++++++++++++++++++++++++++++++-- internal/archive/index.go | 17 ++++++-- 3 files changed, 93 insertions(+), 10 deletions(-) diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 2c8cb64fa..2048a25fb 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -11,6 +11,7 @@ import ( "net/url" "os" "strings" + "time" ) type Archive interface { @@ -20,8 +21,9 @@ type Archive interface { type File struct { io.ReadCloser - Name string - Size int64 + Name string + Size int64 + ModTime time.Time } type tarArchive struct { @@ -45,6 +47,7 @@ func (a *tarArchive) Next() (*File, error) { ReadCloser: io.NopCloser(a.tr), Name: hdr.Name, Size: hdr.Size, + ModTime: hdr.ModTime, }, nil } } @@ -71,6 +74,7 @@ func (a *zipArchive) Next() (*File, error) { ReadCloser: r, Name: f.Name, Size: int64(f.UncompressedSize64), + ModTime: f.Modified, }, nil } diff --git a/internal/archive/e2e_test.go b/internal/archive/e2e_test.go index 79b3ba538..b861eb09a 100644 --- a/internal/archive/e2e_test.go +++ b/internal/archive/e2e_test.go @@ -11,8 +11,12 @@ import ( "io" "log" "os" + "path/filepath" "strings" "testing" + "time" + + "github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/build" @@ -28,11 +32,18 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +var modTime = time.Date(2024, 9, 26, 0, 0, 0, 0, time.UTC) + func writeArchive(w io.Writer, format string, files 
map[string]string) (err error) { if format == "zip" { zw := zip.NewWriter(w) for name, body := range files { - f, err := zw.Create(name) + header := &zip.FileHeader{ + Name: name, + Method: zip.Deflate, + Modified: modTime, + } + f, err := zw.CreateHeader(header) if err != nil { return err } @@ -63,9 +74,10 @@ func writeArchive(w io.Writer, format string, files map[string]string) (err erro for name, body := range files { hdr := &tar.Header{ - Name: name, - Mode: 0o600, - Size: int64(len(body)), + Name: name, + Mode: 0o600, + Size: int64(len(body)), + ModTime: modTime, } if err := tw.WriteHeader(hdr); err != nil { return err @@ -189,3 +201,61 @@ func testIndexIncrementally(t *testing.T, format string) { } } } + +// TestLatestCommitDate tests that the latest commit date is set correctly if +// the mod time of the files has been set during the archive creation. +func TestLatestCommitDate(t *testing.T) { + for _, format := range []string{"tar", "tgz", "zip"} { + t.Run(format, func(t *testing.T) { + testLatestCommitDate(t, format) + }) + } +} + +func testLatestCommitDate(t *testing.T, format string) { + // Create an archive + archive, err := os.CreateTemp("", "TestLatestCommitDate") + require.NoError(t, err) + defer os.Remove(archive.Name()) + + fileSize := 10 + files := map[string]string{} + for i := 0; i < 4; i++ { + s := fmt.Sprintf("%d", i) + files["F"+s] = strings.Repeat("a", fileSize) + files["!F"+s] = strings.Repeat("a", fileSize) + } + + err = writeArchive(archive, format, files) + if err != nil { + t.Fatalf("unable to create archive %v", err) + } + archive.Close() + + // Index + indexDir := t.TempDir() + bopts := build.Options{ + IndexDir: indexDir, + } + opts := Options{ + Archive: archive.Name(), + Name: "repo", + Branch: "master", + Commit: "cccccccccccccccccccccccccccccccccccccccc", + } + + err = Index(opts, bopts) + require.NoError(t, err) + + // Read the metadata of the index we just created and check the latest commit date. 
+ f, err := os.Open(indexDir) + require.NoError(t, err) + + indexFiles, err := f.Readdirnames(1) + require.Len(t, indexFiles, 1) + + repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) + require.NoError(t, err) + require.Len(t, repos, 1) + require.True(t, repos[0].LatestCommitDate.Equal(modTime)) +} diff --git a/internal/archive/index.go b/internal/archive/index.go index 2262c9521..c8836768f 100644 --- a/internal/archive/index.go +++ b/internal/archive/index.go @@ -7,6 +7,7 @@ import ( "io" "net/url" "strings" + "sync" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/build" @@ -113,14 +114,22 @@ func Index(opts Options, bopts build.Options) error { defer a.Close() bopts.RepositoryDescription.Source = opts.Archive - builder, err := build.NewBuilder(bopts) - if err != nil { - return err - } + var builder *build.Builder + once := sync.Once{} + var onceErr error add := func(f *File) error { defer f.Close() + once.Do(func() { + // We use the ModTime of the first file as a proxy for the latest commit date. + bopts.RepositoryDescription.LatestCommitDate = f.ModTime + builder, onceErr = build.NewBuilder(bopts) + }) + if onceErr != nil { + return onceErr + } + contents, err := io.ReadAll(f) if err != nil { return err