From b2e14f01b32855928ef6fd1b8cd5148622e3662d Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Wed, 6 Jul 2022 16:27:40 +0100 Subject: [PATCH] Empty identity CID should be indexed when options are set There is an edge case where, if storing identity CIDs is enabled in a CARv2, one could technically store an empty identity CID, which ends up with a `singleWidthIndex` width of 8. Such CAR files do exist out there, and the validation changes introduced in `2.4.0` mean that the indices of such CAR files are no longer readable, even if regenerated. The question is: should this be considered a valid/readable index? --- v2/blockstore/readwrite_test.go | 50 +++++++++++++++++++++++++++++++++ v2/index/indexsorted.go | 4 +-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/v2/blockstore/readwrite_test.go b/v2/blockstore/readwrite_test.go index 11cc99a2..22525e78 100644 --- a/v2/blockstore/readwrite_test.go +++ b/v2/blockstore/readwrite_test.go @@ -8,6 +8,7 @@ import ( "io/ioutil" "math/rand" "os" + "path" "path/filepath" "sync" "testing" @@ -943,3 +944,52 @@ func TestReadWrite_ReWritingCARv1WithIdentityCidIsIdenticalToOriginalWithOptions require.Equal(t, wantWritten, gotWritten) require.Equal(t, wantSum, gotSum) } + +func TestBlockstore_IdentityCidWithEmptyDataIsIndexed(t *testing.T) { + p := path.Join(t.TempDir(), "car-id-cid-empty.carv2") + var noData []byte + + mh, err := multihash.Sum(noData, multihash.IDENTITY, -1) + require.NoError(t, err) + w, err := blockstore.OpenReadWrite(p, nil, carv2.StoreIdentityCIDs(true)) + require.NoError(t, err) + + blk, err := blocks.NewBlockWithCid(noData, cid.NewCidV1(cid.Raw, mh)) + require.NoError(t, err) + + err = w.Put(context.TODO(), blk) + require.NoError(t, err) + require.NoError(t, w.Finalize()) + + r, err := carv2.OpenReader(p) + require.NoError(t, err) + defer func() { require.NoError(t, r.Close()) }() + + dr, err := r.DataReader() + require.NoError(t, err) + header, err := carv1.ReadHeader(dr, 
carv1.DefaultMaxAllowedHeaderSize) + require.NoError(t, err) + wantOffset, err := carv1.HeaderSize(header) + require.NoError(t, err) + + ir, err := r.IndexReader() + require.NoError(t, err) + idx, err := index.ReadFrom(ir) + require.NoError(t, err) + + itidx, ok := idx.(index.IterableIndex) + require.True(t, ok) + var count int + err = itidx.ForEach(func(m multihash.Multihash, u uint64) error { + dm, err := multihash.Decode(m) + require.NoError(t, err) + require.Equal(t, multicodec.Identity, multicodec.Code(dm.Code)) + require.Equal(t, 0, dm.Length) + require.Empty(t, dm.Digest) + require.Equal(t, wantOffset, u) + count++ + return nil + }) + require.NoError(t, err) + require.Equal(t, 1, count) +} diff --git a/v2/index/indexsorted.go b/v2/index/indexsorted.go index aeed4c11..ed94ed8f 100644 --- a/v2/index/indexsorted.go +++ b/v2/index/indexsorted.go @@ -91,8 +91,8 @@ func (s *singleWidthIndex) Unmarshal(r io.Reader) error { } func (s *singleWidthIndex) checkUnmarshalLengths(width uint32, dataLen, extra uint64) error { - if width <= 8 { - return errors.New("malformed index; width must be bigger than 8") + if width < 8 { + return errors.New("malformed index; width must be at least 8") } const maxWidth = 32 << 20 // 32MiB, to ~match the go-cid maximum if width > maxWidth {