Merge pull request #8252 from dolthub/aaron/nbs-table-file-index-uint32-overflow

go/store/nbs: Fix table_index for table files with so many chunks that certain index slice operations overflow a uint32.
reltuk authored Aug 12, 2024
2 parents 3aa62b3 + 76cd8a0 commit 7ee9b5c
Showing 4 changed files with 118 additions and 12 deletions.
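The underlying bug: several index computations below multiply a small per-entry size by a uint32 chunk count or ordinal and only widen the result afterwards, so once a table file holds enough chunks the product wraps modulo 2^32 and the index reads the wrong bytes. The fix performs the multiplication in a 64-bit type from the start. A minimal standalone sketch of the difference (the constant 12 mirrors the prefix-tuple size the new test writes, but treat it as an assumption here):

package main

import "fmt"

// Assumed for illustration: an 8-byte prefix plus a 4-byte ordinal per tuple,
// matching what the new test writes with PutUint64/PutUint32.
const prefixTupleSize = 12

func main() {
	idx := uint32(400_000_000) // large enough that 12*idx exceeds 1<<32 - 1

	// Old pattern: the multiply happens in uint32 and silently wraps
	// before the conversion widens the already-wrong result.
	overflowed := int64(prefixTupleSize * idx)

	// Pattern used by this commit: widen first, then multiply in int64.
	widened := prefixTupleSize * int64(idx)

	fmt.Println(overflowed) // 505032704 (4_800_000_000 mod 2^32)
	fmt.Println(widened)    // 4800000000
}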
22 changes: 22 additions & 0 deletions go/store/nbs/race_off.go
@@ -0,0 +1,22 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !race
// +build !race

package nbs

func isRaceEnabled() bool {
return false
}
22 changes: 22 additions & 0 deletions go/store/nbs/race_on.go
@@ -0,0 +1,22 @@
// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build race
// +build race

package nbs

func isRaceEnabled() bool {
return true
}
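These two new files form a standard Go build-tag pair for detecting the race detector: compiling with -race sets the race build tag, so race_on.go is built and isRaceEnabled() returns true; otherwise race_off.go is built. The new test in table_index_test.go uses it to skip itself under the race detector, along the lines of this sketch (the test name is hypothetical):

package nbs

import "testing"

// Hypothetical example of gating a test on the race detector, mirroring the
// guard used by TestParseLargeTableIndex in this commit.
func TestHugeIndex(t *testing.T) {
	if isRaceEnabled() { // true only in binaries built with -race
		t.SkipNow()
	}
	// ... allocate and exercise the very large index ...
}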
26 changes: 14 additions & 12 deletions go/store/nbs/table_index.go
@@ -234,12 +234,14 @@ func newOnHeapTableIndex(indexBuff []byte, offsetsBuff1 []byte, count uint32, to
return onHeapTableIndex{}, ErrWrongBufferSize
}

- tuples := indexBuff[:prefixTupleSize*count]
- lengths := indexBuff[prefixTupleSize*count : prefixTupleSize*count+lengthSize*count]
- suffixes := indexBuff[prefixTupleSize*count+lengthSize*count : indexSize(count)]
+ cnt64 := uint64(count)
+
+ tuples := indexBuff[:prefixTupleSize*cnt64]
+ lengths := indexBuff[prefixTupleSize*cnt64 : prefixTupleSize*cnt64+lengthSize*cnt64]
+ suffixes := indexBuff[prefixTupleSize*cnt64+lengthSize*cnt64 : indexSize(count)]
footer := indexBuff[indexSize(count):]

- chunks2 := count / 2
+ chunks2 := cnt64 / 2

r := NewOffsetsReader(bytes.NewReader(lengths))
_, err := io.ReadFull(r, offsetsBuff1)
@@ -369,7 +371,7 @@ func (ti onHeapTableIndex) findPrefix(prefix uint64) (idx uint32) {
}

func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) {
- off := int64(prefixTupleSize * idx)
+ off := prefixTupleSize * int64(idx)
b := ti.prefixTuples[off : off+prefixTupleSize]

prefix = binary.BigEndian.Uint64(b[:])
@@ -378,13 +380,13 @@ func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) {
}

func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 {
- off := int64(prefixTupleSize * idx)
+ off := prefixTupleSize * int64(idx)
b := ti.prefixTuples[off : off+hash.PrefixLen]
return binary.BigEndian.Uint64(b)
}

func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 {
- off := int64(prefixTupleSize*idx) + hash.PrefixLen
+ off := prefixTupleSize*int64(idx) + hash.PrefixLen
b := ti.prefixTuples[off : off+ordinalSize]
return binary.BigEndian.Uint32(b)
}
@@ -394,10 +396,10 @@ func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 {
chunks1 := ti.count - ti.count/2
var b []byte
if ord < chunks1 {
- off := int64(offsetSize * ord)
+ off := offsetSize * int64(ord)
b = ti.offsets1[off : off+offsetSize]
} else {
- off := int64(offsetSize * (ord - chunks1))
+ off := offsetSize * int64(ord-chunks1)
b = ti.offsets2[off : off+offsetSize]
}
return binary.BigEndian.Uint64(b)
@@ -406,7 +408,7 @@ func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 {
func (ti onHeapTableIndex) ordinals() ([]uint32, error) {
// todo: |o| is not accounted for in the memory quota
o := make([]uint32, ti.count)
- for i, off := uint32(0), 0; i < ti.count; i, off = i+1, off+prefixTupleSize {
+ for i, off := uint32(0), uint64(0); i < ti.count; i, off = i+1, off+prefixTupleSize {
b := ti.prefixTuples[off+hash.PrefixLen : off+prefixTupleSize]
o[i] = binary.BigEndian.Uint32(b)
}
@@ -416,7 +418,7 @@ func (ti onHeapTableIndex) prefixes() ([]uint64, error) {
func (ti onHeapTableIndex) prefixes() ([]uint64, error) {
// todo: |p| is not accounted for in the memory quota
p := make([]uint64, ti.count)
- for i, off := uint32(0), 0; i < ti.count; i, off = i+1, off+prefixTupleSize {
+ for i, off := uint32(0), uint64(0); i < ti.count; i, off = i+1, off+prefixTupleSize {
b := ti.prefixTuples[off : off+hash.PrefixLen]
p[i] = binary.BigEndian.Uint64(b)
}
@@ -425,7 +427,7 @@

func (ti onHeapTableIndex) hashAt(idx uint32) hash.Hash {
// Get tuple
- off := int64(prefixTupleSize * idx)
+ off := prefixTupleSize * int64(idx)
tuple := ti.prefixTuples[off : off+prefixTupleSize]

// Get prefix, ordinal, and suffix
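Two flavors of the same fix appear above. In newOnHeapTableIndex the slice boundaries (e.g. prefixTupleSize*cnt64+lengthSize*cnt64) are now computed from a uint64 copy of the count, and in the accessors the ordinal is widened before the multiply: int64(prefixTupleSize * idx) still performs the multiplication in uint32 and only widens the already-wrapped result, whereas prefixTupleSize * int64(idx) multiplies in int64. Assuming the usual NBS layout of a 12-byte prefix tuple and a 4-byte length per chunk, the lengths/suffixes boundary grows by 16 bytes per chunk and wraps past 2^32 at roughly 268 million chunks, comfortably below the 320 million chunks exercised by the new test.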
60 changes: 60 additions & 0 deletions go/store/nbs/table_index_test.go
@@ -56,6 +56,66 @@ func TestParseTableIndex(t *testing.T) {
}
}

func TestParseLargeTableIndex(t *testing.T) {
if isRaceEnabled() {
t.SkipNow()
}

// This is large enough for the NBS table index to overflow uint32s on certain index calculations.
numChunks := uint32(320331063)
idxSize := indexSize(numChunks)
sz := idxSize + footerSize
idxBuf := make([]byte, sz)
copy(idxBuf[idxSize+12:], magicNumber)
binary.BigEndian.PutUint32(idxBuf[idxSize:], numChunks)
binary.BigEndian.PutUint64(idxBuf[idxSize+4:], uint64(numChunks)*4*1024)

var prefix uint64

off := 0
// Write Tuples
for i := uint32(0); i < numChunks; i++ {
binary.BigEndian.PutUint64(idxBuf[off:], prefix)
binary.BigEndian.PutUint32(idxBuf[off+hash.PrefixLen:], i)
prefix += 2
off += prefixTupleSize
}

// Write Lengths
for i := uint32(0); i < numChunks; i++ {
binary.BigEndian.PutUint32(idxBuf[off:], 4*1024)
off += lengthSize
}

// Write Suffixes
for i := uint32(0); i < numChunks; i++ {
off += hash.SuffixLen
}

idx, err := parseTableIndex(context.Background(), idxBuf, &UnlimitedQuotaProvider{})
require.NoError(t, err)
h := &hash.Hash{}
h[7] = 2
ord, err := idx.lookupOrdinal(h)
require.NoError(t, err)
assert.Equal(t, uint32(1), ord)
h[7] = 1
ord, err = idx.lookupOrdinal(h)
require.NoError(t, err)
assert.Equal(t, numChunks, ord)
// This is the end of the chunk, not the beginning.
assert.Equal(t, uint64(8*1024), idx.offsetAt(1))
assert.Equal(t, uint64(2), idx.prefixAt(1))
assert.Equal(t, uint32(1), idx.ordinalAt(1))
h[7] = 2
assert.Equal(t, *h, idx.hashAt(1))
entry, ok, err := idx.lookup(h)
require.NoError(t, err)
assert.True(t, ok)
assert.Equal(t, uint64(4*1024), entry.Offset())
assert.Equal(t, uint32(4*1024), entry.Length())
}

func BenchmarkFindPrefix(b *testing.B) {
ctx := context.Background()
f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")
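For scale, assuming the same per-chunk sizes as above (12-byte prefix tuple, 4-byte length, 12-byte suffix), the 320,331,063-chunk index buffer built by the new test works out to about 28 bytes per chunk plus the footer, roughly 8.4 GiB, which is presumably why the test skips itself when the race detector is enabled. The spot checks then follow from how the buffer is written: prefixes go 0, 2, 4, …, every chunk length is 4 KiB, so the chunk at ordinal 1 has prefix 2, starts at 4 KiB, and offsetAt(1) reports 8 KiB, the end of that chunk.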
