diff --git a/go/store/nbs/race_off.go b/go/store/nbs/race_off.go new file mode 100644 index 0000000000..afd1ad72ce --- /dev/null +++ b/go/store/nbs/race_off.go @@ -0,0 +1,22 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !race +// +build !race + +package nbs + +func isRaceEnabled() bool { + return false +} diff --git a/go/store/nbs/race_on.go b/go/store/nbs/race_on.go new file mode 100644 index 0000000000..3d8ab55a75 --- /dev/null +++ b/go/store/nbs/race_on.go @@ -0,0 +1,22 @@ +// Copyright 2024 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build race +// +build race + +package nbs + +func isRaceEnabled() bool { + return true +} diff --git a/go/store/nbs/table_index.go b/go/store/nbs/table_index.go index 43e2873348..bf210c4378 100644 --- a/go/store/nbs/table_index.go +++ b/go/store/nbs/table_index.go @@ -234,12 +234,14 @@ func newOnHeapTableIndex(indexBuff []byte, offsetsBuff1 []byte, count uint32, to return onHeapTableIndex{}, ErrWrongBufferSize } - tuples := indexBuff[:prefixTupleSize*count] - lengths := indexBuff[prefixTupleSize*count : prefixTupleSize*count+lengthSize*count] - suffixes := indexBuff[prefixTupleSize*count+lengthSize*count : indexSize(count)] + cnt64 := uint64(count) + + tuples := indexBuff[:prefixTupleSize*cnt64] + lengths := indexBuff[prefixTupleSize*cnt64 : prefixTupleSize*cnt64+lengthSize*cnt64] + suffixes := indexBuff[prefixTupleSize*cnt64+lengthSize*cnt64 : indexSize(count)] footer := indexBuff[indexSize(count):] - chunks2 := count / 2 + chunks2 := cnt64 / 2 r := NewOffsetsReader(bytes.NewReader(lengths)) _, err := io.ReadFull(r, offsetsBuff1) @@ -369,7 +371,7 @@ func (ti onHeapTableIndex) findPrefix(prefix uint64) (idx uint32) { } func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) { - off := int64(prefixTupleSize * idx) + off := prefixTupleSize * int64(idx) b := ti.prefixTuples[off : off+prefixTupleSize] prefix = binary.BigEndian.Uint64(b[:]) @@ -378,13 +380,13 @@ func (ti onHeapTableIndex) tupleAt(idx uint32) (prefix uint64, ord uint32) { } func (ti onHeapTableIndex) prefixAt(idx uint32) uint64 { - off := int64(prefixTupleSize * idx) + off := prefixTupleSize * int64(idx) b := ti.prefixTuples[off : off+hash.PrefixLen] return binary.BigEndian.Uint64(b) } func (ti onHeapTableIndex) ordinalAt(idx uint32) uint32 { - off := int64(prefixTupleSize*idx) + hash.PrefixLen + off := prefixTupleSize*int64(idx) + hash.PrefixLen b := ti.prefixTuples[off : off+ordinalSize] return binary.BigEndian.Uint32(b) } @@ -394,10 +396,10 @@ func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 { chunks1 := ti.count - ti.count/2 var b []byte if ord < chunks1 { - off := int64(offsetSize * ord) + off := offsetSize * int64(ord) b = ti.offsets1[off : off+offsetSize] } else { - off := int64(offsetSize * (ord - chunks1)) + off := offsetSize * int64(ord-chunks1) b = ti.offsets2[off : off+offsetSize] } return binary.BigEndian.Uint64(b) @@ -406,7 +408,7 @@ func (ti onHeapTableIndex) offsetAt(ord uint32) uint64 { func (ti onHeapTableIndex) ordinals() ([]uint32, error) { // todo: |o| is not accounted for in the memory quota o := make([]uint32, ti.count) - for i, off := uint32(0), 0; i < ti.count; i, off = i+1, off+prefixTupleSize { + for i, off := uint32(0), uint64(0); i < ti.count; i, off = i+1, off+prefixTupleSize { b := ti.prefixTuples[off+hash.PrefixLen : off+prefixTupleSize] o[i] = binary.BigEndian.Uint32(b) } @@ -416,7 +418,7 @@ func (ti onHeapTableIndex) ordinals() ([]uint32, error) { func (ti onHeapTableIndex) prefixes() ([]uint64, error) { // todo: |p| is not accounted for in the memory quota p := make([]uint64, ti.count) - for i, off := uint32(0), 0; i < ti.count; i, off = i+1, off+prefixTupleSize { + for i, off := uint32(0), uint64(0); i < ti.count; i, off = i+1, off+prefixTupleSize { b := ti.prefixTuples[off : off+hash.PrefixLen] p[i] = binary.BigEndian.Uint64(b) } @@ -425,7 +427,7 @@ func (ti onHeapTableIndex) prefixes() ([]uint64, error) { func (ti onHeapTableIndex) hashAt(idx uint32) hash.Hash { // Get tuple - off := int64(prefixTupleSize * idx) + off := prefixTupleSize * int64(idx) tuple := ti.prefixTuples[off : off+prefixTupleSize] // Get prefix, ordinal, and suffix diff --git a/go/store/nbs/table_index_test.go b/go/store/nbs/table_index_test.go index 3eabd99595..cf984d581f 100644 --- a/go/store/nbs/table_index_test.go +++ b/go/store/nbs/table_index_test.go @@ -56,6 +56,66 @@ func TestParseTableIndex(t *testing.T) { } } +func TestParseLargeTableIndex(t *testing.T) { + if isRaceEnabled() { + t.SkipNow() + } + + // This is large enough for the NBS table index to overflow uint32s on certain index calculations. + numChunks := uint32(320331063) + idxSize := indexSize(numChunks) + sz := idxSize + footerSize + idxBuf := make([]byte, sz) + copy(idxBuf[idxSize+12:], magicNumber) + binary.BigEndian.PutUint32(idxBuf[idxSize:], numChunks) + binary.BigEndian.PutUint64(idxBuf[idxSize+4:], uint64(numChunks)*4*1024) + + var prefix uint64 + + off := 0 + // Write Tuples + for i := uint32(0); i < numChunks; i++ { + binary.BigEndian.PutUint64(idxBuf[off:], prefix) + binary.BigEndian.PutUint32(idxBuf[off+hash.PrefixLen:], i) + prefix += 2 + off += prefixTupleSize + } + + // Write Lengths + for i := uint32(0); i < numChunks; i++ { + binary.BigEndian.PutUint32(idxBuf[off:], 4*1024) + off += lengthSize + } + + // Write Suffixes + for i := uint32(0); i < numChunks; i++ { + off += hash.SuffixLen + } + + idx, err := parseTableIndex(context.Background(), idxBuf, &UnlimitedQuotaProvider{}) + require.NoError(t, err) + h := &hash.Hash{} + h[7] = 2 + ord, err := idx.lookupOrdinal(h) + require.NoError(t, err) + assert.Equal(t, uint32(1), ord) + h[7] = 1 + ord, err = idx.lookupOrdinal(h) + require.NoError(t, err) + assert.Equal(t, numChunks, ord) + // This is the end of the chunk, not the beginning. + assert.Equal(t, uint64(8*1024), idx.offsetAt(1)) + assert.Equal(t, uint64(2), idx.prefixAt(1)) + assert.Equal(t, uint32(1), idx.ordinalAt(1)) + h[7] = 2 + assert.Equal(t, *h, idx.hashAt(1)) + entry, ok, err := idx.lookup(h) + require.NoError(t, err) + assert.True(t, ok) + assert.Equal(t, uint64(4*1024), entry.Offset()) + assert.Equal(t, uint32(4*1024), entry.Length()) +} + func BenchmarkFindPrefix(b *testing.B) { ctx := context.Background() f, err := os.Open("testdata/0oa7mch34jg1rvghrnhr4shrp2fm4ftd.idx")