sstable: perf improvement for ResetForReuse
Resetting iterators takes a non-trivial amount of time (15% in the
RandSeekInSST benchmark). Most of it is because we're copying fairly
large structures by value.

We change `ResetForReuse` to reset the iterator in-place, and we use
an unsafe trick to clear out only a section of `singleLevelIterator`.

Benchmark (baseline includes a few other in-flight PRs):
```
name                              old time/op  new time/op  delta
RandSeekInSST/v4/single-level-10  1.15µs ± 0%  1.02µs ± 1%  -10.84%  (p=0.001 n=7+7)
RandSeekInSST/v4/two-level-10     1.99µs ± 2%  1.71µs ± 2%  -13.85%  (p=0.000 n=8+8)
RandSeekInSST/v5/single-level-10   944ns ± 1%   823ns ± 1%  -12.86%  (p=0.000 n=8+8)
RandSeekInSST/v5/two-level-10     1.44µs ± 1%  1.24µs ± 7%  -13.79%  (p=0.000 n=7+8)
```
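
For readers unfamiliar with the pattern, here is a minimal, self-contained sketch of the offset-based clearing trick described above. The type and field names are illustrative, not Pebble's actual types: everything declared before a zero-size boundary field is transient state that gets zeroed in place, while fields after it (buffers, pools) are retained.

```
package main

import (
	"fmt"
	"unsafe"
)

// iterSketch stands in for an iterator whose transient state (the fields
// declared before clearBoundary) must be wiped on reuse, while the buffer
// declared after the boundary is retained to avoid reallocation.
type iterSketch struct {
	pos   int
	flags uint32
	err   error
	// All fields above clearBoundary are zeroed by resetForReuse.
	clearBoundary struct{}
	buf           []byte // retained across reuses
}

// clearLen is the byte length of the prefix to zero: everything declared
// before clearBoundary. unsafe.Offsetof yields a compile-time constant.
const clearLen = unsafe.Offsetof(iterSketch{}.clearBoundary)

// resetForReuse zeroes the transient prefix in place instead of assigning a
// freshly constructed struct over *i, so the retained fields are untouched.
func (i *iterSketch) resetForReuse() {
	*(*[clearLen]byte)(unsafe.Pointer(i)) = [clearLen]byte{}
}

func main() {
	it := &iterSketch{pos: 7, flags: 3, buf: make([]byte, 0, 1024)}
	it.resetForReuse()
	fmt.Println(it.pos, it.flags, it.err, cap(it.buf)) // 0 0 <nil> 1024
}
```

Because `unsafe.Offsetof` is a compile-time constant, the reset compiles down to clearing a fixed-size prefix rather than copying the whole struct by value.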
RaduBerinde committed Oct 20, 2024
1 parent 24d6d75 commit dacffe5
Showing 9 changed files with 58 additions and 50 deletions.
10 changes: 3 additions & 7 deletions sstable/colblk/data_block.go
@@ -1009,13 +1009,9 @@ func (i *DataBlockIter) IsDataInvalidated() bool {

// ResetForReuse resets the iterator for reuse, retaining buffers and
// configuration supplied to InitOnce, to avoid future allocations.
func (i *DataBlockIter) ResetForReuse() DataBlockIter {
return DataBlockIter{
keySchema: i.keySchema,
getLazyValuer: i.getLazyValuer,
keyIter: PrefixBytesIter{Buf: i.keyIter.Buf},
keySeeker: i.keySeeker,
}
func (i *DataBlockIter) ResetForReuse() {
i.d = nil
i.kv = base.InternalKV{}
}

// IsLowerBound implements the block.DataBlockIterator interface.
28 changes: 13 additions & 15 deletions sstable/colblk/index_block.go
@@ -207,19 +207,15 @@ var _ block.IndexBlockIterator = (*IndexIter)(nil)
func (i *IndexIter) InitWithDecoder(
compare base.Compare, split base.Split, d *IndexBlockDecoder, transforms block.IterTransforms,
) {
*i = IndexIter{
compare: compare,
split: split,
d: d,
n: int(d.bd.header.Rows),
row: -1,
h: i.h,
allocDecoder: i.allocDecoder,
keyBuf: i.keyBuf,
syntheticPrefix: transforms.SyntheticPrefix,
syntheticSuffix: transforms.SyntheticSuffix,
noTransforms: !transforms.SyntheticPrefix.IsSet() && !transforms.SyntheticSuffix.IsSet(),
}
i.compare = compare
i.split = split
i.d = d
i.n = int(d.bd.header.Rows)
i.row = -1
i.syntheticPrefix = transforms.SyntheticPrefix
i.syntheticSuffix = transforms.SyntheticSuffix
i.noTransforms = !transforms.SyntheticPrefix.IsSet() && !transforms.SyntheticSuffix.IsSet()
// Leave h, allocDecoder, keyBuf unchanged.
}

// Init initializes an iterator from the provided block data slice.
@@ -252,11 +248,13 @@ func (i *IndexIter) RowIndex() int {

// ResetForReuse resets the IndexIter for reuse, retaining buffers to avoid
// future allocations.
func (i *IndexIter) ResetForReuse() IndexIter {
func (i *IndexIter) ResetForReuse() {
if invariants.Enabled && i.h != (block.BufferHandle{}) {
panic(errors.AssertionFailedf("IndexIter reset for reuse with non-empty handle"))
}
return IndexIter{ /* nothing to retain */ }
i.d = nil
i.syntheticPrefix = nil
i.syntheticSuffix = nil
}

// Valid returns true if the iterator is currently positioned at a valid block
4 changes: 2 additions & 2 deletions sstable/reader_iter.go
@@ -28,7 +28,7 @@ type dataBlockIterator[D any] interface {

// ResetForReuse resets the data block iterator for reuse, retaining buffers
// to avoid future allocations.
ResetForReuse() D
ResetForReuse()

*D // non-interface type constraint element
}
@@ -45,7 +45,7 @@ type indexBlockIterator[I any] interface {

// ResetForReuse resets the index iterator for reuse, retaining buffers to
// avoid future allocations.
ResetForReuse() I
ResetForReuse()

*I // non-interface type constraint element
}
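
As context for the `*D`/`*I` constraint elements above: combining a method with a pointer type term lets generic code call pointer-receiver methods on an iterator it embeds by value, which is what makes calls like `PI(&i.index).ResetForReuse()` below possible. A small, hypothetical sketch of the pattern (the names are not from the repository):

```
package main

import "fmt"

// resettable is a constraint satisfied by *T when T has a pointer-receiver
// ResetForReuse; the *T type term lets generic code convert a *T it owns to
// the type parameter and call the method without an interface allocation.
type resettable[T any] interface {
	ResetForReuse()
	*T
}

// dummyIter is an illustrative stand-in for a block iterator.
type dummyIter struct{ pos int }

func (d *dummyIter) ResetForReuse() { d.pos = 0 }

// container embeds the iterator by value; the PT parameter is how the
// pointer-receiver method is reached, mirroring PI(&i.index).ResetForReuse().
type container[T any, PT resettable[T]] struct {
	it T
}

func (c *container[T, PT]) reset() {
	PT(&c.it).ResetForReuse()
}

func main() {
	c := &container[dummyIter, *dummyIter]{it: dummyIter{pos: 42}}
	c.reset()
	fmt.Println(c.it.pos) // 0
}
```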
31 changes: 21 additions & 10 deletions sstable/reader_iter_single_lvl.go
@@ -56,10 +56,8 @@ type singleLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIte
// endKeyInclusive is set to force the iterator to treat the upper field as
// inclusive while iterating instead of exclusive.
endKeyInclusive bool
index I
indexFilterRH objstorage.ReadHandle
indexFilterRHPrealloc objstorageprovider.PreallocatedReadHandle
data D
dataRH objstorage.ReadHandle
dataRHPrealloc objstorageprovider.PreallocatedReadHandle
// dataBH refers to the last data block that the iterator considered
@@ -172,6 +170,11 @@ type singleLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIte

transforms IterTransforms

// All fields above this field are cleared when resetting the iterator for reuse.
clearForResetBoundary struct{}

index I
data D
// inPool is set to true before putting the iterator in the reusable pool;
// used to detect double-close.
inPool bool
@@ -183,6 +186,9 @@ type singleLevelIterator[I any, PI indexBlockIterator[I], D any, PD dataBlockIte
// If the iterator is embedded within a twoLevelIterator, pool is nil and
// the twoLevelIterator.pool field may be non-nil.
pool *sync.Pool

// NOTE: any new fields should be added above the clearForResetBoundary field,
// unless they need to be retained when resetting the iterator.
}

// singleLevelIterator implements the base.InternalIterator interface.
@@ -384,13 +390,18 @@ func (i *singleLevelIterator[I, PI, D, PD]) SetupForCompaction() {
}
}

func (i *singleLevelIterator[I, PI, D, PD]) resetForReuse() singleLevelIterator[I, PI, D, PD] {
return singleLevelIterator[I, PI, D, PD]{
index: PI(&i.index).ResetForReuse(),
data: PD(&i.data).ResetForReuse(),
pool: i.pool,
inPool: true,
}
const clearLen = unsafe.Offsetof(singleLevelIteratorRowBlocks{}.clearForResetBoundary)

// Assert that clearLen is consistent between the row and columnar implementations.
const clearLenColBlocks = unsafe.Offsetof(singleLevelIteratorColumnBlocks{}.clearForResetBoundary)
const _ uintptr = clearLen - clearLenColBlocks
const _ uintptr = clearLenColBlocks - clearLen

func (i *singleLevelIterator[I, PI, D, PD]) resetForReuse() {
*(*[clearLen]byte)(unsafe.Pointer(i)) = [clearLen]byte{}
PI(&i.index).ResetForReuse()
PD(&i.data).ResetForReuse()
i.inPool = true
}

func (i *singleLevelIterator[I, PI, D, PD]) initBounds() {
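
The pair of `const _ uintptr` declarations above acts as a compile-time assertion: a uintptr constant cannot be negative, so each subtraction compiles only if it is non-negative, and having both directions forces the two offsets to be equal. A tiny standalone illustration with made-up constants:

```
package main

// offsetA and offsetB are illustrative stand-ins for the two struct offsets.
// A uintptr constant cannot be negative, so each declaration below compiles
// only when its subtraction is non-negative; together they require equality.
const offsetA, offsetB = 32, 32

const _ uintptr = offsetA - offsetB // compile error if offsetA < offsetB
const _ uintptr = offsetB - offsetA // compile error if offsetB < offsetA

func main() {}
```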
@@ -1565,7 +1576,7 @@ func firstError(err0, err1 error) error {
func (i *singleLevelIterator[I, PI, D, PD]) Close() error {
err := i.closeInternal()
pool := i.pool
*i = i.resetForReuse()
i.resetForReuse()
if pool != nil {
pool.Put(i)
}
9 changes: 4 additions & 5 deletions sstable/reader_iter_two_lvl.go
@@ -1045,12 +1045,11 @@ func (i *twoLevelIterator[I, PI, D, PD]) Close() error {
}
pool := i.pool
err := i.secondLevel.closeInternal()
i.secondLevel.resetForReuse()
err = firstError(err, PI(&i.topLevelIndex).Close())
*i = twoLevelIterator[I, PI, D, PD]{
secondLevel: i.secondLevel.resetForReuse(),
topLevelIndex: PI(&i.topLevelIndex).ResetForReuse(),
pool: pool,
}
PI(&i.topLevelIndex).ResetForReuse()
i.useFilterBlock = false
i.lastBloomFilterMatched = false
if pool != nil {
pool.Put(i)
}
3 changes: 2 additions & 1 deletion sstable/rowblk/rowblk_fragment_iter.go
@@ -286,8 +286,9 @@ func (i *fragmentIter) Close() {
return
}

i.blockIter.ResetForReuse()
*i = fragmentIter{
blockIter: i.blockIter.ResetForReuse(),
blockIter: i.blockIter,
closeCheck: i.closeCheck,
startKeyBuf: i.startKeyBuf[:0],
endKeyBuf: i.endKeyBuf[:0],
4 changes: 2 additions & 2 deletions sstable/rowblk/rowblk_index_iter.go
@@ -34,8 +34,8 @@ func (i *IndexIter) InitHandle(

// ResetForReuse resets the index iterator for reuse, retaining buffers to avoid
// future allocations.
func (i *IndexIter) ResetForReuse() IndexIter {
return IndexIter{iter: i.iter.ResetForReuse()}
func (i *IndexIter) ResetForReuse() {
i.iter.ResetForReuse()
}

// Valid returns true if the iterator is currently positioned at a valid block
17 changes: 10 additions & 7 deletions sstable/rowblk/rowblk_iter.go
@@ -303,13 +303,16 @@ func (i *Iter) IsDataInvalidated() bool {

// ResetForReuse resets the blockIter for reuse, retaining buffers to avoid
// future allocations.
func (i *Iter) ResetForReuse() Iter {
return Iter{
fullKey: i.fullKey[:0],
cached: i.cached[:0],
cachedBuf: i.cachedBuf[:0],
firstUserKeyWithPrefixBuf: i.firstUserKeyWithPrefixBuf[:0],
data: nil,
func (i *Iter) ResetForReuse() {
fullKey := i.fullKey[:0]
cached := i.cached[:0]
cachedBuf := i.cachedBuf[:0]
firstUserKeyWithPrefixBuf := i.firstUserKeyWithPrefixBuf[:0]
*i = Iter{
fullKey: fullKey,
cached: cached,
cachedBuf: cachedBuf,
firstUserKeyWithPrefixBuf: firstUserKeyWithPrefixBuf,
}
}
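
For context on the reslice-to-zero pattern above: `s[:0]` keeps the backing array, so the reset retains buffer capacity for the next use. A small illustration (not from the repository):

```
package main

import "fmt"

func main() {
	// Reslicing to length zero drops the contents but keeps the backing
	// array, so later appends reuse the existing capacity.
	buf := make([]byte, 0, 64)
	buf = append(buf, "previous key"...)
	buf = buf[:0]
	fmt.Println(len(buf), cap(buf)) // 0 64
}
```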

2 changes: 1 addition & 1 deletion sstable/rowblk/rowblk_rewrite.go
@@ -89,6 +89,6 @@ func (r *Rewriter) RewriteSuffixes(
end.Trailer = r.scratchKey.Trailer
r.keyAlloc, end.UserKey = r.keyAlloc.Copy(r.scratchKey.UserKey)

r.iter = r.iter.ResetForReuse()
r.iter.ResetForReuse()
return start, end, r.writer.Finish(), nil
}
