From 1f53faedfd7bf385dfa2721c485e3af6509b229e Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Fri, 27 Sep 2024 11:33:02 -0400 Subject: [PATCH] sstable: update TestWriter to exercise TableFormatPebblev5 Update the TestWriter test to exercise the new TableFormatPebblev5 that enables columnar blocks. This commit also fixes a couple issues with key ordering checks. --- internal/base/internal.go | 5 + sstable/colblk/data_block.go | 3 - sstable/colblk/keyspan.go | 15 ++ sstable/colblk_writer.go | 29 +++- sstable/random_test.go | 5 +- sstable/testdata/writer_v5 | 282 +++++++++++++++++++++++++++++++++++ sstable/writer_test.go | 15 +- 7 files changed, 340 insertions(+), 14 deletions(-) create mode 100644 sstable/testdata/writer_v5 diff --git a/internal/base/internal.go b/internal/base/internal.go index 0fc719f4b5..0c123eb4ce 100644 --- a/internal/base/internal.go +++ b/internal/base/internal.go @@ -230,6 +230,11 @@ func MakeTrailer(seqNum SeqNum, kind InternalKeyKind) InternalKeyTrailer { return (InternalKeyTrailer(seqNum) << 8) | InternalKeyTrailer(kind) } +// String imlements the fmt.Stringer interface. +func (t InternalKeyTrailer) String() string { + return fmt.Sprintf("%s,%s", SeqNum(t>>8), InternalKeyKind(t&0xff)) +} + // SeqNum returns the sequence number component of the trailer. func (t InternalKeyTrailer) SeqNum() SeqNum { return SeqNum(t >> 8) diff --git a/sstable/colblk/data_block.go b/sstable/colblk/data_block.go index 985d769cf6..29a70ebeef 100644 --- a/sstable/colblk/data_block.go +++ b/sstable/colblk/data_block.go @@ -183,9 +183,6 @@ func (w *defaultKeyWriter) ComparePrev(key []byte) KeyComparison { lp := w.prefixes.UnsafeGet(w.prefixes.nKeys - 1) cmpv.CommonPrefixLen = int32(crbytes.CommonPrefix(lp, key[:cmpv.PrefixLen])) - if invariants.Enabled && bytes.Compare(lp, key[:cmpv.PrefixLen]) > 0 { - panic(errors.AssertionFailedf("keys are not in order: %q > %q", lp, key[:cmpv.PrefixLen])) - } // Keys are written in order and prefixes must be sorted lexicograpgically, // so CommonPrefixLen == PrefixLen implies that the keys share the same // logical prefix. (If the previous key had a prefix longer than diff --git a/sstable/colblk/keyspan.go b/sstable/colblk/keyspan.go index dac1cad183..d8e50f0de1 100644 --- a/sstable/colblk/keyspan.go +++ b/sstable/colblk/keyspan.go @@ -132,6 +132,21 @@ func (w *KeyspanBlockWriter) UnsafeBoundaryKeys() (smallest, largest base.Intern return smallest, largest } +// UnsafeLastSpan returns the start and end user keys of the last span written +// to the block and the trailer of its largest key. The returned keys point +// directly into the block writer's memory and must not be mutated. +func (w *KeyspanBlockWriter) UnsafeLastSpan() ( + start, end []byte, + largestTrailer base.InternalKeyTrailer, +) { + if w.keyCount == 0 { + return nil, nil, 0 + } + return w.boundaryUserKeys.UnsafeGet(w.boundaryUserKeys.rows - 2), + w.boundaryUserKeys.UnsafeGet(w.boundaryUserKeys.rows - 1), + base.InternalKeyTrailer(w.trailers.Get(w.keyCount - 1)) +} + // Size returns the size of the pending block. func (w *KeyspanBlockWriter) Size() int { off := blockHeaderSize(keyspanColumnCount, keyspanHeaderSize) diff --git a/sstable/colblk_writer.go b/sstable/colblk_writer.go index f4493d8a48..f9f517a8b7 100644 --- a/sstable/colblk_writer.go +++ b/sstable/colblk_writer.go @@ -254,11 +254,31 @@ func (w *RawColumnWriter) EncodeSpan(span keyspan.Span) error { for _, k := range span.Keys { w.meta.updateSeqNum(k.SeqNum()) } + + blockWriter := &w.rangeKeyBlock if span.Keys[0].Kind() == base.InternalKeyKindRangeDelete { - w.rangeDelBlock.AddSpan(span) - return nil + blockWriter = &w.rangeDelBlock + } + if !w.disableKeyOrderChecks && blockWriter.KeyCount() > 0 { + // Check that spans are being added in fragmented order. If the two + // tombstones overlap, their start and end keys must be identical. + prevStart, prevEnd, prevTrailer := blockWriter.UnsafeLastSpan() + if w.opts.Comparer.Equal(prevStart, span.Start) && w.opts.Comparer.Equal(prevEnd, span.End) { + if prevTrailer < span.Keys[0].Trailer { + w.err = errors.Errorf("pebble: keys must be added in order: %s-%s:{(#%s)}, %s", + w.opts.Comparer.FormatKey(prevStart), + w.opts.Comparer.FormatKey(prevEnd), + prevTrailer, span.Pretty(w.opts.Comparer.FormatKey)) + } + } else if c := w.opts.Comparer.Compare(prevEnd, span.Start); c > 0 { + w.err = errors.Errorf("pebble: keys must be added in order: %s-%s:{(#%s)}, %s", + w.opts.Comparer.FormatKey(prevStart), + w.opts.Comparer.FormatKey(prevEnd), + prevTrailer, span.Pretty(w.opts.Comparer.FormatKey)) + return w.err + } } - w.rangeKeyBlock.AddSpan(span) + blockWriter.AddSpan(span) return nil } @@ -759,6 +779,9 @@ func (w *RawColumnWriter) Close() (err error) { w.err = err } }() + if w.layout.writable == nil { + return w.err + } // Finish the last data block and send it to the write queue if it contains // any pending KVs. diff --git a/sstable/random_test.go b/sstable/random_test.go index 12bac379b9..c3e51a4255 100644 --- a/sstable/random_test.go +++ b/sstable/random_test.go @@ -274,8 +274,9 @@ type randomTableConfig struct { func (cfg *randomTableConfig) readerOpts() ReaderOptions { rOpts := ReaderOptions{ - Comparer: testkeys.Comparer, - Filters: map[string]FilterPolicy{}, + Comparer: testkeys.Comparer, + KeySchema: cfg.wopts.KeySchema, + Filters: map[string]FilterPolicy{}, } if cfg.wopts.FilterPolicy != nil { rOpts.Filters[cfg.wopts.FilterPolicy.Name()] = cfg.wopts.FilterPolicy diff --git a/sstable/testdata/writer_v5 b/sstable/testdata/writer_v5 new file mode 100644 index 0000000000..1fdcd5523e --- /dev/null +++ b/sstable/testdata/writer_v5 @@ -0,0 +1,282 @@ +build +a.SET.1:a +---- +point: [a#1,SET-a#1,SET] +seqnums: [1-1] + +scan +---- +a#1,SET:a + +scan-range-del +---- + +scan-range-key +---- + +build +a.SET.1:a +b.DEL.2: +c.MERGE.3:c +EncodeSpan: d-e:{(#4,RANGEDEL)} +f.SET.5:f +g.DEL.6: +h.MERGE.7:h +EncodeSpan: i-j:{(#8,RANGEDEL)} +EncodeSpan: j-k:{(#9,RANGEKEYDEL)} +EncodeSpan: k-l:{(#10,RANGEKEYUNSET,@5)} +EncodeSpan: l-m:{(#11,RANGEKEYSET,@10,foo)} +---- +point: [a#1,SET-h#7,MERGE] +rangedel: [d#4,RANGEDEL-j#inf,RANGEDEL] +rangekey: [j#9,RANGEKEYDEL-m#inf,RANGEKEYSET] +seqnums: [1-11] + +build +a.SET.1:a +b.DEL.2: +c.MERGE.3:c +EncodeSpan: d-e:{(#4,RANGEDEL)} +f.SET.5:f +g.DEL.6: +h.MERGE.7:h +EncodeSpan: i-j:{(#8,RANGEDEL)} +---- +point: [a#1,SET-h#7,MERGE] +rangedel: [d#4,RANGEDEL-j#inf,RANGEDEL] +seqnums: [1-8] + +scan +---- +a#1,SET:a +b#2,DEL: +c#3,MERGE:c +f#5,SET:f +g#6,DEL: +h#7,MERGE:h + +scan-range-del +---- +d-e:{(#4,RANGEDEL)} +i-j:{(#8,RANGEDEL)} + +# 3: a-----------m +# 2: f------------s +# 1: j---------------z + +build +EncodeSpan: a-f:{(#3,RANGEDEL)} +EncodeSpan: f-j:{(#3,RANGEDEL) (#2,RANGEDEL)} +EncodeSpan: j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +EncodeSpan: m-s:{(#2,RANGEDEL) (#1,RANGEDEL)} +EncodeSpan: s-z:{(#1,RANGEDEL)} +---- +rangedel: [a#3,RANGEDEL-z#inf,RANGEDEL] +seqnums: [1-3] + +scan +---- + +scan-range-del +---- +a-f:{(#3,RANGEDEL)} +f-j:{(#3,RANGEDEL) (#2,RANGEDEL)} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +m-s:{(#2,RANGEDEL) (#1,RANGEDEL)} +s-z:{(#1,RANGEDEL)} + +scan-range-key +---- + +# The range tombstone upper bound is exclusive, so a point operation +# on that same key will be the actual boundary. + +build +EncodeSpan: a-b:{(#3,RANGEDEL)} +b.SET.4:c +---- +point: [b#4,SET-b#4,SET] +rangedel: [a#3,RANGEDEL-b#inf,RANGEDEL] +seqnums: [3-4] + +build +EncodeSpan: a-b:{(#3,RANGEDEL)} +b.SET.2:c +---- +point: [b#2,SET-b#2,SET] +rangedel: [a#3,RANGEDEL-b#inf,RANGEDEL] +seqnums: [2-3] + +build +EncodeSpan: a-c:{(#3,RANGEDEL)} +b.SET.2:c +---- +point: [b#2,SET-b#2,SET] +rangedel: [a#3,RANGEDEL-c#inf,RANGEDEL] +seqnums: [2-3] + +# Keys must be added in order. + +build +a.SET.1:b +a.SET.2:c +---- +pebble: keys must be added in strictly increasing order: a#2,SET + +build +b.SET.1:a +a.SET.2:b +---- +pebble: keys must be added in strictly increasing order: a#2,SET + +build +b.RANGEDEL.1:c +a.RANGEDEL.2:b +---- +RANGEDEL must be added through EncodeSpan + +build +EncodeSpan: b-c:{(#1,RANGEDEL)} +EncodeSpan: a-b:{(#2,RANGEDEL)} +---- +pebble: keys must be added in order: b-c:{(#1,RANGEDEL)}, a-b:{(#2,RANGEDEL)} + +build-raw +EncodeSpan: a-c:{(#1,RANGEDEL)} +EncodeSpan: a-c:{(#2,RANGEDEL)} +---- +pebble: keys must be added in order: a-c:{(#1,RANGEDEL)}, a-c:{(#2,RANGEDEL)} + +build-raw +EncodeSpan: a-c:{(#1,RANGEDEL)} +EncodeSpan: b-d:{(#2,RANGEDEL)} +---- +pebble: keys must be added in order: a-c:{(#1,RANGEDEL)}, b-d:{(#2,RANGEDEL)} + +build-raw +EncodeSpan: a-c:{(#2,RANGEDEL)} +EncodeSpan: a-d:{(#1,RANGEDEL)} +---- +pebble: keys must be added in order: a-c:{(#2,RANGEDEL)}, a-d:{(#1,RANGEDEL)} + +build-raw +EncodeSpan: a-c:{(#1,RANGEDEL)} +EncodeSpan: c-d:{(#2,RANGEDEL)} +---- +rangedel: [a#1,RANGEDEL-d#inf,RANGEDEL] +seqnums: [1-2] + +build-raw +EncodeSpan: a-b:{(#2,RANGEKEYSET,@10,foo) (#1,RANGEKEYSET,@10,foo)} +---- +rangekey: [a#2,RANGEKEYSET-b#inf,RANGEKEYSET] +seqnums: [1-2] + +build-raw +EncodeSpan: b-c:{(#2,RANGEKEYSET,@10,foo)} +EncodeSpan: a-b:{(#1,RANGEKEYSET,@10,foo)} +---- +pebble: keys must be added in order: b-c:{(#2,RANGEKEYSET)}, a-b:{(#1,RANGEKEYSET,@10,foo)} + +build-raw +EncodeSpan: a-c:{(#1,RANGEKEYSET,@10,foo)} +EncodeSpan: c-d:{(#2,RANGEKEYSET,@10,foo)} +---- +rangekey: [a#1,RANGEKEYSET-d#inf,RANGEKEYSET] +seqnums: [1-2] + +# Range keys may have perfectly aligned spans (including sequence numbers), +# though the key kinds must be ordered (descending). + +build-raw +EncodeSpan: a-b:{(#1,RANGEKEYSET,@10,foo) (#1,RANGEKEYUNSET,@10) (#1,RANGEKEYDEL)} +---- +rangekey: [a#1,RANGEKEYSET-b#inf,RANGEKEYDEL] +seqnums: [1-1] + +# Setting a very small index-block-size results in a two-level index. + +build block-size=1 index-block-size=1 +a.SET.1:a +b.SET.1:b +c.SET.1:c +---- +point: [a#1,SET-c#1,SET] +seqnums: [1-1] + +layout +---- + 0 data (74) + 79 data (74) + 158 data (74) + 237 index (36) + 278 index (44) + 327 index (44) + 376 top-index (53) + 434 properties (480) + 919 meta-index (33) + 957 footer (53) + 1010 EOF + +# Exercise the non-Reader layout-decoding codepath. + +decode-layout +---- + 0 data (74) + 79 data (74) + 158 data (74) + 237 index (36) + 278 index (44) + 327 index (44) + 376 top-index (53) + 434 properties (480) + 919 meta-index (33) + 957 footer (53) + 1010 EOF + +scan +---- +a#1,SET:a +b#1,SET:b +c#1,SET:c + +# Enabling leveldb format disables the creation of a two-level index +# (the input data here mirrors the test case above). + +build leveldb block-size=1 index-block-size=1 +a.SET.1:a +b.SET.1:b +c.SET.1:c +---- +point: [a#1,SET-c#1,SET] +seqnums: [1-1] + +layout +---- + 0 data (21) + 26 data (21) + 52 data (21) + 78 index (45) + 128 properties (549) + 682 meta-index (33) + 720 leveldb-footer (48) + 768 EOF + +# Range keys, if present, are shown in the layout. + +build +EncodeSpan: a-b:{(#3,RANGEKEYSET,@3,foo)} +EncodeSpan: b-c:{(#2,RANGEKEYSET,@2,bar)} +EncodeSpan: c-d:{(#1,RANGEKEYSET,@1,baz)} +---- +rangekey: [a#3,RANGEKEYSET-d#inf,RANGEKEYSET] +seqnums: [1-3] + +layout +---- + 0 index (28) + 33 range-key (84) + 122 properties (441) + 568 meta-index (57) + 630 footer (53) + 683 EOF diff --git a/sstable/writer_test.go b/sstable/writer_test.go index b707622689..91b1556130 100644 --- a/sstable/writer_test.go +++ b/sstable/writer_test.go @@ -39,12 +39,15 @@ type tableFormatFile struct { } func testWriterParallelism(t *testing.T, parallelism bool) { - for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { - tdFile := "testdata/writer" - if format == TableFormatPebblev3 { - tdFile = "testdata/writer_v3" - } - t.Run(format.String(), func(t *testing.T) { runDataDriven(t, tdFile, format, parallelism) }) + formatFiles := []tableFormatFile{ + {TableFormat: TableFormatPebblev2, File: "testdata/writer"}, + {TableFormat: TableFormatPebblev3, File: "testdata/writer_v3"}, + {TableFormat: TableFormatPebblev5, File: "testdata/writer_v5"}, + } + for _, tff := range formatFiles { + t.Run(tff.TableFormat.String(), func(t *testing.T) { + runDataDriven(t, tff.File, tff.TableFormat, parallelism) + }) } } func TestWriter(t *testing.T) {