Skip to content

Commit

Permalink
colblk: fix UintsBuilder bug in default mode
Browse files Browse the repository at this point in the history
When we are in `useDefault` mode, the slice can be smaller than the
number of rows. In that case, the existing code was reading outside
the underlying slice and writing out garbage.

We should reconsider the use of `UnsafeSlice` here, as I don't think
it's buying us much performance-wise.
  • Loading branch information
RaduBerinde committed Oct 20, 2024
1 parent 908960b commit ce4dcc9
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 12 deletions.
114 changes: 114 additions & 0 deletions sstable/colblk/testdata/uints
Original file line number Diff line number Diff line change
Expand Up @@ -580,3 +580,117 @@ uints
├── 08-12: x 4a420f00 # data[1] = 1000010
├── 12-16: x 54420f00 # data[2] = 1000020
└── 16-20: x 5e420f00 # data[3] = 1000030


init default-zero
----

write
0:1 1:1 2:1 3:1 4:1
----

# Verify the case where the last rows have not been set.
finish rows=100
----
uints
├── 000-001: x 01 # encoding: 1b
├── 001-002: x 01 # data[0] = 1
├── 002-003: x 01 # data[1] = 1
├── 003-004: x 01 # data[2] = 1
├── 004-005: x 01 # data[3] = 1
├── 005-006: x 01 # data[4] = 1
├── 006-007: x 00 # data[5] = 0
├── 007-008: x 00 # data[6] = 0
├── 008-009: x 00 # data[7] = 0
├── 009-010: x 00 # data[8] = 0
├── 010-011: x 00 # data[9] = 0
├── 011-012: x 00 # data[10] = 0
├── 012-013: x 00 # data[11] = 0
├── 013-014: x 00 # data[12] = 0
├── 014-015: x 00 # data[13] = 0
├── 015-016: x 00 # data[14] = 0
├── 016-017: x 00 # data[15] = 0
├── 017-018: x 00 # data[16] = 0
├── 018-019: x 00 # data[17] = 0
├── 019-020: x 00 # data[18] = 0
├── 020-021: x 00 # data[19] = 0
├── 021-022: x 00 # data[20] = 0
├── 022-023: x 00 # data[21] = 0
├── 023-024: x 00 # data[22] = 0
├── 024-025: x 00 # data[23] = 0
├── 025-026: x 00 # data[24] = 0
├── 026-027: x 00 # data[25] = 0
├── 027-028: x 00 # data[26] = 0
├── 028-029: x 00 # data[27] = 0
├── 029-030: x 00 # data[28] = 0
├── 030-031: x 00 # data[29] = 0
├── 031-032: x 00 # data[30] = 0
├── 032-033: x 00 # data[31] = 0
├── 033-034: x 00 # data[32] = 0
├── 034-035: x 00 # data[33] = 0
├── 035-036: x 00 # data[34] = 0
├── 036-037: x 00 # data[35] = 0
├── 037-038: x 00 # data[36] = 0
├── 038-039: x 00 # data[37] = 0
├── 039-040: x 00 # data[38] = 0
├── 040-041: x 00 # data[39] = 0
├── 041-042: x 00 # data[40] = 0
├── 042-043: x 00 # data[41] = 0
├── 043-044: x 00 # data[42] = 0
├── 044-045: x 00 # data[43] = 0
├── 045-046: x 00 # data[44] = 0
├── 046-047: x 00 # data[45] = 0
├── 047-048: x 00 # data[46] = 0
├── 048-049: x 00 # data[47] = 0
├── 049-050: x 00 # data[48] = 0
├── 050-051: x 00 # data[49] = 0
├── 051-052: x 00 # data[50] = 0
├── 052-053: x 00 # data[51] = 0
├── 053-054: x 00 # data[52] = 0
├── 054-055: x 00 # data[53] = 0
├── 055-056: x 00 # data[54] = 0
├── 056-057: x 00 # data[55] = 0
├── 057-058: x 00 # data[56] = 0
├── 058-059: x 00 # data[57] = 0
├── 059-060: x 00 # data[58] = 0
├── 060-061: x 00 # data[59] = 0
├── 061-062: x 00 # data[60] = 0
├── 062-063: x 00 # data[61] = 0
├── 063-064: x 00 # data[62] = 0
├── 064-065: x 00 # data[63] = 0
├── 065-066: x 00 # data[64] = 0
├── 066-067: x 00 # data[65] = 0
├── 067-068: x 00 # data[66] = 0
├── 068-069: x 00 # data[67] = 0
├── 069-070: x 00 # data[68] = 0
├── 070-071: x 00 # data[69] = 0
├── 071-072: x 00 # data[70] = 0
├── 072-073: x 00 # data[71] = 0
├── 073-074: x 00 # data[72] = 0
├── 074-075: x 00 # data[73] = 0
├── 075-076: x 00 # data[74] = 0
├── 076-077: x 00 # data[75] = 0
├── 077-078: x 00 # data[76] = 0
├── 078-079: x 00 # data[77] = 0
├── 079-080: x 00 # data[78] = 0
├── 080-081: x 00 # data[79] = 0
├── 081-082: x 00 # data[80] = 0
├── 082-083: x 00 # data[81] = 0
├── 083-084: x 00 # data[82] = 0
├── 084-085: x 00 # data[83] = 0
├── 085-086: x 00 # data[84] = 0
├── 086-087: x 00 # data[85] = 0
├── 087-088: x 00 # data[86] = 0
├── 088-089: x 00 # data[87] = 0
├── 089-090: x 00 # data[88] = 0
├── 090-091: x 00 # data[89] = 0
├── 091-092: x 00 # data[90] = 0
├── 092-093: x 00 # data[91] = 0
├── 093-094: x 00 # data[92] = 0
├── 094-095: x 00 # data[93] = 0
├── 095-096: x 00 # data[94] = 0
├── 096-097: x 00 # data[95] = 0
├── 097-098: x 00 # data[96] = 0
├── 098-099: x 00 # data[97] = 0
├── 099-100: x 00 # data[98] = 0
└── 100-101: x 00 # data[99] = 0
35 changes: 23 additions & 12 deletions sstable/colblk/uints.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,15 +361,15 @@ func (b *UintBuilder) Finish(col, rows int, offset uint32, buf []byte) uint32 {
// values slice if there's actually a non-nil ptr.
var valuesSlice []uint64
if b.array.elems.ptr != nil {
valuesSlice = b.array.elems.Slice(rows)
valuesSlice = b.array.elems.Slice(min(rows, b.array.n))
}
return uintColumnFinish(minimum, valuesSlice, e, offset, buf)
return uintColumnFinish(rows, minimum, valuesSlice, e, offset, buf)
}

// uintColumnFinish finishes the column of unsigned integers of type T, applying
// the given encoding.
func uintColumnFinish(
minimum uint64, values []uint64, e UintEncoding, offset uint32, buf []byte,
rows int, minimum uint64, values []uint64, e UintEncoding, offset uint32, buf []byte,
) uint32 {
buf[offset] = byte(e)
offset++
Expand All @@ -390,28 +390,28 @@ func uintColumnFinish(

switch e.Width() {
case 1:
dest := makeUnsafeRawSlice[uint8](unsafe.Pointer(&buf[offset])).Slice(len(values))
dest := makeUnsafeRawSlice[uint8](unsafe.Pointer(&buf[offset])).Slice(rows)
reduceUints(deltaBase, values, dest)

case 2:
dest := makeUnsafeRawSlice[uint16](unsafe.Pointer(&buf[offset])).Slice(len(values))
dest := makeUnsafeRawSlice[uint16](unsafe.Pointer(&buf[offset])).Slice(rows)
reduceUints(deltaBase, values, dest)

case 4:
dest := makeUnsafeRawSlice[uint32](unsafe.Pointer(&buf[offset])).Slice(len(values))
dest := makeUnsafeRawSlice[uint32](unsafe.Pointer(&buf[offset])).Slice(rows)
reduceUints(deltaBase, values, dest)

case 8:
if deltaBase != 0 {
panic("unreachable")
}
dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset])).Slice(len(values))
dest := makeUnsafeRawSlice[uint64](unsafe.Pointer(&buf[offset])).Slice(rows)
copy(dest, values)

default:
panic("unreachable")
}
return offset + uint32(len(values))*width
return offset + uint32(rows)*width
}

// WriteDebug implements Encoder.
Expand All @@ -425,17 +425,28 @@ func (b *UintBuilder) WriteDebug(w io.Writer, rows int) {
// reduceUints[uint8](10, []uint64{10, 11, 12}, dst)
//
// could be used to reduce a slice of uint64 values to uint8 values {0, 1, 2}.
//
// The values slice can be smaller than dst; in that case, the values between
// len(values) and len(dst) are assumed to be 0.
func reduceUints[N constraints.Integer](minimum uint64, values []uint64, dst []N) {
for i := 0; i < len(values); i++ {
for i, v := range values {
if invariants.Enabled {
if values[i] < minimum {
if v < minimum {
panic("incorrect minimum value")
}
if values[i]-minimum > uint64(N(0)-1) {
if v-minimum > uint64(N(0)-1) {
panic("incorrect target width")
}
}
dst[i] = N(values[i] - minimum)
dst[i] = N(v - minimum)
}
if invariants.Enabled && len(values) < len(dst) {
if minimum != 0 {
panic("incorrect minimum value")
}
}
for i := len(values); i < len(dst); i++ {
dst[i] = 0
}
}

Expand Down

0 comments on commit ce4dcc9

Please sign in to comment.