-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathskiplist.go
327 lines (283 loc) · 10.2 KB
/
skiplist.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
package goskip
import (
"math/rand"
"sync/atomic"
"unsafe"
)
const (
DefaultMaxHeight = 24
LayerSize = int(unsafe.Sizeof(uint32(0)))
defaultLevelP = 0.5
)
// A note on CPU Cache Performance:
// Try to align your structures with cache line size.
// For structures that generally contain data elements of different types,
// the compiler tries to maintain proper alignment of data elements by
// inserting unused memory between elements. This technique is known as
// 'Padding'. Also, the compiler aligns the entire structure to its most
// strictly aligned member. The compiler may also increase the size of
// structure, if necessary, to make it a multiple of the alignment
// by adding padding at the end of the structure.
// This is known as 'Tail Padding'.
// Since the size of the listNode is bigger than usual cache line
// size (64 Bytes on most CPU), it is beneficial to keep
// the parts of the structure that are touched together in memory,
// which may lead to improved cache locality.
type node struct {
// valSize (uint32) and valOffset (uint32) is encoded as a single uint64 value,
// so that we can atomically load this values.
// valSize -> bits 0-31
// valOffset -> bits 32-63
encodedValue uint64
// Height of the current node.
// 1 < height < maxHeight.
// Height never changes even on
height uint8 // 1 Byte
// Key offset and size in allocator.
// keyOffset and keySize never changes
keyOffset uint32 // 4 Bytes
keySize uint16 // 2 Bytes
// Instead of creating a new node for the same key,
// use existing node to save space (also for improved cache locality).
// layers[0] represents the first level (which has height 1).
// layers definition should always be at the end of the struct since
// we might allocate less space for it in memory to reduce
// memory footprint.
layers [DefaultMaxHeight]uint32 // 4 Byte for each level. Average: 32 Byte
}
// SkipList represents a skip list.
type SkipList struct {
// Current height of the list.
height uint32
// Head node.
head *node
// Value Allocator is used for allocating node values.
valueAllocator *Allocator
// Root Allocator is used for every other allocation: key, node etc...
mainAllocator *Allocator
}
// newNode creates a node with given height and returns node and the offset.
func newNode(allc *Allocator, valAllc *Allocator, height uint8, key []byte, val []byte) (*node, uint32) {
truncatedSize := (DefaultMaxHeight - int(height)) * LayerSize
keyOffset := allc.putBytes(key)
nodeOffset := allc.makeNode(uint32(truncatedSize))
valOffset := valAllc.putBytes(val)
node := allc.getNode(nodeOffset)
node.height = height
node.keyOffset = keyOffset
// TODO key length should not exceed uint16 size, assert.
node.keySize = uint16(len(key))
node.encodeValue(valOffset, uint32(len(val)))
return node, nodeOffset
}
// Returns the offset of next node on given level (height).
func (n *node) getNextNodeOffset(level uint8) uint32 {
// Layers can be altered concurrently. Use atomic load.
return atomic.LoadUint32(&n.layers[level])
}
// casNextNodeOffset sets the offset of next node on a level, using cas operation.
func (n *node) casNextNodeOffset(level uint8, old uint32, new uint32) bool {
return atomic.CompareAndSwapUint32(&n.layers[level], old, new)
}
// Set value offset and size.
func (n *node) encodeValue(offset uint32, size uint32) {
encodedValue := uint64(size)
encodedValue += uint64(offset) << 32
atomic.StoreUint64(&n.encodedValue, encodedValue)
}
// Returns (offset, size) of value.
func (n *node) decodeValue() (uint32, uint32) {
val := atomic.LoadUint64(&n.encodedValue)
return uint32(val >> 32), uint32(val)
}
// Returns a pointer to node with given offset.
func (s *SkipList) getNodeKey(node *node) []byte {
return s.mainAllocator.getBytes(node.keyOffset, uint32(node.keySize))
}
// Returns a pointer to node with given offset.
func (s *SkipList) getNodeValue(node *node) []byte {
offset, size := node.decodeValue()
return s.valueAllocator.getBytes(offset, size)
}
// Returns a pointer to node with given offset.
// Returns nil if given offset is nilAllocatorOffset.
func (s *SkipList) getNode(offset uint32) *node {
return s.mainAllocator.getNode(offset)
}
// Set the value of given node.
func (s *SkipList) setNodeValue(node *node, val []byte) {
newValSize := uint32(len(val))
valOffset, valSize := node.decodeValue()
// If node currently has a value and the size of the value is bigger than new value,
// use previous value's memory for new value.
if valSize >= newValSize {
s.valueAllocator.putBytesTo(valOffset, val)
node.encodeValue(valOffset, newValSize)
return
}
// If the length of new node is greater than odl node, forget old value
// and allocate new space in memory for new value.
newOffset := s.valueAllocator.putBytes(val)
node.encodeValue(newOffset, newValSize)
}
// getNeighbourNodes returns nodes (x, y, z) where
// x is the right most node asserts x.key <= key.
// y is the offset of next node of x, on this level.
// z is true whenever x.key = key
// startingNode is used as starting point for search (hint from previous calls.)
// Key of the startingNode always must be less then given key.
func (s *SkipList) getNeighbourNodes(startingNode *node, level uint8, key []byte) (*node, uint32, bool) {
currentNode := startingNode
for {
nextNodeOffset := currentNode.getNextNodeOffset(level)
nextNode := s.getNode(nextNodeOffset)
if nextNode == nil {
return currentNode, nextNodeOffset, false
}
nextNodeKey := s.getNodeKey(nextNode)
cmp := compareKeys(nextNodeKey, key)
if cmp == 0 {
return nextNode, nextNodeOffset, true
}
if cmp > 0 {
return currentNode, nextNodeOffset, false
}
// Go to next node in this level.
currentNode = nextNode
}
}
// getClosestNode returns the closest node whose key <= given key,
// along with a boolean value which designates whether returned nodes key
// is equal to given key.
func (s *SkipList) getClosestNode(key []byte) (*node, bool) {
currentNode := s.head // points to current node in loop.
level := uint8(s.height - 1) // current level
for {
nextNodeOffset := currentNode.getNextNodeOffset(level)
nextNode := s.getNode(nextNodeOffset)
// if there is no next node on this level.
if nextNode == nil {
// If there are still levels to descend to, go one level lower.
// Else, return current node as closest node.
if level > 0 {
level--
continue // used here to get rid of nested if/else.
}
return currentNode, false
}
nextNodeKey := s.getNodeKey(nextNode)
cmp := compareKeys(nextNodeKey, key)
// If the node is found, return it.
if cmp == 0 {
return nextNode, true
}
// If next node's key is less than the key we search for,
// set currentNode as next node and continue searching.
// Go right in skip list.
if cmp < 0 {
currentNode = nextNode
continue
}
// If next node's key is greater than the key we search for,
// a) Go one level down in skip list if possible.
if level > 0 {
level--
continue // used here to get rid of nested if/else.
}
// b) Return current node as closest node if this is the base level.
return currentNode, false
}
}
// Get returns value for given key if it exists,
// returns nil otherwise.
func (s *SkipList) Get(key []byte) []byte {
node, found := s.getClosestNode(key)
if !found {
return nil
}
return s.getNodeValue(node)
}
// Set inserts given key-value pair into list.
func (s *SkipList) Set(key []byte, val []byte) {
listHeight := s.getHeight()
var prevNodes [DefaultMaxHeight + 1]*node
var nextNodesOffsets [DefaultMaxHeight + 1]uint32
var sameKey bool
prevNodes[listHeight] = s.head
// Starting from the highest level, find the suitable position to
// put the node for each level.
for i := int(listHeight) - 1; i >= 0; i-- {
prevNodes[i], nextNodesOffsets[i], sameKey = s.getNeighbourNodes(prevNodes[i+1], uint8(i), key)
// if there is already a node with the same key, there is no need to
// create a new node, just use it.
if sameKey {
s.setNodeValue(prevNodes[i], val)
return
}
}
// Create a new node.
nodeHeight := s.randomHeight()
node, nodeOffset := newNode(s.mainAllocator, s.valueAllocator, nodeHeight, key, val)
// If the height of new node is more then current height of the list,
// try to increase list height using CAS, since it can be changed.
for listHeight < uint32(nodeHeight) {
if s.casHeight(listHeight, uint32(nodeHeight)) {
break
}
listHeight = s.getHeight()
}
// Start from base level and put new node.
// We are starting from base level to prevent race conditions
// for neighbors.
for i := uint8(0); i < nodeHeight; i++ {
for {
// if prevNodes[i] is nil, it means at the time of search,
// list height was less than new node length.
// We need to discover this level.
if prevNodes[i] == nil {
prevNodes[i], nextNodesOffsets[i], _ = s.getNeighbourNodes(s.head, i, key)
}
node.layers[i] = nextNodesOffsets[i]
if prevNodes[i].casNextNodeOffset(i, nextNodesOffsets[i], nodeOffset) {
break
}
// If cas fails, we need to rediscover this level
prevNodes[i], nextNodesOffsets[i], sameKey = s.getNeighbourNodes(prevNodes[i], i, key)
if sameKey {
s.setNodeValue(prevNodes[i], val)
return
}
}
}
}
// casHeight performs cas operation on list height.
func (s *SkipList) casHeight(old uint32, new uint32) bool {
return atomic.CompareAndSwapUint32(&s.height, old, new)
}
// getHeight returns the maximum height of its nodes.
func (s *SkipList) getHeight() uint32 {
// Height can be modified concurrently, so we need to load it atomically.
return atomic.LoadUint32(&s.height)
}
// randomHeight returns a random number between 1 and maxHeight of the list.
func (s *SkipList) randomHeight() uint8 {
height := 1
// TODO is there a more performant way to produce random level?
for height < DefaultMaxHeight && rand.Float64() < defaultLevelP {
height++
}
return uint8(height)
}
// NewSkipList initializes and returns a skip list instance.
func NewSkipList(allocatorSize uint32) *SkipList {
mainAllocator := newAllocator(allocatorSize)
valueAllocator := newAllocator(allocatorSize)
var emptyValue []byte
head, _ := newNode(mainAllocator, valueAllocator, DefaultMaxHeight, emptyValue, emptyValue)
return &SkipList{
mainAllocator: mainAllocator,
valueAllocator: valueAllocator,
height: 0,
head: head,
}
}