// This implementation appears to have originally come from the following location:
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Trees/B%2B_tree
// It has been modified from its original form somewhat.
/* Copyright (C) 2006 David Garcia
* You may use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of this software subject to the following conditions:
* THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#if !defined BPLUSTREE_HPP_227824
#define BPLUSTREE_HPP_227824
/*
On older Linux systems, this is required for glibc to declare posix_memalign:
#if !defined(_XOPEN_SOURCE) || (_XOPEN_SOURCE < 600)
#define _XOPEN_SOURCE 600
#endif
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
// Needed for std::size_t, std::ptrdiff_t and std::free used further below
#include <cstddef>
#include <cstdlib>
// DEBUG
#include <iostream>
using std::cout;
using std::endl;
// Recent Mac OS X versions include posix_memalign; if you are using an ancient
// version, undefine this.
#define HAVE_POSIX_MEMALIGN
#ifdef HAVE_POSIX_MEMALIGN
// Nothing to do
#else
// TODO: The memory returned here is not aligned! It doesn't matter for this implementation, but it could.
static inline int posix_memalign(void** result, int, size_t bytes) {
*result = malloc(bytes);
return *result == NULL;
}
#endif
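// Usage sketch (illustrative, not part of the original header): the tree is
// parameterized by the key type, the value type, the inner-node fan-out N and
// the leaf capacity M. The concrete types and sizes below are only examples.
//
//     BPlusTree<int, int, 8, 8> tree;  // inner nodes hold 8 keys, leaves 8 pairs
//     tree.insert(42, 1);
//     int v;
//     if (tree.find(42, &v)) { /* v == 1 */ }
//     tree.del(42);                    // zeroes the value; find(42) now fails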
template <typename KEY, typename VALUE, unsigned N, unsigned M,
unsigned INNER_NODE_PADDING= 0, unsigned LEAF_NODE_PADDING= 0,
unsigned NODE_ALIGNMENT= 64>
class BPlusTree
{
public:
// Builds a new empty tree.
BPlusTree()
: depth(0),
root(new LeafNode())
{
// Previously used BOOST_STATIC_ASSERT:
// N must be greater than two to make the split of
// two inner nodes sensible.
assert(N>2);
// Leaf nodes must be able to hold at least one element
assert(M>0);
// DEBUG
// cout << "sizeof(LeafNode)==" << sizeof(LeafNode) << endl;
// cout << "sizeof(InnerNode)==" << sizeof(InnerNode) << endl;
}
~BPlusTree() {
		// TODO: Previously used boost::pool. To remove the dependency, we now just leak memory.
}
// Inserts a pair (key, value). If there is a previous pair with
// the same key, the old value is overwritten with the new one.
void insert(KEY key, VALUE value) {
// GCC warns that this may be used uninitialized, even though that is untrue.
InsertionResult result = { KEY(), 0, 0 };
bool was_split;
if( depth == 0 ) {
// The root is a leaf node
assert( *reinterpret_cast<NodeType*>(root) ==
NODE_LEAF);
was_split= leaf_insert(reinterpret_cast<LeafNode*>
(root), key, value, &result);
} else {
// The root is an inner node
assert( *reinterpret_cast<NodeType*>
(root) == NODE_INNER );
was_split= inner_insert(reinterpret_cast<InnerNode*>
(root), depth, key, value, &result);
}
if( was_split ) {
			// The old root was split into two parts.
// We have to create a new root pointing to them
depth++;
root= new InnerNode();
InnerNode* rootProxy=
reinterpret_cast<InnerNode*>(root);
rootProxy->num_keys= 1;
rootProxy->keys[0]= result.key;
rootProxy->children[0]= result.left;
rootProxy->children[1]= result.right;
}
}
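	// Example (illustrative, using a hypothetical tree instance as in the
	// usage sketch near the top of the file): inserting the same key twice
	// keeps a single entry, and the second value wins:
	//     tree.insert(5, 10);
	//     tree.insert(5, 20);   // overwrites; find(5, &v) now yields v == 20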
	// Looks for the given key. If it is not found, it returns false;
	// if it is found, it returns true and copies the associated value
	// into *value, unless the pointer is null. Entries whose value has
	// been zeroed by del() are reported as not found.
bool find(const KEY& key, VALUE* value= 0) const {
const InnerNode* inner;
const void* node= root;
unsigned d= depth, index;
while( d-- != 0 ) {
inner= reinterpret_cast<const InnerNode*>(node);
assert( inner->type == NODE_INNER );
index= inner_position_for(key, inner->keys, inner->num_keys);
assert(index < inner->num_keys + 1);
node= inner->children[index];
}
const LeafNode* leaf= reinterpret_cast<const LeafNode*>(node);
assert( leaf->type == NODE_LEAF );
index= leaf_position_for(key, leaf->keys, leaf->num_keys);
assert(index <= leaf->num_keys);
if( index < leaf->num_keys && leaf->keys[index] == key ) {
if( value != 0 ) {
*value= leaf->values[index];
}
			// Entries removed by del() keep their key but have a zeroed
			// value, so report a zero value as "not found".
			return leaf->values[index] ? true : false;
} else {
return false;
}
}
	// Looks for the given key. If it is not found, it returns false;
	// if it is found, it returns true and zeroes the associated value.
	// The key itself is not removed from the leaf.
	// Note: del currently leaks memory. Fix later.
bool del(const KEY& key) {
InnerNode* inner;
void* node= root;
unsigned d= depth, index;
while( d-- != 0 ) {
inner= reinterpret_cast<InnerNode*>(node);
assert( inner->type == NODE_INNER );
index= inner_position_for(key, inner->keys, inner->num_keys);
node= inner->children[index];
}
LeafNode* leaf= reinterpret_cast<LeafNode*>(node);
assert( leaf->type == NODE_LEAF );
index= leaf_position_for(key, leaf->keys, leaf->num_keys);
		if( index < leaf->num_keys && leaf->keys[index] == key ) {
leaf->values[index] = 0;
return true;
} else {
return false;
}
}
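	// Example (illustrative): del() does not unlink the key, it only zeroes
	// the stored value, which find() then reports as "not found":
	//     tree.insert(7, 3);
	//     tree.del(7);    // returns true
	//     tree.find(7);   // returns false, because the value was zeroed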
// Finds the LAST item that is < key. That is, the next item in the tree is not < key, but this
// item is. If we were to insert key into the tree, it would go after this item. This is weird,
// but is easier than implementing iterators. In STL terms, this would be "lower_bound(key)--"
// WARNING: This does *not* work when values are deleted. Thankfully, TPC-C does not use deletes.
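	// Example (illustrative, assuming keys 10, 20 and 30 are present and none
	// of them has been deleted):
	//     findLastLessThan(25) yields the entry for key 20
	//     findLastLessThan(20) yields the entry for key 10 (strictly less than)
	//     findLastLessThan(10) returns false, since no key is < 10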
bool findLastLessThan(const KEY& key, VALUE* value = 0, KEY* out_key = 0) const {
const void* node = root;
unsigned int d = depth;
while( d-- != 0 ) {
const InnerNode* inner = reinterpret_cast<const InnerNode*>(node);
assert( inner->type == NODE_INNER );
unsigned int pos = inner_position_for(key, inner->keys, inner->num_keys);
// We need to rewind in the case where they are equal
if (pos > 0 && key == inner->keys[pos-1]) {
pos -= 1;
}
assert(pos == 0 || inner->keys[pos-1] < key);
node = inner->children[pos];
}
const LeafNode* leaf= reinterpret_cast<const LeafNode*>(node);
assert( leaf->type == NODE_LEAF );
unsigned int pos = leaf_position_for(key, leaf->keys, leaf->num_keys);
if (pos <= leaf->num_keys) {
pos -= 1;
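			// If pos was 0, this unsigned subtraction wraps around to a huge
			// value, so both checks below fail and we fall through to the
			// final "return false".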
if (pos < leaf->num_keys && key == leaf->keys[pos]) {
pos -= 1;
}
if (pos < leaf->num_keys) {
assert(leaf->keys[pos] < key);
if (leaf->values[pos]) {
if (value != NULL) {
*value = leaf->values[pos];
}
if (out_key != NULL) {
*out_key = leaf->keys[pos];
}
return true;
} else {
// This is a deleted key! Try again with the new key value
// HACK: This is because this implementation doesn't do deletes correctly. However,
// this makes it work. We need this for TPC-C undo. The solution is to use a
// more complete b-tree implementation.
return findLastLessThan(leaf->keys[pos], value, out_key);
}
}
}
return false;
}
	// Returns the size of an inner node.
// It is useful when optimizing performance with cache alignment.
unsigned sizeof_inner_node() const {
return sizeof(InnerNode);
}
// Returns the size of a leaf node.
// It is useful when optimizing performance with cache alignment.
unsigned sizeof_leaf_node() const {
return sizeof(LeafNode);
}
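	// Tuning note (illustrative): callers can use these two accessors to check
	// how a choice of N and M maps onto NODE_ALIGNMENT-sized (64-byte by
	// default) cache lines, e.g.
	//     assert(tree.sizeof_leaf_node() <= 4 * 64);
	// before settling on the template parameters.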
private:
// Used when debugging
enum NodeType {NODE_INNER=0xDEADBEEF, NODE_LEAF=0xC0FFEE};
// Leaf nodes store pairs of keys and values.
struct LeafNode {
#ifndef NDEBUG
		// Leaf nodes need to memset the keys: leaf_insert_nonfull compares
		// keys[index] against the key being inserted even when that slot has
		// never been written, so the array must start out zeroed.
LeafNode() : type(NODE_LEAF), num_keys(0) {memset(keys,0,sizeof(keys));}
const NodeType type;
#else
LeafNode() : num_keys(0) {memset(keys,0,sizeof(keys));}
#endif
unsigned num_keys;
KEY keys[M];
VALUE values[M];
// unsigned char _pad[LEAF_NODE_PADDING];
};
// Inner nodes store pointers to other nodes interleaved with keys.
struct InnerNode {
#ifndef NDEBUG
InnerNode() : type(NODE_INNER), num_keys(0) {}
const NodeType type;
#else
InnerNode() : num_keys(0) {}
#endif
unsigned num_keys;
KEY keys[N];
void* children[N+1];
// unsigned char _pad[INNER_NODE_PADDING];
};
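	// Layout sketch (not part of the original comments): an inner node with
	// num_keys == 2 is interpreted as
	//
	//     keys:     [ k0 , k1 ]
	//     children: [ c0 , c1 , c2 ]
	//
	// where subtree c0 holds keys < k0, c1 holds keys with k0 <= key < k1,
	// and c2 holds keys >= k1. Keys equal to a separator go to the right-hand
	// child, matching inner_position_for below.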
// Custom allocator that returns aligned blocks of memory
template <unsigned ALIGNMENT>
struct AlignedMemoryAllocator {
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
static char* malloc(const size_type bytes)
{
void* result;
if( posix_memalign(&result, ALIGNMENT, bytes) != 0 ) {
result= 0;
}
// Alternative: result= std::malloc(bytes);
return reinterpret_cast<char*>(result);
}
static void free(char* const block)
{ std::free(block); }
};
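	// Usage sketch (illustrative): the static malloc/free pair matches the
	// UserAllocator interface expected by boost::pool, which this class used
	// previously (see the destructor TODO). In this version of the file the
	// nodes are allocated with plain 'new', so the allocator is only
	// referenced by the AlignedAllocator typedef near the end of the class.
	//     char* block = AlignedMemoryAllocator<64>::malloc(sizeof(LeafNode));
	//     // ... construct and use the node ...
	//     AlignedMemoryAllocator<64>::free(block);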
// Data type returned by the private insertion methods.
struct InsertionResult {
KEY key;
void* left;
void* right;
};
// Returns the position where 'key' should be inserted in a leaf node
// that has the given keys.
static unsigned leaf_position_for(const KEY& key, const KEY* keys,
unsigned num_keys) {
// Simple linear search. Faster for small values of N or M
unsigned k= 0;
while((k < num_keys) && (keys[k]<key)) {
++k;
}
return k;
/*
// Binary search. It is faster when N or M is > 100,
// but the cost of the division renders it useless
// for smaller values of N or M.
XXX--- needs to be re-checked because the linear search
has changed since the last update to the following ---XXX
int left= -1, right= num_keys, middle;
while( right -left > 1 ) {
middle= (left+right)/2;
if( keys[middle] < key) {
left= middle;
} else {
right= middle;
}
}
//assert( right == k );
return unsigned(right);
*/
}
	// Returns the index of the child to follow for 'key' in an inner node
	// with the given keys. Unlike leaf_position_for, keys equal to 'key'
	// are skipped, so equal keys always descend into the right-hand child.
static inline unsigned inner_position_for(const KEY& key, const KEY* keys,
unsigned num_keys) {
// Simple linear search. Faster for small values of N or M
unsigned k= 0;
while((k < num_keys) && ((keys[k]<key) || (keys[k]==key))) {
++k;
}
return k;
// Binary search is faster when N or M is > 100,
// but the cost of the division renders it useless
// for smaller values of N or M.
}
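	// Example (illustrative): for keys = [10, 20, 30] and num_keys == 3,
	//     leaf_position_for(20, keys, 3)  == 1  (first slot whose key is >= 20)
	//     inner_position_for(20, keys, 3) == 2  (equal keys fall to the right child)
	//     leaf_position_for(25, keys, 3)  == 2
	//     inner_position_for(25, keys, 3) == 2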
bool leaf_insert(LeafNode* node, KEY& key,
VALUE& value, InsertionResult* result) {
assert( node->type == NODE_LEAF );
assert( node->num_keys <= M );
bool was_split= false;
// Simple linear search
unsigned i= leaf_position_for(key, node->keys, node->num_keys);
if( node->num_keys == M ) {
// The node was full. We must split it
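			// Worked example (illustrative): with M == 4 and keys
			// [10, 20, 30, 40], treshold == (4+1)/2 == 2, so this node keeps
			// [10, 20], the new sibling receives [30, 40], and the separator
			// reported to the parent is 30 (new_sibling->keys[0]).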
unsigned treshold= (M+1)/2;
LeafNode* new_sibling= new LeafNode();
new_sibling->num_keys= node->num_keys -treshold;
for(unsigned j=0; j < new_sibling->num_keys; ++j) {
new_sibling->keys[j]= node->keys[treshold+j];
new_sibling->values[j]=
node->values[treshold+j];
}
node->num_keys= treshold;
if( i < treshold ) {
// Inserted element goes to left sibling
leaf_insert_nonfull(node, key, value, i);
} else {
// Inserted element goes to right sibling
leaf_insert_nonfull(new_sibling, key, value,
i-treshold);
}
// Notify the parent about the split
was_split= true;
result->key= new_sibling->keys[0];
result->left= node;
result->right= new_sibling;
} else {
// The node was not full
leaf_insert_nonfull(node, key, value, i);
}
return was_split;
}
static void leaf_insert_nonfull(LeafNode* node, KEY& key, VALUE& value,
unsigned index) {
assert( node->type == NODE_LEAF );
assert( node->num_keys < M );
assert( index < M );
assert( index <= node->num_keys );
if( node->keys[index] == key ) {
			// We are inserting a duplicate key.
			// Simply overwrite the old value.
node->values[index]= value;
} else {
// The key we are inserting is unique
for(unsigned i=node->num_keys; i > index; --i) {
node->keys[i]= node->keys[i-1];
node->values[i]= node->values[i-1];
}
node->num_keys++;
node->keys[index]= key;
node->values[index]= value;
}
}
bool inner_insert(InnerNode* node, unsigned current_depth, KEY& key,
VALUE& value, InsertionResult* result) {
assert( node->type == NODE_INNER );
assert( current_depth != 0 );
// Early split if node is full.
// This is not the canonical algorithm for B+ trees,
// but it is simpler and does not break the definition.
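		// Worked example (illustrative): with N == 4 and keys [10, 20, 30, 40],
		// treshold == (4+1)/2 == 2, so this node keeps [10] with children 0..1,
		// the new sibling receives [30, 40] with children 2..4, and 20
		// (keys[treshold-1]) is pushed up to the parent as the separator.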
bool was_split= false;
if( node->num_keys == N ) {
// Split
unsigned treshold= (N+1)/2;
InnerNode* new_sibling= new InnerNode();
new_sibling->num_keys= node->num_keys -treshold;
for(unsigned i=0; i < new_sibling->num_keys; ++i) {
new_sibling->keys[i]= node->keys[treshold+i];
new_sibling->children[i]=
node->children[treshold+i];
}
new_sibling->children[new_sibling->num_keys]=
node->children[node->num_keys];
node->num_keys= treshold-1;
// Set up the return variable
was_split= true;
result->key= node->keys[treshold-1];
result->left= node;
result->right= new_sibling;
// Now insert in the appropriate sibling
if( key < result->key ) {
inner_insert_nonfull(node, current_depth, key, value);
} else {
inner_insert_nonfull(new_sibling, current_depth, key,
value);
}
} else {
// No split
inner_insert_nonfull(node, current_depth, key, value);
}
return was_split;
}
void inner_insert_nonfull(InnerNode* node, unsigned current_depth, KEY& key,
VALUE& value) {
assert( node->type == NODE_INNER );
assert( node->num_keys < N );
assert( current_depth != 0 );
// Simple linear search
unsigned index= inner_position_for(key, node->keys,
node->num_keys);
// GCC warns that this may be used uninitialized, even though that is untrue.
InsertionResult result = { KEY(), 0, 0 };
bool was_split;
if( current_depth-1 == 0 ) {
// The children are leaf nodes
for(unsigned kk=0; kk < node->num_keys+1; ++kk) {
assert( *reinterpret_cast<NodeType*>
(node->children[kk]) == NODE_LEAF );
}
was_split= leaf_insert(reinterpret_cast<LeafNode*>
(node->children[index]), key, value, &result);
} else {
// The children are inner nodes
for(unsigned kk=0; kk < node->num_keys+1; ++kk) {
assert( *reinterpret_cast<NodeType*>
(node->children[kk]) == NODE_INNER );
}
InnerNode* child= reinterpret_cast<InnerNode*>
(node->children[index]);
was_split= inner_insert( child, current_depth-1, key, value,
&result);
}
if( was_split ) {
if( index == node->num_keys ) {
// Insertion at the rightmost key
node->keys[index]= result.key;
node->children[index]= result.left;
node->children[index+1]= result.right;
node->num_keys++;
} else {
// Insertion not at the rightmost key
node->children[node->num_keys+1]=
node->children[node->num_keys];
for(unsigned i=node->num_keys; i!=index; --i) {
node->children[i]= node->children[i-1];
node->keys[i]= node->keys[i-1];
}
node->children[index]= result.left;
node->children[index+1]= result.right;
node->keys[index]= result.key;
node->num_keys++;
}
} // else the current node is not affected
}
typedef AlignedMemoryAllocator<NODE_ALIGNMENT> AlignedAllocator;
// Depth of the tree. A tree of depth 0 only has a leaf node.
unsigned depth;
// Pointer to the root node. It may be a leaf or an inner node, but
// it is never null.
void* root;
};
#endif // !defined BPLUSTREE_HPP_227824