-
Notifications
You must be signed in to change notification settings - Fork 0
/
blob.py
450 lines (391 loc) · 13.6 KB
/
blob.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
from array import array
import cPickle
import hashlib
from file import PAGE_SIZE
def dirties(fn):
    """
    Decorator for blob methods that modify the blob's data.

    On every call, before the wrapped method runs, the wrapper:
      1. checks the blob's state invariant via ``self._assert_valid_state()``;
      2. sets ``self.dirty = True`` if the blob is currently clean;
      3. discards the cached serialized form (``self._blob``).

    fn: the method to wrap; it is called as ``fn(self, *args, **kwargs)``.
    Returns the wrapping function, which carries fn's name and docstring.
    """
    # Imported locally so this decorator does not depend on a module-level import.
    import functools

    @functools.wraps(fn)
    def wrapped(self, *args, **kwargs):
        self._assert_valid_state()
        if self.clean:
            self.dirty = True
        # The data is about to change, so any cached serialization is stale.
        # Dropping it on every call (not only on the clean -> dirty transition)
        # guarantees a later read of the serialized form is rebuilt from the
        # current data.
        self._blob = None
        return fn(self, *args, **kwargs)
    return wrapped
def validate(fn):
    """
    Decorator for blob methods that need the blob's data to be loaded.

    On every call, before the wrapped method runs, the wrapper checks the
    blob's state invariant via ``self._assert_valid_state()``. If the blob is
    invalid it then fetches the serialized data with
    ``self.cntl.getdata(self._key)``, keeps it in ``self._blob``, hands it to
    ``self._deserialize_data`` and marks the blob valid.

    fn: the method to wrap; it is called as ``fn(self, *args, **kwargs)``.
    Returns the wrapping function, which carries fn's name and docstring.
    """
    # Imported locally so this decorator does not depend on a module-level import.
    import functools

    @functools.wraps(fn)
    def wrapped(self, *args, **kwargs):
        self._assert_valid_state()
        # The state invariant guarantees that an invalid blob has a key, so
        # self._key is usable for the fetch below.
        if self.invalid:
            self._blob = self.cntl.getdata(self._key)
            self._deserialize_data(self._blob)
            self.valid = True
        return fn(self, *args, **kwargs)
    return wrapped
class Blob(object):
    """
    Base class for a node in a tree of blobs stored in a backing store.

    State kept on every blob:
      _key   -- SHA-512 hex digest of the serialized data, or None while the
                blob has uncommitted changes (see the ``dirty`` property).
      _blob  -- cached serialized form of the data, or None.
      valid  -- whether the in-memory data is loaded.
      parent -- the parent blob, or None for the root.
      cntl   -- controller object used to reach the backing store.

    Invariant (checked by _assert_valid_state): a blob has a key or is valid.

    Subclasses must implement _delete_data, _serialize_data,
    _deserialize_data and the ``children`` property.
    """

    def __init__(self, key, cntl, parent, valid=False):
        """
        key: the key of the data that this blob represents.
        cntl: the controller object this blob should use to access the backing store.
        parent: the parent blob of this blob.
        valid: whether this blob is valid. An invalid blob has its data
               fetched from the backend before it is read.
        """
        self._blob = None
        self.parent = parent
        self._key = key
        self.valid = valid
        self.cntl = cntl
        self._assert_valid_state()

    def my_name(self):
        """
        Returns a path-like name for this blob: "/" for a blob without a
        parent, otherwise the parent's name, a "/" and the name under which
        the parent locates this blob (``parent.locate(self)``).
        """
        if self.parent is None:
            return "/"
        return self.parent.my_name() + "/" + self.parent.locate(self)

    def _assert_valid_state(self):
        """
        Asserts the state invariant: the blob has a key or is valid.
        """
        assert self._key is not None or self.valid

    def _delete_data(self):
        """
        You're being removed from cache, clear your data
        """
        raise NotImplementedError()

    def evict(self):
        """
        This is called by a cache manager to let me know that I need to
        evict my data field from main memory. It only does anything if this
        blob is valid: the blob is committed, every child is evicted, and
        then the in-memory data and the cached serialized form are dropped
        and the blob is marked invalid.
        """
        self._assert_valid_state()
        if self.valid:
            # Commit first so that no uncommitted change is lost with the data.
            self.commit()
            for child in self.children:
                child.evict()
            self.valid = False
            self._delete_data()
            self._blob = None
        self._assert_valid_state()

    def commit(self):
        """
        If this node is dirty, walks up to the root of the tree, calls
        _flush_down on the root, and then reports the root's key to the
        controller with update_root. If this node is clean, commit is a no-op.
        """
        self._assert_valid_state()
        if self.dirty:
            root = self
            while root.parent is not None:
                root = root.parent
            root._flush_down()
            self.cntl.update_root(root.key)

    def _flush_down(self):
        """
        If this blob is dirty: call _flush_down on each child, then store this
        blob's serialized data in the backend under its key. A clean blob is
        skipped, together with everything below it.
        """
        if self.dirty:
            for child in self.children:
                child._flush_down()
            # self.key is evaluated before self.blob; reading the key of a
            # dirty blob re-serializes it, so both arguments agree.
            self.cntl.putdata(self.key, self.blob)

    @property
    def invalid(self):
        """
        Convenience property. Returns not self.valid.
        """
        return not self.valid

    @property
    def clean(self):
        """
        Convenience property. Returns not self.dirty.
        """
        return not self.dirty

    def _serialize_data(self):
        """
        This method returns the blob's data serialized by cPickle.dumps. Data may be,
        for example, a dict mapping filenames to keys (for a directory), or a list of
        keys (for a block list), or a string representing a list of bytes (for a block).
        """
        raise NotImplementedError()

    def _deserialize_data(self, data):
        """
        The parameter to this function is an object that was previously output by a call
        to _serialize_data on a blob of this blob's type. It will use the data to
        initialize this blob.
        """
        raise NotImplementedError()

    @validate
    def _update_hash_and_blob(self):
        """
        After this method is called, self._blob will contain the serialized representation
        of this blob's data, and self._key will contain the hash of self._blob.
        """
        self._blob = self._serialize_data()
        self._key = hashlib.sha512(self._blob).hexdigest()

    @property
    def key(self):
        """
        Returns the hash of this blob -- that is, the key under which this blob should
        be stored in the backend. If the blob is dirty, the data is re-serialized and
        re-hashed first, which also makes the blob clean again.
        """
        if self.dirty:
            self._update_hash_and_blob()
        return self._key

    @property
    def blob(self):
        """
        Returns the serialized data associated with this blob. This is the data that should
        be stored in the backend. The serialized form is rebuilt whenever no cached copy
        exists or the blob is dirty, so it always reflects the current data.
        """
        # A blob can become dirty without its cache being cleared (for example
        # when a child propagates dirtiness upwards), so the dirty flag has to
        # be checked as well as the cache itself.
        if self._blob is None or self.dirty:
            self._update_hash_and_blob()
        return self._blob

    def getdirty(self):
        """
        Getter for dirty property -- tells you whether the blob has uncommitted changes.
        A blob is dirty exactly when it has no key.
        """
        return self._key is None

    def setdirty(self, value):
        """
        Setter for dirty property. Only True may be assigned: the parent (if any, and
        if not already dirty) is marked dirty first, then this blob's key is cleared.
        """
        # we never want to set value to false, calling self.key will do it automatically
        assert value == True
        if self.parent is not None and not self.parent.dirty:
            self.parent.dirty = True
        self._key = None

    dirty = property(getdirty, setdirty)

    @property
    def children(self):
        """
        Each subclass should implement this method to return a list of Blob objects, each
        of which is a child of the current object. DirectoryBlobs should return a list of
        subdirectories/files; BlockListBlobs should return a list of BlockBlobs; BlockBlobs
        should return an empty list.
        """
        raise NotImplementedError()

    def __hash__(self):
        """
        Blobs hash by object identity.
        """
        return id(self)
class DirectoryBlob(Blob):
    """
    Represents a directory. Can be used like a dict. Example usage:
    >>> dirblob = DirectoryBlob(None, cntl, None, True)
    >>> dirblob['usr'] = DirectoryBlob(None, cntl, dirblob, True)
    >>> dirblob['usr']['bin'] = DirectoryBlob(None, cntl, dirblob['usr'], True)
    >>> dirblob['usr']['bin']['README'] = BlockListBlob(None, cntl, dirblob['usr']['bin'], True)
    The file system now looks like this:
    usr
      bin
        README
    where usr and bin are directories and README is a file.
    Delete items like so:
    >>> del dirblob['usr']['bin']['README']
    To flush everything, including the children, do:
    >>> dirblob.commit()
    """

    def __init__(self, key, cntl, parent=None, valid=False):
        """
        Same parameters as Blob.__init__. A directory created as valid starts
        out empty; an invalid one gets its items when it is first validated.
        """
        super(DirectoryBlob, self).__init__(key, cntl, parent, valid)
        if valid:
            self.items = dict()

    def _delete_data(self):
        """
        Drops the in-memory filename -> blob mapping.
        """
        self.items = dict()

    def locate(self, other):
        """
        Returns the filename under which `other` is stored in this directory.
        Raises Exception if `other` is not one of this directory's items.
        """
        for name, entry in self.items.items():
            if entry == other:
                return name
        raise Exception("%s not in %s" % (other, self.items))

    @validate
    def __getitem__(self, filename):
        """
        Returns the blob stored under filename. Raises IOError if there is
        no such entry.
        """
        try:
            return self.items[filename]
        except KeyError:
            raise IOError("%s not found" % filename)

    @validate
    @dirties
    def __setitem__(self, filename, blob):
        """
        Sets directory or filename key to point to blob value
        """
        self.items[filename] = blob

    @validate
    @dirties
    def __delitem__(self, key):
        """
        Deletes child from this directory.
        """
        # TODO: add garbage collection and everything
        # The @dirties decorator has already marked this directory dirty.
        del self.items[key]

    @validate
    def keys(self):
        """
        Returns the filenames of all files in this directory.
        """
        return self.items.keys()

    @property
    @validate
    def children(self):
        """
        Returns the blobs stored in this directory.
        """
        return self.items.values()

    def _serialize_data(self):
        """
        Returns a pickled dict mapping each filename to a
        (class of the child blob, key of the child blob) pair.
        """
        data = dict(
            (filename, (blob.__class__, blob.key))
            for filename, blob in self.items.items()
        )
        return cPickle.dumps(data)

    def _deserialize_data(self, data):
        """
        Rebuilds the items from a pickled dict as produced by _serialize_data.
        Each child is created from its stored class and key with this
        directory as its parent, and with the constructor's default validity.
        """
        # NOTE(security): unpickling executes whatever the pickle stream asks
        # for, so this is only safe while the backing store is trusted.
        entries = cPickle.loads(str(data))
        self.items = dict()
        for filename, (itemclass, item) in entries.items():
            self.items[filename] = itemclass(item, self.cntl, self)
class BlockListBlob(Blob):
    """
    Represents a block list. Can be accessed like a list. For example:
    >>> blocks = BlockListBlob(None, cntl, parent, True)
    >>> blocks[0][0] = ord("H")
    >>> print chr(blocks[0][0])
    H
    >>> blocks[0].extend(array("B", "ello, world!"))
    >>> print blocks[0].data_as_string()
    Hello, world!
    Each element in this object is a BlockBlob.
    """

    def __init__(self, key, cntl, parent, valid=False):
        """
        Same parameters as Blob.__init__. A block list created as valid starts
        out empty; an invalid one gets its blocks when it is first validated.
        """
        super(BlockListBlob, self).__init__(key, cntl, parent, valid)
        if valid:
            self.blocks = list()

    def _delete_data(self):
        """
        Drops the in-memory list of blocks.
        """
        self.blocks = list()

    def locate(self, other):
        """
        Returns a description of a block in this list: "Block of " followed by
        the name under which this block list's parent locates the list. The
        result does not depend on which block is asked about.
        """
        return "Block of " + self.parent.locate(self)

    @validate
    def __getitem__(self, item):
        """
        Returns the block at index item. If the list is too short, it is first
        extended with new, valid, empty BlockBlobs up to and including that
        index, and the list is marked dirty.
        """
        if len(self.blocks) - 1 < item:
            # block list is too short; expand
            self.dirty = True
            # The list of block keys is about to change, so a previously
            # cached serialization no longer matches.
            self._blob = None
            missing = item - len(self.blocks) + 1
            self.blocks.extend([
                BlockBlob(None, self.cntl, self, True)
                for _ in range(missing)
            ])
        return self.blocks[item]

    @validate
    @dirties
    def __setitem__(self, key, value):
        """
        Replaces the block at index key with value.
        """
        self.blocks[key] = value

    @validate
    @dirties
    def __delitem__(self, key):
        """
        Removes the block at index key.
        """
        # TODO: decrement reference count of deleted object
        # The @dirties decorator has already marked this block list dirty.
        del self.blocks[key]

    def _serialize_data(self):
        """
        Returns a pickled list of the keys of the blocks in this block list.
        """
        return cPickle.dumps([block.key for block in self.blocks])

    def _deserialize_data(self, data):
        """
        Expects a pickled list of keys of blocks in this block list; initializes
        the block list with invalid BlockBlob objects.
        """
        # NOTE(security): unpickling executes whatever the pickle stream asks
        # for, so this is only safe while the backing store is trusted.
        keys = cPickle.loads(str(data))
        self.blocks = [BlockBlob(key, self.cntl, self) for key in keys]

    @property
    @validate
    def children(self):
        """
        Returns the list of blocks.
        """
        return self.blocks
class BlockBlob(Blob):
    """
    Represents a block of data. Can be treated like an array. The underlying representation
    used is array.array. Example usage:
    >>> block = BlockBlob(None, cntl, parent, True)
    >>> print block.data_as_string()

    >>> block.extend(array("B", "Good bye."))
    >>> print block.data_as_string()
    Good bye.
    """

    def __init__(self, key, cntl, parent, valid=False):
        """
        Same parameters as Blob.__init__. A block created as valid starts out
        empty; an invalid one gets its data when it is first validated.
        """
        super(BlockBlob, self).__init__(key, cntl, parent, valid)
        if self.valid:
            self.data = array('B')

    def _delete_data(self):
        """
        Drops the in-memory bytes.
        """
        self.data = array('B')

    def _pad_to(self, length):
        """
        Zero-extends self.data so that it holds at least `length` bytes.
        """
        missing = length - len(self.data)
        if missing > 0:
            self.data.extend(array("B", [0]) * missing)

    @validate
    def __getitem__(self, index):
        """
        Returns the byte at index. Raises IndexError if index is past the end.
        """
        if len(self.data) - 1 < index:
            raise IndexError("index out of range")
        return self.data[index]

    @validate
    @dirties
    def __setitem__(self, index, value):
        """
        Sets the byte at index to value. If the block is too short it is first
        extended with zero bytes up to and including index.
        """
        self._pad_to(index + 1)
        self.data[index] = value

    @validate
    @dirties
    def write(self, index, value):
        """
        Batch write operation. Writes value to data starting at index. Overwrites
        existing data. If the block is too short to hold the written range it is
        first extended with zero bytes. A value that is not already an array is
        converted with array("B", value).
        """
        end = index + len(value)
        self._pad_to(end)
        if isinstance(value, array):
            self.data[index:end] = value
        else:
            self.data[index:end] = array("B", value)

    @validate
    def read(self, index, size):
        """
        Returns sub array [index, index+size). Raises IndexError if the range
        extends past the end of the data.
        """
        if len(self.data) < index + size:
            raise IndexError("index out of range")
        return self.data[index:index + size]

    @validate
    @dirties
    def append(self, value):
        """
        Appends value to this block's data. Value must be an integer.
        """
        # TODO: test against PAGE_SIZE
        self.data.append(value)

    @validate
    @dirties
    def extend(self, values):
        """
        Appends each of a list of values to this block's data. The values must
        be integers.
        """
        # TODO: test against PAGE_SIZE
        self.data.extend(values)

    def _serialize_data(self):
        """
        Returns the block's bytes, as a string, serialized by cPickle.dumps.
        """
        return cPickle.dumps(self.data.tostring())

    def _deserialize_data(self, data):
        """
        Replaces the block's bytes with those in a pickled string as produced
        by _serialize_data.
        """
        # NOTE(security): unpickling executes whatever the pickle stream asks
        # for, so this is only safe while the backing store is trusted.
        raw = cPickle.loads(str(data))
        self.data = array("B")
        self.data.fromstring(raw)

    @validate
    def data_as_string(self):
        """
        Returns the data in this block as a string
        """
        return "".join(map(chr, self.data))

    @validate
    def size(self):
        """
        Returns the number of bytes in this block.
        """
        return len(self.data)

    @property
    def children(self):
        """
        Blocks don't have children, so return an empty list.
        """
        return list()