-
Notifications
You must be signed in to change notification settings - Fork 71
/
pst.py
2252 lines (1746 loc) · 106 KB
/
pst.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#! /usr/bin/env python
#
# Copyright (c) 2014, Dionach Ltd. All rights reserved. See LICENSE file.
#
# By BB
# based on MS-PST Microsoft specification for PST file format [MS-PST].pdf v2.1
#
import struct, datetime, math, os, sys, unicodedata, re, argparse, itertools, string
import progressbar
class PSTException(Exception):
    """Error raised by this module when PST data fails a structural check
    or uses a feature the parser does not implement."""
# Module-level list of logged errors (starts empty).
error_log_list = []

# Python 2/3 compatibility shims.
if sys.hexversion < 0x03000000:
    # Python 2: indexing a byte string yields a 1-char str, and there are
    # two integer types.
    to_byte = ord
    _INTEGER_TYPES = (int, long)  # noqa: F821 - 'long' only exists on Python 2
else:
    _INTEGER_TYPES = (int,)

    def to_byte(x):
        """Return x unchanged (indexing bytes already yields an int)."""
        return x


def is_int(x):
    """Return True when x is an integer (int, or long on Python 2)."""
    return isinstance(x, _INTEGER_TYPES)
##############################################################################################################################
# _ _ _ ____ _ _ ___ _ ____ ______ _
# | \ | | ___ __| | ___ | _ \ __ _| |_ __ _| |__ __ _ ___ ___ / / \ | | _ \| __ ) \ | | __ _ _ _ ___ _ __
# | \| |/ _ \ / _` |/ _ \ | | | |/ _` | __/ _` | '_ \ / _` / __|/ _ \ | || \| | | | | _ \| | | | / _` | | | |/ _ \ '__|
# | |\ | (_) | (_| | __/ | |_| | (_| | || (_| | |_) | (_| \__ \ __/ | || |\ | |_| | |_) | | | |__| (_| | |_| | __/ |
# |_| \_|\___/ \__,_|\___| |____/ \__,_|\__\__,_|_.__/ \__,_|___/\___| | ||_| \_|____/|____/| | |_____\__,_|\__, |\___|_|
# \_\ /_/ |___/
##############################################################################################################################
class NID:
    """Node ID.

    Wraps a 32-bit node identifier, given either as an integer or as the
    4 raw bytes read from the file.  The low 5 bits are exposed as
    ``nidType``; ``nidIndex`` is the identifier with those 5 bits masked
    off (it is *not* shifted down).
    """

    # nidType values (low 5 bits of the nid)
    NID_TYPE_HID = 0x00
    NID_TYPE_INTERNAL = 0x01
    NID_TYPE_NORMAL_FOLDER = 0x02
    NID_TYPE_SEARCH_FOLDER = 0x03
    NID_TYPE_NORMAL_MESSAGE = 0x04
    NID_TYPE_ATTACHMENT = 0x05
    NID_TYPE_SEARCH_UPDATE_QUEUE = 0x06
    NID_TYPE_SEARCH_CRITERIA_OBJECT = 0x07
    NID_TYPE_ASSOC_MESSAGE = 0x08
    NID_TYPE_CONTENTS_TABLE_INDEX = 0x0a
    NID_TYPE_RECEIVE_FOLDER_TABLE = 0x0b
    NID_TYPE_OUTGOING_QUEUE_TABLE = 0x0c
    NID_TYPE_HIERARCHY_TABLE = 0x0d
    NID_TYPE_CONTENTS_TABLE = 0x0e
    NID_TYPE_ASSOC_CONTENTS_TABLE = 0x0f
    NID_TYPE_SEARCH_CONTENTS_TABLE = 0x10
    NID_TYPE_ATTACHMENT_TABLE = 0x11
    NID_TYPE_RECIPIENT_TABLE = 0x12
    NID_TYPE_SEARCH_TABLE_INDEX = 0x13
    NID_TYPE_LTP = 0x1f

    # complete nid values of well-known nodes
    NID_MESSAGE_STORE = 0x21
    NID_NAME_TO_ID_MAP = 0x61
    NID_NORMAL_FOLDER_TEMPLATE = 0xA1
    NID_SEARCH_FOLDER_TEMPLATE = 0xC1
    NID_ROOT_FOLDER = 0x122
    NID_SEARCH_MANAGEMENT_QUEUE = 0x1E1
    NID_SEARCH_ACTIVITY_LIST = 0x201
    NID_RESERVED1 = 0x241
    NID_SEARCH_DOMAIN_OBJECT = 0x261
    NID_SEARCH_GATHERER_QUEUE = 0x281
    NID_SEARCH_GATHERER_DESCRIPTOR = 0x2A1
    NID_RESERVED2 = 0x2E1
    NID_RESERVED3 = 0x301
    NID_SEARCH_GATHERER_FOLDER_QUEUE = 0x321

    def __init__(self, bytes_or_nid):
        """Build from an integer nid or from 4 packed bytes."""
        if is_int(bytes_or_nid):
            raw_nid = bytes_or_nid
        else:
            (raw_nid,) = struct.unpack('I', bytes_or_nid)
        self.nid = raw_nid
        self.nidType = raw_nid & 0x1f
        self.nidIndex = raw_nid & 0xffffffe0
        # flags that let callers tell a NID from a HID without isinstance
        self.is_hid = False
        self.is_nid = True

    def __repr__(self):
        return 'nid: %s, %s' % (hex(self.nid), hex(self.nidType))
class BID:
    """Block ID.

    Decoded from 4 bytes (ANSI files) or 8 bytes (Unicode files).  The
    lowest bit is always cleared in ``bid``; bit 1 marks an internal block
    and is exposed as ``is_internal``.
    """

    def __init__(self, bytes):
        layout = 'I' if len(bytes) == 4 else 'Q'  # ansi : unicode (8)
        (raw_bid,) = struct.unpack(layout, bytes)
        # A: drop the lowest bit so that ids compare equal regardless of it
        self.bid = raw_bid - (raw_bid & 1)
        # B: bit 1 set means the block is internal
        self.is_internal = (self.bid & 2) == 2

    def __repr__(self):
        return 'bid: %s %s' % (self.bid, 'I' if self.is_internal else 'E')
class BREF:
    """Block reference: a BID together with ``ib``, an integer that the
    rest of the file uses as a seek offset.

    Accepts 8 bytes (ANSI: 4-byte bid + 4-byte ib) or 16 bytes
    (Unicode: 8-byte bid + 8-byte ib).
    """

    def __init__(self, bytes):
        layout = '4sI' if len(bytes) == 8 else '8sQ'  # ansi : unicode (16)
        raw_bid, self.ib = struct.unpack(layout, bytes)
        self.bid = BID(raw_bid)

    def __repr__(self):
        return '%s, ib: %s' % (self.bid, hex(self.ib))
class Page:
    """One fixed-size (512 byte) page.

    The page trailer (last 12 bytes for ANSI, last 16 for Unicode) is
    always decoded.  For BBT/NBT pages the B-tree entries are decoded as
    well and stored in ``rgEntries``; for the other page types only the
    trailer attributes are set.

    Raises PSTException when the page size, page type or the repeated page
    type is invalid.
    """

    PAGE_SIZE = 512

    ptypeBBT = 0x80
    ptypeNBT = 0x81
    ptypeFMap = 0x82
    ptypePMap = 0x83
    ptypeAMap = 0x84
    ptypeFPMap = 0x85
    ptypeDL = 0x86

    def __init__(self, bytes, is_ansi):
        # fixed 512 bytes
        if len(bytes) != Page.PAGE_SIZE:
            raise PSTException('Invalid Page size')
        if is_ansi:
            self.ptype, self.ptypeRepeat, self.wSig, self.bid, self.dwCRC = struct.unpack('BBHII', bytes[-12:])
        else:  # unicode
            self.ptype, self.ptypeRepeat, self.wSig, self.dwCRC, self.bid = struct.unpack('BBHIQ', bytes[-16:])

        if self.ptype < Page.ptypeBBT or self.ptype > Page.ptypeDL:
            raise PSTException('Invalid Page Type %s ' % hex(self.ptype))
        if self.ptype != self.ptypeRepeat:
            raise PSTException('Page Type does not match Page Type Repeat %s!=%s ' % (hex(self.ptype), hex(self.ptypeRepeat)))

        if self.ptype in (Page.ptypeBBT, Page.ptypeNBT):
            if is_ansi:
                self.cEnt, self.cEntMax, self.cbEnt, self.cLevel = struct.unpack('BBBB', bytes[-16:-12])
                # rgEntries 492 (cLevel>0) or 496 bytes (cLevel=0)
                entry_size = 12
            else:  # unicode
                self.cEnt, self.cEntMax, self.cbEnt, self.cLevel = struct.unpack('BBBB', bytes[-24:-20])
                # rgEntries 488 bytes
                entry_size = 24

            if self.cLevel == 0:
                if self.ptype == Page.ptypeBBT:
                    entry_type = BBTENTRY
                else:  # ptypeNBT
                    entry_type = NBTENTRY
                    # NBT leaf entries are a third larger: 16 (ansi) / 32 (unicode)
                    entry_size = entry_size + entry_size // 3
            else:  # BTENTRY
                entry_type = BTENTRY

            # self.cbEnt is the stride between entries, which may be
            # different to entry_size (the bytes actually decoded)
            self.rgEntries = [
                entry_type(bytes[i * self.cbEnt:i * self.cbEnt + entry_size])
                for i in range(self.cEnt)
            ]

    def __repr__(self):
        # cEnt/cLevel only exist for BBT/NBT pages; fall back to None so
        # that printing any other valid page type does not raise.
        return 'PageType: %s, Entries: %s, Level: %s' % (
            hex(self.ptype), getattr(self, 'cEnt', None), getattr(self, 'cLevel', None))
class BTENTRY:
    """Intermediate B-tree page entry: a key followed by a BREF.

    12 bytes for ANSI (4-byte key + 8-byte BREF), otherwise treated as the
    24-byte Unicode layout (8-byte key + 16-byte BREF).
    """

    def __init__(self, bytes):
        if len(bytes) == 12:  # ansi
            key_format, key_size = 'I', 4
        else:  # unicode 24
            key_format, key_size = 'Q', 8
        (self.btkey,) = struct.unpack(key_format, bytes[:key_size])
        self.BREF = BREF(bytes[key_size:])

    def __repr__(self):
        return '%s' % self.BREF
class BBTENTRY:
    """Leaf entry of the block B-tree: a BREF followed by the block's data
    size (``cb``) and reference count (``cRef``).

    ``key`` is the integer bid, used as the dictionary key when leaf
    entries are collected.
    """

    def __init__(self, bytes):
        # the BREF is 8 bytes in the 12-byte ansi entry, 16 bytes otherwise
        bref_size = 8 if len(bytes) == 12 else 16
        self.BREF = BREF(bytes[:bref_size])
        self.cb, self.cRef = struct.unpack('HH', bytes[bref_size:bref_size + 4])
        self.key = self.BREF.bid.bid

    def __repr__(self):
        return '%s, data size: %s' % (self.BREF, self.cb)
class NBTENTRY:
    """Leaf entry of the node B-tree: node id, data bid, subnode bid and
    parent node id.

    ``key`` is the integer nid, used as the dictionary key when leaf
    entries are collected.
    """

    def __init__(self, bytes):
        if len(bytes) == 16:  # ansi
            raw_nid, raw_data, raw_sub, raw_parent = struct.unpack('4s4s4s4s', bytes)
        else:  # unicode (32): 4 bytes follow the nid and the last 4 bytes are not decoded
            raw_nid, _, raw_data, raw_sub, raw_parent = struct.unpack('4s4s8s8s4s', bytes[:-4])
        self.nid = NID(raw_nid)
        self.bidData = BID(raw_data)
        self.bidSub = BID(raw_sub)
        self.nidParent = NID(raw_parent)
        self.key = self.nid.nid

    def __repr__(self):
        return '%s, bidData: %s, bidSub: %s' % (self.nid, self.bidData, self.bidSub)
class SLENTRY:
    """Subnode leaf entry: subnode nid plus its data bid and nested
    subnode bid (12 bytes ANSI, otherwise the 24-byte Unicode layout)."""

    def __init__(self, bytes):
        if len(bytes) == 12:  # ansi
            raw_nid, raw_data, raw_sub = struct.unpack('4s4s4s', bytes)
        else:  # unicode 24: the 4 bytes after the nid are ignored
            raw_nid, _, raw_data, raw_sub = struct.unpack('4s4s8s8s', bytes)
        self.nid = NID(raw_nid)
        self.bidData = BID(raw_data)
        self.bidSub = BID(raw_sub)

    def __repr__(self):
        return '%s %s sub%s' % (self.nid, self.bidData, self.bidSub)
class SIENTRY:
    """Subnode intermediate entry: a nid and the bid of the block it
    refers to (8 bytes ANSI, otherwise the 16-byte Unicode layout)."""

    def __init__(self, bytes):
        if len(bytes) == 8:  # ansi
            raw_nid, raw_bid = struct.unpack('4s4s', bytes)
        else:  # unicode 16: the 4 bytes after the nid are ignored
            raw_nid, _, raw_bid = struct.unpack('4s4s8s', bytes)
        self.nid = NID(raw_nid)
        self.bid = BID(raw_bid)
class Block:
    """One block read from the file.

    The trailer (last 12 bytes ANSI / 16 bytes Unicode) supplies the data
    size, signature, CRC and bid.  External (non-internal bid) blocks carry
    raw data in ``data``, decoded through ``decrypt_table`` when
    ``bCryptMethod`` is 1.  Internal blocks are decoded into a list of bids
    (``rgbid``, for XBLOCK/XXBLOCK) or a list of subnode entries
    (``rgentries``, for SLBLOCK/SIBLOCK).

    Raises PSTException when the trailer bid or size disagree with what the
    caller expected, or when the block type/level is not recognised.
    """

    # this has the first 512 entries removed, as decoding only uses from 512 onwards
    mpbbCryptFrom512 = (71, 241, 180, 230, 11, 106, 114, 72, 133, 78, 158, 235, 226, 248, 148, 83, 224, 187, 160, 2, 232, 90, 9, 171, 219, 227, 186, 198, 124, 195, 16, 221,
                        57, 5, 150, 48, 245, 55, 96, 130, 140, 201, 19, 74, 107, 29, 243, 251, 143, 38, 151, 202, 145, 23, 1, 196, 50, 45, 110, 49, 149, 255, 217, 35,
                        209, 0, 94, 121, 220, 68, 59, 26, 40, 197, 97, 87, 32, 144, 61, 131, 185, 67, 190, 103, 210, 70, 66, 118, 192, 109, 91, 126, 178, 15, 22, 41,
                        60, 169, 3, 84, 13, 218, 93, 223, 246, 183, 199, 98, 205, 141, 6, 211, 105, 92, 134, 214, 20, 247, 165, 102, 117, 172, 177, 233, 69, 33, 112, 12,
                        135, 159, 116, 164, 34, 76, 111, 191, 31, 86, 170, 46, 179, 120, 51, 80, 176, 163, 146, 188, 207, 25, 28, 167, 99, 203, 30, 77, 62, 75, 27, 155,
                        79, 231, 240, 238, 173, 58, 181, 89, 4, 234, 64, 85, 37, 81, 229, 122, 137, 56, 104, 82, 123, 252, 39, 174, 215, 189, 250, 7, 244, 204, 142, 95,
                        239, 53, 156, 132, 43, 21, 213, 119, 52, 73, 182, 18, 10, 127, 113, 136, 253, 157, 24, 65, 125, 147, 216, 88, 44, 206, 254, 36, 175, 222, 184, 54,
                        200, 161, 128, 166, 153, 152, 168, 47, 14, 129, 101, 115, 228, 194, 162, 138, 212, 225, 17, 208, 8, 139, 42, 242, 237, 154, 100, 63, 193, 108, 249, 236)

    # byte -> byte translation table built from the tuple above
    if sys.hexversion >= 0x03000000:
        decrypt_table = bytes.maketrans(bytearray(range(256)), bytearray(mpbbCryptFrom512))
    else:
        decrypt_table = string.maketrans(b''.join(map(chr, range(256))), b''.join(map(chr, mpbbCryptFrom512)))

    btypeData = 0
    btypeXBLOCK = 1
    btypeXXBLOCK = 2
    btypeSLBLOCK = 3
    btypeSIBLOCK = 4

    def __init__(self, bytes, offset, data_size, is_ansi, bid_check, bCryptMethod):
        self.is_ansi = is_ansi
        self.offset = offset  # for debugging

        # --- trailer -------------------------------------------------------
        if is_ansi:  # 12
            self.cb, self.wSig, self.bid, self.dwCRC = struct.unpack('HH4sI', bytes[-12:])
            bid_size, slentry_size, sientry_size = 4, 12, 8
            # [MS-PST] WRONG for SLBLOCK and SIBLOCK for ANSI: there is no 4 byte padding
            entries_start = 4
        else:  # unicode 16
            self.cb, self.wSig, self.dwCRC, self.bid = struct.unpack('HHI8s', bytes[-16:])
            bid_size, slentry_size, sientry_size = 8, 24, 16
            entries_start = 8
        self.bid = BID(self.bid)

        if self.bid.bid != bid_check.bid:
            raise PSTException('Block bid %s != ref bid %s' % (self.bid, bid_check))
        if data_size != self.cb:
            raise PSTException('BBT Entry data size %s != Block data size %s' % (data_size, self.cb) )

        # --- external (data) block -----------------------------------------
        if not self.bid.is_internal:
            self.block_type = Block.btypeData
            self.btype = 0
            self.cLevel = 0
            payload = bytes[:data_size]
            if bCryptMethod == 1:  # NDB_CRYPT_PERMUTE
                payload = payload.translate(Block.decrypt_table)
            # any other bCryptMethod: data is taken as-is
            self.data = payload
            return

        # --- internal block: XBLOCK, XXBLOCK, SLBLOCK or SIBLOCK -------------
        self.btype, self.cLevel, self.cEnt = struct.unpack('BBH', bytes[:4])

        if self.btype == 1:  # XBLOCK, XXBLOCK
            (self.lcbTotal,) = struct.unpack('I', bytes[4:8])
            if self.cLevel == 1:
                self.block_type = Block.btypeXBLOCK
            elif self.cLevel == 2:
                self.block_type = Block.btypeXXBLOCK
            else:
                raise PSTException('Invalid Block Level %s' % self.cLevel)
            # array of child bids starts right after the 8-byte header
            self.rgbid = [
                BID(bytes[8 + i * bid_size:8 + (i + 1) * bid_size])
                for i in range(self.cEnt)
            ]
        elif self.btype == 2:  # SLBLOCK, SIBLOCK
            self.rgentries = []
            if self.cLevel == 0:
                self.block_type = Block.btypeSLBLOCK
                entry_type, entry_size = SLENTRY, slentry_size
            elif self.cLevel == 1:
                self.block_type = Block.btypeSIBLOCK
                entry_type, entry_size = SIENTRY, sientry_size
            else:
                raise PSTException('Invalid Block Level %s' % self.cLevel)
            for i in range(self.cEnt):
                entry_begin = entries_start + i * entry_size
                self.rgentries.append(entry_type(bytes[entry_begin:entry_begin + entry_size]))
        else:
            raise PSTException('Invalid Block Type %s' % self.btype)

    def __repr__(self):
        return 'Block %s %s %s' % (self.bid, self.btype, self.cLevel)
class NBD:
    """Node Database Layer

    Reads pages and blocks from the open file ``fd`` and keeps two
    dictionaries built from the B-trees referenced by the header:
    ``nbt_entries`` (int nid -> NBTENTRY) and ``bbt_entries``
    (int bid -> BBTENTRY).
    """

    def __init__(self, fd, header):
        self.fd = fd
        self.header = header
        self.nbt_entries = self.get_page_leaf_entries(NBTENTRY, self.header.root.BREFNBT.ib)
        self.bbt_entries = self.get_page_leaf_entries(BBTENTRY, self.header.root.BREFBBT.ib)

    def fetch_page(self, offset):
        """Read and decode the page starting at file offset ``offset``."""
        self.fd.seek(offset)
        return Page(self.fd.read(Page.PAGE_SIZE), self.header.is_ansi)

    def fetch_block(self, bid):
        """Read and decode the block identified by ``bid``.

        Raises PSTException when the bid is not present in the block B-tree.
        """
        try:
            bbt_entry = self.bbt_entries[bid.bid]
        except KeyError:
            raise PSTException('Invalid BBTEntry: %s' % bid)
        offset = bbt_entry.BREF.ib
        data_size = bbt_entry.cb

        if self.header.is_ansi:
            block_trailer_size = 12
        else:  # unicode
            block_trailer_size = 16
        # block size must align on 64 bytes
        block_size = data_size + block_trailer_size
        size_diff = block_size % 64
        if size_diff != 0:
            block_size += 64 - size_diff

        self.fd.seek(offset)
        return Block(self.fd.read(block_size), offset, data_size, self.header.is_ansi, bid, self.header.bCryptMethod)

    def fetch_all_block_data(self, bid):
        """returns list of block datas"""
        datas = []
        block = self.fetch_block(bid)
        if block.block_type == Block.btypeData:
            datas.append(block.data)
        elif block.block_type == Block.btypeXBLOCK:
            for xbid in block.rgbid:
                xblock = self.fetch_block(xbid)
                if xblock.block_type != Block.btypeData:
                    raise PSTException('Expecting data block, got block type %s' % xblock.block_type)
                datas.append(xblock.data)
        elif block.block_type == Block.btypeXXBLOCK:
            for xxbid in block.rgbid:
                xxblock = self.fetch_block(xxbid)
                if xxblock.block_type != Block.btypeXBLOCK:
                    raise PSTException('Expecting XBLOCK, got block type %s' % xxblock.block_type)
                datas.extend(self.fetch_all_block_data(xxbid))
        else:
            raise PSTException('Invalid block type (not data/XBLOCK/XXBLOCK), got %s' % block.block_type)
        return datas

    def fetch_subnodes(self, bid):
        """ get dictionary of subnode SLENTRYs for subnode bid

        Keys are integer nids.  Raises PSTException on a duplicate nid
        within one SLBLOCK or on an unexpected block type.
        """
        subnodes = {}
        block = self.fetch_block(bid)
        if block.block_type == Block.btypeSLBLOCK:
            for slentry in block.rgentries:
                # The dictionary is keyed by the integer nid, so the
                # duplicate test must use the integer as well (testing the
                # NID object itself can never match).
                if slentry.nid.nid in subnodes:
                    raise PSTException('Duplicate subnode %s' % slentry.nid)
                subnodes[slentry.nid.nid] = slentry
        elif block.block_type == Block.btypeSIBLOCK:
            for sientry in block.rgentries:
                subnodes.update(self.fetch_subnodes(sientry.bid))
        else:
            raise PSTException('Invalid block type (not SLBLOCK/SIBLOCK), got %s' % block.block_type)
        return subnodes

    def get_page_leaf_entries(self, entry_type, page_offset):
        """ entry type is NBTENTRY or BBTENTRY

        Walks the B-tree rooted at the page at ``page_offset`` and returns
        a dictionary of all leaf entries keyed by ``entry.key``.
        """
        leaf_entries = {}
        page = self.fetch_page(page_offset)
        for entry in page.rgEntries:
            if isinstance(entry, entry_type):
                # direct dict membership: constant time on Python 2 and 3
                if entry.key in leaf_entries:
                    raise PSTException('Invalid Leaf Key %s' % entry)
                leaf_entries[entry.key] = entry
            elif isinstance(entry, BTENTRY):
                leaf_entries.update(self.get_page_leaf_entries(entry_type, entry.BREF.ib))
            else:
                raise PSTException('Invalid Entry Type')
        return leaf_entries
################################################################################################################################################################################
# _ _ _ _____ _ _ _ ____ _ _ ___ _____ ______ _
# | | (_)___| |_ ___ |_ _|_ _| |__ | | ___ ___ __ _ _ __ __| | | _ \ _ __ ___ _ __ ___ _ __| |_(_) ___ ___ / / | |_ _| _ \ \ | | __ _ _ _ ___ _ __
# | | | / __| __/ __| | |/ _` | '_ \| |/ _ \/ __| / _` | '_ \ / _` | | |_) | '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __| | || | | | | |_) | | | | / _` | | | |/ _ \ '__|
# | |___| \__ \ |_\__ \_ | | (_| | |_) | | __/\__ \_ | (_| | | | | (_| | | __/| | | (_) | |_) | __/ | | |_| | __/\__ \ | || |___| | | __/| | | |__| (_| | |_| | __/ |
# |_____|_|___/\__|___( ) |_|\__,_|_.__/|_|\___||___( ) \__,_|_| |_|\__,_| |_| |_| \___/| .__/ \___|_| \__|_|\___||___/ | ||_____|_| |_| | | |_____\__,_|\__, |\___|_|
# |/ |/ |_| \_\ /_/ |___/
################################################################################################################################################################################
class HID:
    """Heap ID, decoded from 4 bytes (two 16-bit words).

    The first word packs ``hidType`` (low 5 bits) and ``hidIndex`` (next
    11 bits); the second word is ``hidBlockIndex``.
    """

    def __init__(self, bytes):
        # hidIndex cannot be zero, first 5 bits must be zero (hidType)
        packed_index, self.hidBlockIndex = struct.unpack('HH', bytes)
        self.hidType = packed_index & 0x1F
        self.hidIndex = (packed_index >> 5) & 0x7FF
        # flags that let callers tell a HID from a NID without isinstance
        self.is_hid = True
        self.is_nid = False
class HNPAGEMAP:
    """Heap page map: allocation counts followed by ``cAlloc + 1`` 16-bit
    offsets (``rgibAlloc``)."""

    def __init__(self, bytes):
        self.cAlloc, self.cFree = struct.unpack('HH', bytes[:4])
        offset_count = self.cAlloc + 1  # cAlloc+1 is next free
        # decode all the 2-byte offsets in a single call
        self.rgibAlloc = list(struct.unpack('%dH' % offset_count, bytes[4:4 + 2 * offset_count]))
class HN:
    """Heap-on-node built from the data sections of a node's blocks.

    The first data section starts with the heap header (signature, client
    signature, user root HID, fill levels); every section starts with a
    16-bit offset to its page map.  One HNPAGEMAP per section is kept in
    ``hnpagemaps``.  ``subnodes`` is the subnode dictionary of the node,
    or None when the node has no subnode bid.
    """

    bTypeTC = 0x7C
    bTypeBTH = 0xB5
    bTypePC = 0xBC

    def __init__(self, nbt_entry, ltp, datas):
        """datas = list of data sections from blocks"""
        self.nbt_entry = nbt_entry
        self.datas = datas
        self.ltp = ltp

        self.hnpagemaps = []
        for block_index, block_data in enumerate(datas):
            if block_index == 0:  # HNHDR
                (page_map_offset, self.bSig, self.bClientSig,
                 raw_user_root, self.rgbFillLevel) = struct.unpack('HBB4sI', block_data[:12])
                self.hidUserRoot = HID(raw_user_root)
                if self.bSig != 0xEC:
                    raise PSTException('Invalid HN Signature %s' % self.bSig)
            else:  # HNPAGEHDR or HNBITMAPHDR
                (page_map_offset,) = struct.unpack('H', block_data[:2])
            self.hnpagemaps.append(HNPAGEMAP(block_data[page_map_offset:]))

        # subnode SLENTRYs
        self.subnodes = None
        if self.nbt_entry.bidSub.bid != 0:
            self.subnodes = self.ltp.nbd.fetch_subnodes(self.nbt_entry.bidSub)

    def get_hid_data(self, hid):
        """Return the bytes of the heap item addressed by ``hid``."""
        page_map = self.hnpagemaps[hid.hidBlockIndex]
        # hidIndex is 1-based: item i spans rgibAlloc[i-1] .. rgibAlloc[i]
        begin = page_map.rgibAlloc[hid.hidIndex - 1]
        end = page_map.rgibAlloc[hid.hidIndex]
        return self.datas[hid.hidBlockIndex][begin:end]

    def __repr__(self):
        return 'HN: %s, Blocks: %s' % (self.nbt_entry, len(self.datas))
class BTHData:
    """Leaf record of a BTH: the raw key bytes and raw data bytes."""

    def __init__(self, key, data):
        self.key, self.data = key, data
class BTHIntermediate:
    """Intermediate record of a BTH: raw key bytes, the HID of the next
    level down, and the index level this record was read at."""

    def __init__(self, key, hidNextLevel, bIdxLevel):
        self.key, self.hidNextLevel, self.bIdxLevel = key, hidNextLevel, bIdxLevel
class BTH:
    """BTree-on-Heap.

    Reads the 8-byte BTH header stored at ``bth_hid`` in the heap ``hn``
    and collects every leaf record of the tree into ``bth_datas`` (a list
    of BTHData).

    Raises PSTException when the header type byte is not HN.bTypeBTH.
    """

    def __init__(self, hn, bth_hid):
        """ hn = HN heapnode, bth_hid is hid of BTH header"""
        # BTHHEADER
        bth_header_bytes = hn.get_hid_data(bth_hid)
        self.bType, self.cbKey, self.cbEnt, self.bIdxLevels, raw_root = struct.unpack('BBBB4s', bth_header_bytes)
        self.hidRoot = HID(raw_root)
        if self.bType != HN.bTypeBTH:
            raise PSTException('Invalid BTH Type %s' % self.bType)

        self.bth_datas = []
        bth_working_stack = []
        # An all-zero root means the tree has no records.  The test is made
        # on the raw bytes: comparing the HID object with 0 is always true.
        if raw_root != b'\x00\x00\x00\x00':
            bytes = hn.get_hid_data(self.hidRoot)
            bth_record_list = self.get_bth_records(bytes, self.bIdxLevels)
            if self.bIdxLevels == 0:  # no intermediate levels
                self.bth_datas = bth_record_list
            else:
                bth_working_stack = bth_record_list

        # depth-first walk through the intermediate levels
        while bth_working_stack:
            bth_intermediate = bth_working_stack.pop()
            bytes = hn.get_hid_data(bth_intermediate.hidNextLevel)
            next_level = bth_intermediate.bIdxLevel - 1
            bth_record_list = self.get_bth_records(bytes, next_level)
            if next_level == 0:  # leafs
                self.bth_datas.extend(bth_record_list)
            else:
                bth_working_stack.extend(bth_record_list)

    def get_bth_records(self, bytes, bIdxLevel):
        """Split ``bytes`` into fixed-size records.

        Returns a list of BTHData when ``bIdxLevel`` is 0 (leaf level),
        otherwise a list of BTHIntermediate tagged with ``bIdxLevel``.
        Trailing bytes that do not fill a whole record are ignored.
        """
        bth_record_list = []
        if bIdxLevel == 0:  # leaf
            record_size = self.cbKey + self.cbEnt
            record_format = '%ss%ss' % (self.cbKey, self.cbEnt)
            for i in range(len(bytes) // record_size):
                key, data = struct.unpack(record_format, bytes[i * record_size:(i + 1) * record_size])
                bth_record_list.append(BTHData(key, data))
        else:  # intermediate
            record_size = self.cbKey + 4
            record_format = '%ss4s' % self.cbKey
            for i in range(len(bytes) // record_size):
                key, hidNextLevel = struct.unpack(record_format, bytes[i * record_size:(i + 1) * record_size])
                bth_record_list.append(BTHIntermediate(key, HID(hidNextLevel), bIdxLevel))
        return bth_record_list
class PCBTHData:
    """One property of a Property Context, decoded from a BTH leaf record.

    The record key is the property id; the record data is the property
    type followed by 4 bytes that hold either the value itself (fixed
    size types of at most 4 bytes), a HID into the heap, or the NID of a
    subnode that holds the value.

    Raises PSTException when the value refers to a subnode that the heap
    node does not have.
    """

    def __init__(self, bth_data, hn):
        self.wPropId = struct.unpack('H', bth_data.key)[0]
        self.wPropType, self.dwValueHnid = struct.unpack('H4s', bth_data.data)
        ptype = hn.ltp.ptypes[self.wPropType]

        if not ptype.is_variable and not ptype.is_multi:
            if ptype.byte_count <= 4:
                # small fixed-size value stored inline
                self.value = ptype.value(self.dwValueHnid[:ptype.byte_count])
            else:
                self.hid = HID(self.dwValueHnid)
                self.value = ptype.value(hn.get_hid_data(self.hid))
        else:
            hnid = NID(self.dwValueHnid)
            if hnid.nidType == NID.NID_TYPE_HID:
                self.hid = HID(self.dwValueHnid)
                self.value = ptype.value(hn.get_hid_data(self.hid))
            else:
                self.subnode_nid = hnid
                # hn.subnodes is None when the node has no subnode tree;
                # report that as an invalid reference rather than failing
                # with an AttributeError.
                if hn.subnodes is None or hnid.nid not in hn.subnodes:
                    raise PSTException('Invalid NID subnode reference %s' % self.subnode_nid)
                subnode_nid_bid = hn.subnodes[hnid.nid].bidData
                datas = hn.ltp.nbd.fetch_all_block_data(subnode_nid_bid)
                self.value = ptype.value(b''.join(datas))

    def __repr__(self):
        return '%s (%s) = %s' % (hex(self.wPropId), hex(self.wPropType), repr(self.value))
class PTypeEnum:
    """Numeric property type codes handled by PType.value()."""

    PtypInteger16 = 0x02
    PtypInteger32 = 0x03
    PtypFloating32 = 0x04
    PtypFloating64 = 0x05
    PtypCurrency = 0x06
    PtypFloatingTime = 0x07
    PtypErrorCode = 0x0A
    PtypBoolean = 0x0B
    PtypInteger64 = 0x14
    PtypString = 0x1F
    PtypString8 = 0x1E
    PtypTime = 0x40
    PtypGuid = 0x48
    PtypServerId = 0xFB
    PtypRestriction = 0xFD
    PtypRuleAction = 0xFE
    PtypBinary = 0x102
    PtypMultipleInteger16 = 0x1002
    PtypMultipleInteger32 = 0x1003
    PtypMultipleFloating32 = 0x1004
    PtypMultipleFloating64 = 0x1005
    PtypMultipleCurrency = 0x1006
    PtypMultipleFloatingTime = 0x1007
    PtypMultipleInteger64 = 0x1014
    PtypMultipleString = 0x101F
    PtypMultipleString8 = 0x101E
    PtypMultipleTime = 0x1040
    PtypMultipleGuid = 0x1048
    PtypMultipleBinary = 0x1102
    PtypUnspecified = 0x0
    PtypNull = 0x01
    PtypObject = 0x0D


class PType:
    """Describes one property type and decodes raw bytes of that type.

    ptype       -- one of the PTypeEnum codes
    byte_count  -- size of a fixed-size value
    is_variable -- True for variable-size types
    is_multi    -- True for multi-valued types
    """

    def __init__(self, ptype, byte_count, is_variable, is_multi):
        self.ptype, self.byte_count, self.is_variable, self.is_multi = ptype, byte_count, is_variable, is_multi

    def value(self, bytes):
        """Decode ``bytes`` according to ``self.ptype``.

        Returns a Python value (int, float, bool, text, raw bytes,
        datetime, None, or a list of those for the multi-valued types).
        Raises PSTException for types that are not implemented or not
        recognised.
        """
        if self.ptype == PTypeEnum.PtypInteger16:
            return struct.unpack('h', bytes)[0]
        elif self.ptype == PTypeEnum.PtypInteger32:
            return struct.unpack('i', bytes)[0]
        elif self.ptype == PTypeEnum.PtypFloating32:
            return struct.unpack('f', bytes)[0]
        elif self.ptype == PTypeEnum.PtypFloating64:
            return struct.unpack('d', bytes)[0]
        elif self.ptype == PTypeEnum.PtypCurrency:
            raise PSTException('PtypCurrency value not implemented')
        elif self.ptype == PTypeEnum.PtypFloatingTime:
            return self.get_floating_time(bytes)
        elif self.ptype == PTypeEnum.PtypErrorCode:
            return struct.unpack('I', bytes)[0]
        elif self.ptype == PTypeEnum.PtypBoolean:
            return (struct.unpack('B', bytes)[0] != 0)
        elif self.ptype == PTypeEnum.PtypInteger64:
            return struct.unpack('q', bytes)[0]
        elif self.ptype == PTypeEnum.PtypString:
            return bytes.decode('utf-16-le')  # unicode
        elif self.ptype == PTypeEnum.PtypString8:
            return bytes
        elif self.ptype == PTypeEnum.PtypTime:
            return self.get_time(bytes)
        elif self.ptype == PTypeEnum.PtypGuid:
            return bytes
        elif self.ptype == PTypeEnum.PtypServerId:
            raise PSTException('PtypServerId value not implemented')
        elif self.ptype == PTypeEnum.PtypRestriction:
            raise PSTException('PtypRestriction value not implemented')
        elif self.ptype == PTypeEnum.PtypRuleAction:
            raise PSTException('PtypRuleAction value not implemented')
        elif self.ptype == PTypeEnum.PtypBinary:
            return bytes
        elif self.ptype == PTypeEnum.PtypMultipleInteger16:
            count = len(bytes) // 2
            return [struct.unpack('h', bytes[i*2:(i+1)*2])[0] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleInteger32:
            count = len(bytes) // 4
            return [struct.unpack('i', bytes[i*4:(i+1)*4])[0] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleFloating32:
            count = len(bytes) // 4
            return [struct.unpack('f', bytes[i*4:(i+1)*4])[0] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleFloating64:
            # the original assigned to a misspelt name here, which made
            # this branch fail with a NameError
            count = len(bytes) // 8
            return [struct.unpack('d', bytes[i*8:(i+1)*8])[0] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleCurrency:
            raise PSTException('PtypMultipleCurrency value not implemented')
        elif self.ptype == PTypeEnum.PtypMultipleFloatingTime:
            count = len(bytes) // 8
            return [self.get_floating_time(bytes[i*8:(i+1)*8]) for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleInteger64:
            count = len(bytes) // 8
            return [struct.unpack('q', bytes[i*8:(i+1)*8])[0] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleString:
            ulCount, rgulDataOffsets = self.get_multi_value_offsets(bytes)
            return [bytes[rgulDataOffsets[i]:rgulDataOffsets[i+1]].decode('utf-16-le') for i in range(ulCount)]
        elif self.ptype == PTypeEnum.PtypMultipleString8:
            ulCount, rgulDataOffsets = self.get_multi_value_offsets(bytes)
            return [bytes[rgulDataOffsets[i]:rgulDataOffsets[i+1]] for i in range(ulCount)]
        elif self.ptype == PTypeEnum.PtypMultipleTime:
            count = len(bytes) // 8
            return [self.get_time(bytes[i*8:(i+1)*8]) for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleGuid:
            count = len(bytes) // 16
            return [bytes[i*16:(i+1)*16] for i in range(count)]
        elif self.ptype == PTypeEnum.PtypMultipleBinary:
            ulCount, rgulDataOffsets = self.get_multi_value_offsets(bytes)
            return [bytes[rgulDataOffsets[i]:rgulDataOffsets[i+1]] for i in range(ulCount)]
        elif self.ptype == PTypeEnum.PtypUnspecified:
            return bytes
        elif self.ptype == PTypeEnum.PtypNull:
            return None
        elif self.ptype == PTypeEnum.PtypObject:
            return bytes[:4]
        else:
            raise PSTException('Invalid PTypeEnum for value %s ' % self.ptype)

    def get_floating_time(self, bytes):
        """Decode an 8-byte double holding days since 1899-12-30."""
        return datetime.datetime(year=1899, month=12, day=30) + datetime.timedelta(days=struct.unpack('d', bytes)[0])

    def get_time(self, bytes):
        """Decode an 8-byte signed count of 1/10 microsecond ticks since 1601-01-01."""
        return datetime.datetime(year=1601, month=1, day=1) + datetime.timedelta(microseconds = struct.unpack('q', bytes)[0]/10.0)

    def get_multi_value_offsets(self, bytes):
        """Return (count, offsets) for a variable-size multi-valued blob.

        The blob starts with a 32-bit count followed by that many 32-bit
        start offsets; ``len(bytes)`` is appended to the offsets so that
        item i always spans ``offsets[i]:offsets[i+1]``.
        """
        ulCount = struct.unpack('I', bytes[:4])[0]
        rgulDataOffsets = [struct.unpack('I', bytes[(i+1)*4:(i+2)*4])[0] for i in range(ulCount)]
        rgulDataOffsets.append(len(bytes))
        return ulCount, rgulDataOffsets
class PropIdEnum:
PidTagNameidBucketCount = 0x0001
PidTagNameidStreamGuid = 0x0002
PidTagNameidStreamEntry = 0x0003
PidTagNameidStreamString = 0x0004
PidTagNameidBucketBase = 0x1000
PidTagItemTemporaryFlags = 0x1097
PidTagPstBestBodyProptag = 0x661D
PidTagPstIpmsubTreeDescendant = 0x6705
PidTagPstSubTreeContainer = 0x6772
PidTagLtpParentNid = 0x67F1
PidTagLtpRowId = 0x67F2
PidTagLtpRowVer = 0x67F3
PidTagPstPassword = 0x67FF
PidTagMapiFormComposeCommand = 0x682F
PidTagRecordKey = 0x0FF9
PidTagDisplayName = 0x3001
PidTagIpmSubTreeEntryId = 0x35E0
PidTagIpmWastebasketEntryId = 0x35E3
PidTagFinderEntryId = 0x35E7
PidTagContentCount = 0x3602
PidTagContentUnreadCount = 0x3603
PidTagSubfolders = 0x360A
PidTagReplItemid = 0x0E30
PidTagReplChangenum = 0x0E33
PidTagReplVersionHistory = 0x0E34
PidTagReplFlags = 0x0E38
PidTagContainerClass = 0x3613
PidTagPstHiddenCount = 0x6635
PidTagPstHiddenUnread = 0x6636
PidTagImportance = 0x0017
PidTagMessageClassW = 0x001A
PidTagSensitivity = 0x0036
PidTagSubjectW = 0x0037
PidTagClientSubmitTime = 0x0039
PidTagSentRepresentingSearchKey = 0x003B
PidTagSentRepresentingNameW = 0x0042
PidTagMessageToMe = 0x0057
PidTagMessageCcMe = 0x0058
PidTagConversationTopicW = 0x0070
PidTagConversationIndex = 0x0071
PidTagDisplayCcW = 0x0E03
PidTagDisplayToW = 0x0E04
PidTagMessageDeliveryTime = 0x0E06
PidTagMessageFlags = 0x0E07
PidTagMessageSize = 0x0E08
PidTagMessageStatus = 0x0E17
PidTagReplCopiedfromVersionhistory = 0x0E3C
PidTagReplCopiedfromItemid = 0x0E3D
PidTagLastModificationTime = 0x3008
PidTagSmtpAddress = 0x39FE
PidTagSecureSubmitFlags = 0x65C6
PidTagOfflineAddressBookName = 0x6800
PidTagSendOutlookRecallReport = 0x6803
PidTagOfflineAddressBookTruncatedProperties = 0x6805
PidTagMapiFormComposeCommand = 0x682F
PidTagViewDescriptorFlags = 0x7003
PidTagViewDescriptorLinkTo = 0x7004
PidTagViewDescriptorViewFolder = 0x7005
PidTagViewDescriptorName = 0x7006
PidTagViewDescriptorVersion = 0x7007
PidTagCreationTime = 0x3007
PidTagSearchKey = 0x300B
PidTagRecipientType = 0x0c15
PidTagResponsibility = 0x0E0F
PidTagObjectType = 0x0FFE
PidTagEntryID = 0x0FFF
PidTagAddressType = 0x3002
PidTagEmailAddress = 0x3003
PidTagDisplayType = 0x3900
PidTag7BitDisplayName = 0x39FF
PidTagSendRichInfo = 0x3A40
PidTagAttachmentSize = 0x0E20
PidTagAttachFilename = 0x3704
PidTagAttachMethod = 0x3705
PidTagRenderingPosition = 0x370B
PidTagSenderEntryId = 0x0C19
PidTagSenderName = 0x0C1A
PidTagSenderSearchKey = 0x0C1D
PidTagSenderAddressType = 0x0C1E
PidTagRead = 0x0E69
PidTagHasAttachments = 0x0E1B
PidTagBody = 0x1000
PidTagRtfCompressed = 0x1009
PidTagAttachDataBinary = 0x3701
PidTagAttachDataObject = 0x3701
PidTagOriginalDisplayTo = 0x0074
PidTagTransportMessageHeaders = 0x007D
PidTagSenderSmtpAddress = 0x5D01
PidTagSentRepresentingSmtpAddress = 0x5D02
PidTagReceivedBySmtpAddress = 0x5D07
PidTagReceivedRepresentingSmtpAddress = 0x5D08
PidTagAttachMimeTag = 0x370E
PidTagAttachExtension = 0x3703
PidTagAttachLongFilename = 0x3707
class PC:  # Property Context
    """Property Context: the properties stored in a heap node, exposed as
    ``props`` (property id -> PCBTHData).

    Raises PSTException when the heap is not a PC heap or when its BTH
    does not use 2-byte keys and 6-byte data records.
    """

    def __init__(self, hn):
        self.hn = hn
        if hn.bClientSig != HN.bTypePC:
            raise PSTException('Invalid HN bClientSig, not bTypePC, is %s' % hn.bClientSig)

        self.bth = BTH(hn, hn.hidUserRoot)
        if self.bth.cbKey != 2:
            raise PSTException('Invalid PC BTH key size: %s' % self.bth.cbKey)
        if self.bth.cbEnt != 6:
            raise PSTException('Invalid PC BTH data size: %s' % self.bth.cbEnt)

        # properties whose raw value is wrapped in an EntryID object
        entry_id_props = (
            PropIdEnum.PidTagFinderEntryId,
            PropIdEnum.PidTagIpmSubTreeEntryId,
            PropIdEnum.PidTagIpmWastebasketEntryId,
            PropIdEnum.PidTagEntryID,
        )
        self.props = {}
        for record in self.bth.bth_datas:
            prop = PCBTHData(record, hn)
            if prop.wPropId in entry_id_props:
                prop.value = EntryID(prop.value)
            self.props[prop.wPropId] = prop

    def getval(self, propid):
        """Return the value of property ``propid``, or None when absent."""
        if propid not in self.props:
            return None
        return self.props[propid].value

    def __repr__(self):
        header = 'PC %s\n' % self.hn
        prop_lines = ['Property %s' % self.props[wPropId] for wPropId in sorted(self.props.keys())]
        return header + '\n'.join(prop_lines)
class TCOLDESC:
    """Table column descriptor decoded from 8 bytes: property type,
    property id, data offset (``ibData``), data size (``cbData``) and
    ``iBit``."""

    def __init__(self, bytes):
        # self.tag is 4 byte (self.wPropId, self.wPropType): where is documentation MS?
        fields = struct.unpack('HHHBB', bytes)
        self.wPropType = fields[0]
        self.wPropId = fields[1]
        self.ibData = fields[2]
        self.cbData = fields[3]
        self.iBit = fields[4]

    def __repr__(self):
        return 'Tag: %s/%s, Offset+Size: %s+%s' % (hex(self.wPropId), hex(self.wPropType), self.ibData, self.cbData)
class TCROWID:
    """Row index record of a table: the 4-byte row id (also exposed as a
    NID) and the row's index, which is 2 bytes wide in ANSI files and
    4 bytes otherwise."""

    def __init__(self, bth_data):
        row_key = bth_data.key
        (self.dwRowID,) = struct.unpack('I', row_key)  # dwRowID
        self.nid = NID(row_key)  # for hierarchy TCs
        index_format = 'H' if len(bth_data.data) == 2 else 'I'  # ansi : unicode (4)
        (self.dwRowIndex,) = struct.unpack(index_format, bth_data.data)
class TC: # Table Context
TCI_4b = 0
TCI_2b = 1
TCI_1b = 2
TCI_bm = 3
def __init__(self, hn):
self.hn = hn
if hn.bClientSig != HN.bTypeTC:
raise PSTException('Invalid HN bClientSig, not bTypeTC, is %s' % hn.bClientSig)
tcinfo_bytes = hn.get_hid_data(hn.hidUserRoot)
self.bType, self.cCols = struct.unpack('BB', tcinfo_bytes[:2])
if self.bType != HN.bTypeTC:
raise PSTException('Invalid TCINFO bType, not bTypeTC, is %s' % self.bType)
self.rgib = struct.unpack('HHHH', tcinfo_bytes[2:10])
self.hidRowIndex, self.hnidRows, self.hidIndex = struct.unpack('4s4s4s', tcinfo_bytes[10:22])
self.hidRowIndex = HID(self.hidRowIndex)
if NID(self.hnidRows).nidType == NID.NID_TYPE_HID:
self.hnidRows = HID(self.hnidRows)
else:
self.hnidRows = NID(self.hnidRows)
self.rgTCOLDESC = []
for i in range(self.cCols):
self.rgTCOLDESC.append(TCOLDESC(tcinfo_bytes[22+i*8:22+(i+1)*8]))
self.setup_row_index()
self.setup_row_matrix()
def setup_row_index(self):
self.RowIndex = {} # key is dwRowID, value is dwRowIndex
if not (self.hnidRows.is_hid and self.hnidRows.hidIndex == 0):
row_index_bth = BTH(self.hn, self.hidRowIndex)
if row_index_bth.cbKey != 4:
raise PSTException('Invalid TC RowIndex key size %s' % row_index_bth.cbKey)