-
Notifications
You must be signed in to change notification settings - Fork 58
/
evm-cpu.py
executable file
·982 lines (833 loc) · 41.6 KB
/
evm-cpu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
import idc
import idaapi
from idc import *
from idaapi import *
import idautils
import known_hashes
'''
Code from manticore
'''
class EVMAsm(object):
'''
EVM Instruction factory
Example use::
>>> from manticore.platforms.evm import EVMAsm
>>> EVMAsm.disassemble_one('\\x60\\x10')
Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
>>> EVMAsm.assemble_one('PUSH1 0x10')
Instruction(0x60, 'PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.', 16, 0)
>>> tuple(EVMAsm.disassemble_all('\\x30\\x31'))
(Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
>>> tuple(EVMAsm.assemble_all('ADDRESS\\nBALANCE'))
(Instruction(0x30, 'ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account.', None, 0),
Instruction(0x31, 'BALANCE', 0, 1, 1, 20, 'Get balance of the given account.', None, 1))
>>> EVMAsm.assemble_hex(
... """PUSH1 0x60
... BLOCKHASH
... MSTORE
... PUSH1 0x2
... PUSH2 0x100
... """
... )
'0x606040526002610100'
>>> EVMAsm.disassemble_hex('0x606040526002610100')
'PUSH1 0x60\\nBLOCKHASH\\nMSTORE\\nPUSH1 0x2\\nPUSH2 0x100'
'''
class Instruction(object):
def __init__(self, opcode, name, operand_size, pops, pushes, fee, description, operand=None, offset=0):
'''
This represents an EVM instruction.
EVMAsm will create this for you.
:param opcode: the opcode value
:param name: instruction name
:param operand_size: immediate operand size in bytes
:param pops: number of items popped from the stack
:param pushes: number of items pushed into the stack
:param fee: gas fee for the instruction
:param description: textual description of the instruction
:param operand: optional immediate operand
:param offset: optional offset of this instruction in the program
Example use::
instruction = EVMAsm.assemble_one('PUSH1 0x10')
print 'Instruction: %s'% instruction
print '\tdescription:', instruction.description
print '\tgroup:', instruction.group
print '\taddress:', instruction.offset
print '\tsize:', instruction.size
print '\thas_operand:', instruction.has_operand
print '\toperand_size:', instruction.operand_size
print '\toperand:', instruction.operand
print '\tsemantics:', instruction.semantics
print '\tpops:', instruction.pops
print '\tpushes:', instruction.pushes
print '\tbytes:', '0x'+instruction.bytes.encode('hex')
print '\twrites to stack:', instruction.writes_to_stack
print '\treads from stack:', instruction.reads_from_stack
print '\twrites to memory:', instruction.writes_to_memory
print '\treads from memory:', instruction.reads_from_memory
print '\twrites to storage:', instruction.writes_to_storage
print '\treads from storage:', instruction.reads_from_storage
print '\tis terminator', instruction.is_terminator
'''
self._opcode = opcode
self._name = name
self._operand_size = operand_size
self._pops = pops
self._pushes = pushes
self._fee = fee
self._description = description
self._operand = operand #Immediate operand if any
if operand_size != 0 and operand is not None:
mask = (1<<operand_size*8)-1
if ~mask & operand:
raise ValueError("operand should be %d bits long"%(operand_size*8))
self._offset=offset
def __eq__(self, other):
''' Instructions are equal if all features match '''
return self._opcode == other._opcode and\
self._name == other._name and\
self._operand == other._operand and\
self._operand_size == other._operand_size and\
self._pops == other._pops and\
self._pushes == other._pushes and\
self._fee == other._fee and\
self._offset == other._offset and\
self._description == other._description
def __repr__(self):
output = 'Instruction(0x%x, %r, %d, %d, %d, %d, %r, %r, %r)'%(self._opcode, self._name, self._operand_size, self._pops, self._pushes, self._fee, self._description, self._operand, self._offset)
return output
def __str__(self):
output = self.name + (' 0x%x'%self.operand if self.has_operand else '')
return output
@property
def opcode(self):
''' The opcode as an integer '''
return self._opcode
@property
def name(self):
''' The instruction name/mnemonic '''
if self._name == 'PUSH':
return 'PUSH%d'%self.operand_size
elif self._name == 'DUP':
return 'DUP%d'%self.pops
elif self._name == 'SWAP':
return 'SWAP%d'%(self.pops-1)
elif self._name == 'LOG':
return 'LOG%d'%(self.pops-2)
return self._name
def parse_operand(self, buf):
''' Parses an operand from buf
:param buf: a buffer
:type buf: iterator/generator/string
'''
buf = iter(buf)
try:
operand = 0
for _ in range(self.operand_size):
operand <<= 8
operand |= ord(next(buf))
self._operand = operand
except StopIteration:
raise Exception("Not enough data for decoding")
@property
def operand_size(self):
''' The immediate operand size '''
return self._operand_size
@property
def has_operand(self):
''' True if the instruction uses an immediate operand'''
return self.operand_size > 0
@property
def operand(self):
''' The immediate operand '''
return self._operand
@property
def pops(self):
'''Number words popped from the stack'''
return self._pops
@property
def pushes(self):
'''Number words pushed to the stack'''
return self._pushes
@property
def size(self):
''' Size of the encoded instruction '''
return self._operand_size + 1
@property
def fee(self):
''' The basic gas fee of the instruction '''
return self._fee
@property
def semantics(self):
''' Canonical semantics '''
return self._name
@property
def description(self):
''' Coloquial description of the instruction '''
return self._description
@property
def bytes(self):
''' Encoded instruction '''
bytes = []
bytes.append(chr(self._opcode))
for offset in reversed(xrange(self.operand_size)):
c = (self.operand >> offset*8 ) & 0xff
bytes.append(chr(c))
return ''.join(bytes)
@property
def offset(self):
'''Location in the program (optional)'''
return self._offset
@property
def group(self):
'''Instruction classification as per the yellow paper'''
classes = {
0: 'Stop and Arithmetic Operations',
1: 'Comparison & Bitwise Logic Operations',
2: 'SHA3',
3: 'Environmental Information',
4: 'Block Information',
5: 'Stack, Memory, Storage and Flow Operations',
6: 'Push Operations',
7: 'Push Operations',
8: 'Duplication Operations',
9: 'Exchange Operations',
0xa: 'Logging Operations',
0xf: 'System operations'
}
return classes.get(self.opcode>>4, 'Invalid instruction')
@property
def uses_stack(self):
''' True if the instruction reads/writes from/to the stack '''
return self.reads_from_stack or self.writes_to_stack
@property
def reads_from_stack(self):
''' True if the instruction reads from stack '''
return self.pops > 0
@property
def writes_to_stack(self):
''' True if the instruction writes to the stack '''
return self.pushes > 0
@property
def reads_from_memory(self):
''' True if the instruction reads from memory '''
return self.semantics in ('MLOAD','CREATE', 'CALL', 'CALLCODE', 'RETURN', 'DELEGATECALL', 'REVERT')
@property
def writes_to_memory(self):
''' True if the instruction writes to memory '''
return self.semantics in ('MSTORE', 'MSTORE8', 'CALLDATACOPY', 'CODECOPY', 'EXTCODECOPY')
@property
def reads_from_memory(self):
''' True if the instruction reads from memory '''
return self.semantics in ('MLOAD','CREATE', 'CALL', 'CALLCODE', 'RETURN', 'DELEGATECALL', 'REVERT')
@property
def writes_to_storage(self):
''' True if the instruction writes to the storage '''
return self.semantics in ('SSTORE')
@property
def reads_from_storage(self):
''' True if the instruction reads from the storage '''
return self.semantics in ('SLOAD')
@property
def is_terminator(self):
''' True if the instruction is a basic block terminator '''
return self.semantics in ('RETURN', 'STOP', 'INVALID', 'JUMP', 'JUMPI', 'SELFDESTRUCT', 'REVERT')
@property
def is_branch(self):
''' True if the instruction is a jump'''
return self.semantics in ('JUMP', 'JUMPI')
@property
def is_environmental(self):
''' True if the instruction access enviromental data '''
return self.group == 'Environmental Information'
@property
def is_system(self):
''' True if the instruction is a system operation '''
return self.group == 'System operations'
@property
def uses_block_info(self):
''' True if the instruction access block information'''
return self.group == 'Block Information'
@property
def is_arithmetic(self):
''' True if the instruction is an arithmetic operation '''
return self.semantics in ('ADD', 'MUL', 'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND')
#from http://gavwood.com/paper.pdf
_table = {#opcode: (name, immediate_operand_size, pops, pushes, gas, description)
0x00: ('STOP', 0, 0, 0, 0, 'Halts execution.'),
0x01: ('ADD', 0, 2, 1, 3, 'Addition operation.'),
0x02: ('MUL', 0, 2, 1, 5, 'Multiplication operation.'),
0x03: ('SUB', 0, 2, 1, 3, 'Subtraction operation.'),
0x04: ('DIV', 0, 2, 1, 5, 'Integer division operation.'),
0x05: ('SDIV', 0, 2, 1, 5, 'Signed integer division operation (truncated).'),
0x06: ('MOD', 0, 2, 1, 5, 'Modulo remainder operation.'),
0x07: ('SMOD', 0, 2, 1, 5, 'Signed modulo remainder operation.'),
0x08: ('ADDMOD', 0, 3, 1, 8, 'Modulo addition operation.'),
0x09: ('MULMOD', 0, 3, 1, 8, 'Modulo multiplication operation.'),
0x0a: ('EXP', 0, 2, 1, 10, 'Exponential operation.'),
0x0b: ('SIGNEXTEND', 0, 2, 1, 5, "Extend length of two's complement signed integer."),
0x10: ('LT', 0, 2, 1, 3, 'Less-than comparision.'),
0x11: ('GT', 0, 2, 1, 3, 'Greater-than comparision.'),
0x12: ('SLT', 0, 2, 1, 3, 'Signed less-than comparision.'),
0x13: ('SGT', 0, 2, 1, 3, 'Signed greater-than comparision.'),
0x14: ('EQ', 0, 2, 1, 3, 'Equality comparision.'),
0x15: ('ISZERO', 0, 1, 1, 3, 'Simple not operator.'),
0x16: ('AND', 0, 2, 1, 3, 'Bitwise AND operation.'),
0x17: ('OR', 0, 2, 1, 3, 'Bitwise OR operation.'),
0x18: ('XOR', 0, 2, 1, 3, 'Bitwise XOR operation.'),
0x19: ('NOT', 0, 1, 1, 3, 'Bitwise NOT operation.'),
0x1a: ('BYTE', 0, 2, 1, 3, 'Retrieve single byte from word.'),
0x1b: ('SHL', 0, 2, 1, 3, 'Shift Left.'),
0x1c: ('SHR', 0, 2, 1, 3, 'Logical Shift Right.'),
0x1d: ('SAR', 0, 2, 1, 3, 'Arithmetic Shift Right.'),
0x20: ('SHA3', 0, 2, 1, 30, 'Compute Keccak-256 hash.'),
0x30: ('ADDRESS', 0, 0, 1, 2, 'Get address of currently executing account .'),
0x31: ('BALANCE', 0, 1, 1, 20, 'Get balance of the given account.'),
0x32: ('ORIGIN', 0, 0, 1, 2, 'Get execution origination address.'),
0x33: ('CALLER', 0, 0, 1, 2, 'Get caller address.'),
0x34: ('CALLVALUE', 0, 0, 1, 2, 'Get deposited value by the instruction/transaction responsible for this execution.'),
0x35: ('CALLDATALOAD', 0, 1, 1, 3, 'Get input data of current environment.'),
0x36: ('CALLDATASIZE', 0, 0, 1, 2, 'Get size of input data in current environment.'),
0x37: ('CALLDATACOPY', 0, 3, 0, 3, 'Copy input data in current environment to memory.'),
0x38: ('CODESIZE', 0, 0, 1, 2, 'Get size of code running in current environment.'),
0x39: ('CODECOPY', 0, 3, 0, 3, 'Copy code running in current environment to memory.'),
0x3a: ('GASPRICE', 0, 0, 1, 2, 'Get price of gas in current environment.'),
0x3b: ('EXTCODESIZE', 0, 1, 1, 20, "Get size of an account's code."),
0x3c: ('EXTCODECOPY', 0, 4, 0, 20, "Copy an account's code to memory."),
0x3d: ('RETURNDATASIZE', 0, 0, 1, 2, "Pushes the size of the return data buffer onto the stack."),
0x3e: ('RETURNDATACOPY', 0, 3, 0, 3, "Copies data from the return data buffer to memory."),
0x3f: ('EXTCODEHASH', 0, 1, 1, 700, "Returns the keccak256 hash of a contract's code."),
0x40: ('BLOCKHASH', 0, 1, 1, 20, 'Get the hash of one of the 256 most recent complete blocks.'),
0x41: ('COINBASE', 0, 0, 1, 2, "Get the block's beneficiary address."),
0x42: ('TIMESTAMP', 0, 0, 1, 2, "Get the block's timestamp."),
0x43: ('NUMBER', 0, 0, 1, 2, "Get the block's number."),
0x44: ('DIFFICULTY', 0, 0, 1, 2, "Get the block's difficulty."),
0x45: ('GASLIMIT', 0, 0, 1, 2, "Get the block's gas limit."),
0x46: ('CHAINID', 0, 0, 1, 2, "Returns the current chain's EIP-155 unique identifier."),
0x48: ('BASEFEE', 0, 0, 1, 2, "Returns the value of the base fee of the current block it is executing in."),
0x50: ('POP', 0, 1, 0, 2, 'Remove item from stack.'),
0x51: ('MLOAD', 0, 1, 1, 3, 'Load word from memory.'),
0x52: ('MSTORE', 0, 2, 0, 3, 'Save word to memory.'),
0x53: ('MSTORE8', 0, 2, 0, 3, 'Save byte to memory.'),
0x54: ('SLOAD', 0, 1, 1, 50, 'Load word from storage.'),
0x55: ('SSTORE', 0, 2, 0, 0, 'Save word to storage.'),
0x56: ('JUMP', 0, 1, 0, 8, 'Alter the program counter.'),
0x57: ('JUMPI', 0, 2, 0, 10, 'Conditionally alter the program counter.'),
0x58: ('GETPC', 0, 0, 1, 2, 'Get the value of the program counter prior to the increment.'),
0x59: ('MSIZE', 0, 0, 1, 2, 'Get the size of active memory in bytes.'),
0x5a: ('GAS', 0, 0, 1, 2, 'Get the amount of available gas, including the corresponding reduction the amount of available gas.'),
0x5b: ('JUMPDEST', 0, 0, 0, 1, 'Mark a valid destination for jumps.'),
0x60: ('PUSH', 1, 0, 1, 0, 'Place 1 byte item on stack.'),
0x61: ('PUSH', 2, 0, 1, 0, 'Place 2-byte item on stack.'),
0x62: ('PUSH', 3, 0, 1, 0, 'Place 3-byte item on stack.'),
0x63: ('PUSH', 4, 0, 1, 0, 'Place 4-byte item on stack.'),
0x64: ('PUSH', 5, 0, 1, 0, 'Place 5-byte item on stack.'),
0x65: ('PUSH', 6, 0, 1, 0, 'Place 6-byte item on stack.'),
0x66: ('PUSH', 7, 0, 1, 0, 'Place 7-byte item on stack.'),
0x67: ('PUSH', 8, 0, 1, 0, 'Place 8-byte item on stack.'),
0x68: ('PUSH', 9, 0, 1, 0, 'Place 9-byte item on stack.'),
0x69: ('PUSH', 10, 0, 1, 0, 'Place 10-byte item on stack.'),
0x6a: ('PUSH', 11, 0, 1, 0, 'Place 11-byte item on stack.'),
0x6b: ('PUSH', 12, 0, 1, 0, 'Place 12-byte item on stack.'),
0x6c: ('PUSH', 13, 0, 1, 0, 'Place 13-byte item on stack.'),
0x6d: ('PUSH', 14, 0, 1, 0, 'Place 14-byte item on stack.'),
0x6e: ('PUSH', 15, 0, 1, 0, 'Place 15-byte item on stack.'),
0x6f: ('PUSH', 16, 0, 1, 0, 'Place 16-byte item on stack.'),
0x70: ('PUSH', 17, 0, 1, 0, 'Place 17-byte item on stack.'),
0x71: ('PUSH', 18, 0, 1, 0, 'Place 18-byte item on stack.'),
0x72: ('PUSH', 19, 0, 1, 0, 'Place 19-byte item on stack.'),
0x73: ('PUSH', 20, 0, 1, 0, 'Place 20-byte item on stack.'),
0x74: ('PUSH', 21, 0, 1, 0, 'Place 21-byte item on stack.'),
0x75: ('PUSH', 22, 0, 1, 0, 'Place 22-byte item on stack.'),
0x76: ('PUSH', 23, 0, 1, 0, 'Place 23-byte item on stack.'),
0x77: ('PUSH', 24, 0, 1, 0, 'Place 24-byte item on stack.'),
0x78: ('PUSH', 25, 0, 1, 0, 'Place 25-byte item on stack.'),
0x79: ('PUSH', 26, 0, 1, 0, 'Place 26-byte item on stack.'),
0x7a: ('PUSH', 27, 0, 1, 0, 'Place 27-byte item on stack.'),
0x7b: ('PUSH', 28, 0, 1, 0, 'Place 28-byte item on stack.'),
0x7c: ('PUSH', 29, 0, 1, 0, 'Place 29-byte item on stack.'),
0x7d: ('PUSH', 30, 0, 1, 0, 'Place 30-byte item on stack.'),
0x7e: ('PUSH', 31, 0, 1, 0, 'Place 31-byte item on stack.'),
0x7f: ('PUSH', 32, 0, 1, 0, 'Place 32-byte (full word) item on stack.'),
0x80: ('DUP', 0, 1, 2, 3, 'Duplicate 1st stack item.'),
0x81: ('DUP', 0, 2, 3, 3, 'Duplicate 2nd stack item.'),
0x82: ('DUP', 0, 3, 4, 3, 'Duplicate 3rd stack item.'),
0x83: ('DUP', 0, 4, 5, 3, 'Duplicate 4th stack item.'),
0x84: ('DUP', 0, 5, 6, 3, 'Duplicate 5th stack item.'),
0x85: ('DUP', 0, 6, 7, 3, 'Duplicate 6th stack item.'),
0x86: ('DUP', 0, 7, 8, 3, 'Duplicate 7th stack item.'),
0x87: ('DUP', 0, 8, 9, 3, 'Duplicate 8th stack item.'),
0x88: ('DUP', 0, 9, 10, 3, 'Duplicate 9th stack item.'),
0x89: ('DUP', 0, 10, 11, 3, 'Duplicate 10th stack item.'),
0x8a: ('DUP', 0, 11, 12, 3, 'Duplicate 11th stack item.'),
0x8b: ('DUP', 0, 12, 13, 3, 'Duplicate 12th stack item.'),
0x8c: ('DUP', 0, 13, 14, 3, 'Duplicate 13th stack item.'),
0x8d: ('DUP', 0, 14, 15, 3, 'Duplicate 14th stack item.'),
0x8e: ('DUP', 0, 15, 16, 3, 'Duplicate 15th stack item.'),
0x8f: ('DUP', 0, 16, 17, 3, 'Duplicate 16th stack item.'),
0x90: ('SWAP', 0, 2, 2, 3, 'Exchange 1st and 2nd stack items.'),
0x91: ('SWAP', 0, 3, 3, 3, 'Exchange 1st and 3rd stack items.'),
0x92: ('SWAP', 0, 4, 4, 3, 'Exchange 1st and 4th stack items.'),
0x93: ('SWAP', 0, 5, 5, 3, 'Exchange 1st and 5th stack items.'),
0x94: ('SWAP', 0, 6, 6, 3, 'Exchange 1st and 6th stack items.'),
0x95: ('SWAP', 0, 7, 7, 3, 'Exchange 1st and 7th stack items.'),
0x96: ('SWAP', 0, 8, 8, 3, 'Exchange 1st and 8th stack items.'),
0x97: ('SWAP', 0, 9, 9, 3, 'Exchange 1st and 9th stack items.'),
0x98: ('SWAP', 0, 10, 10, 3, 'Exchange 1st and 10th stack items.'),
0x99: ('SWAP', 0, 11, 11, 3, 'Exchange 1st and 11th stack items.'),
0x9a: ('SWAP', 0, 12, 12, 3, 'Exchange 1st and 12th stack items.'),
0x9b: ('SWAP', 0, 13, 13, 3, 'Exchange 1st and 13th stack items.'),
0x9c: ('SWAP', 0, 14, 14, 3, 'Exchange 1st and 14th stack items.'),
0x9d: ('SWAP', 0, 15, 15, 3, 'Exchange 1st and 15th stack items.'),
0x9e: ('SWAP', 0, 16, 16, 3, 'Exchange 1st and 16th stack items.'),
0x9f: ('SWAP', 0, 17, 17, 3, 'Exchange 1st and 17th stack items.'),
0xa0: ('LOG', 0, 2, 0, 375, 'Append log record with no topics.'),
0xa1: ('LOG', 0, 3, 0, 750, 'Append log record with one topic.'),
0xa2: ('LOG', 0, 4, 0, 1125, 'Append log record with two topics.'),
0xa3: ('LOG', 0, 5, 0, 1500, 'Append log record with three topics.'),
0xa4: ('LOG', 0, 6, 0, 1875, 'Append log record with four topics.'),
0xf0: ('CREATE', 0, 3, 1, 32000, 'Create a new account with associated code.'),
0xf1: ('CALL', 0, 7, 1, 40, 'Message-call into an account.'),
0xf2: ('CALLCODE', 0, 7, 1, 40, "Message-call into this account with alternative account's code."),
0xf3: ('RETURN', 0, 2, 0, 0, 'Halt execution returning output data.'),
0xf4: ('DELEGATECALL', 0, 6, 1, 40, "Message-call into this account with an alternative account's code, but persisting into this account with an alternative account's code."),
0xf5: ('BREAKPOINT', 0, 0, 0, 40, 'Not in yellow paper FIXME'),
0xf6: ('RNGSEED', 0, 1, 1, 0, 'Not in yellow paper FIXME'),
0xf7: ('SSIZEEXT', 0, 2, 1, 0, 'Not in yellow paper FIXME'),
0xf8: ('SLOADBYTES', 0, 3, 0, 0, 'Not in yellow paper FIXME'),
0xf9: ('SSTOREBYTES', 0, 3, 0, 0, 'Not in yellow paper FIXME'),
0xfa: ('SSIZE', 0, 1, 1, 40, 'Not in yellow paper FIXME'),
0xfb: ('STATEROOT', 0, 1, 1, 0, 'Not in yellow paper FIXME'),
0xfc: ('TXEXECGAS', 0, 0, 1, 0, 'Not in yellow paper FIXME'),
0xfd: ('REVERT', 0, 2, 0, 0, 'Stop execution and revert state changes, without consuming all provided gas and providing a reason.'),
0xfe: ('INVALID', 0, 0, 0, 0, 'Designated invalid instruction.'),
0xff: ('SELFDESTRUCT', 0, 1, 0, 5000, 'Halt execution and register account for later deletion.')
}
@staticmethod
#@memoized
def _get_reverse_table():
''' Build an internal table used in the assembler '''
reverse_table = {}
for (opcode, (name, immediate_operand_size, pops, pushes, gas, description)) in EVMAsm._table.items():
mnemonic = name
if name == 'PUSH':
mnemonic = '%s%d'%(name, (opcode&0x1f) + 1)
elif name in ('SWAP', 'LOG', 'DUP'):
mnemonic = '%s%d'%(name, (opcode&0xf) + 1)
reverse_table[mnemonic] = opcode, name, immediate_operand_size, pops, pushes, gas, description
return reverse_table
@staticmethod
def assemble_one(assembler, offset=0):
''' Assemble one EVM instruction from its textual representation.
:param assembler: assembler code for one instruction
:param offset: offset of the instruction in the bytecode (optional)
:return: An Instruction object
Example use::
>>> print evm.EVMAsm.assemble_one('LT')
'''
try:
_reverse_table = EVMAsm._get_reverse_table()
assembler = assembler.strip().split()
opcode, name, operand_size, pops, pushes, gas, description = _reverse_table[assembler[0].upper()]
if operand_size > 0:
assert len(assembler) == 2
operand = int(assembler[1],0)
else:
assert len(assembler) == 1
operand = None
return EVMAsm.Instruction(opcode, name, operand_size, pops, pushes, gas, description, operand=operand, offset=offset)
except:
raise Exception("Something wrong at offset %d"%offset)
@staticmethod
def assemble_all(assembler, offset=0):
''' Assemble a sequence of textual representation of EVM instructions
:param assembler: assembler code for any number of instructions
:param offset: offset of the first instruction in the bytecode(optional)
:return: An generator of Instruction objects
Example use::
>>> evm.EVMAsm.encode_one("""PUSH1 0x60
PUSH1 0x40
MSTORE
PUSH1 0x2
PUSH2 0x108
PUSH1 0x0
POP
SSTORE
PUSH1 0x40
MLOAD
""")
'''
if isinstance(assembler, str):
assembler = assembler.split('\n')
assembler = iter(assembler)
for line in assembler:
if not line.strip():
continue
instr = EVMAsm.assemble_one(line, offset=offset)
yield instr
offset += instr.size
@staticmethod
def disassemble_one(bytecode, offset=0):
''' Decode a single instruction from a bytecode
:param bytecode: the bytecode stream
:param offset: offset of the instruction in the bytecode(optional)
:type bytecode: iterator/sequence/str
:return: an Instruction object
Example use::
>>> print EVMAsm.assemble_one('PUSH1 0x10')
'''
bytecode = iter(bytecode)
opcode = ord(next(bytecode))
invalid = ('INVALID', 0, 0, 0, 0, 'Unknown opcode')
name, operand_size, pops, pushes, gas, description = EVMAsm._table.get(opcode, invalid)
instruction = EVMAsm.Instruction(opcode, name, operand_size, pops, pushes, gas, description, offset=offset)
if instruction.has_operand:
instruction.parse_operand(bytecode)
return instruction
@staticmethod
def disassemble_all(bytecode, offset=0):
''' Decode all instructions in bytecode
:param bytecode: an evm bytecode (binary)
:param offset: offset of the first instruction in the bytecode(optional)
:type bytecode: iterator/sequence/str
:return: An generator of Instruction objects
Example use::
>>> for inst in EVMAsm.decode_all(bytecode):
... print inst
...
PUSH1 0x60
PUSH1 0x40
MSTORE
PUSH1 0x2
PUSH2 0x108
PUSH1 0x0
POP
SSTORE
PUSH1 0x40
MLOAD
'''
bytecode = iter(bytecode)
while True:
instr = EVMAsm.disassemble_one(bytecode, offset=offset)
offset += instr.size
yield instr
@staticmethod
def disassemble(bytecode, offset=0):
''' Disassemble an EVM bytecode
:param bytecode: binary representation of an evm bytecode (hexadecimal)
:param offset: offset of the first instruction in the bytecode(optional)
:type bytecode: str
:return: the text representation of the aseembler code
Example use::
>>> EVMAsm.disassemble("\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00")
...
PUSH1 0x60
BLOCKHASH
MSTORE
PUSH1 0x2
PUSH2 0x100
'''
return '\n'.join(map(str, EVMAsm.disassemble_all(bytecode, offset=offset)))
@staticmethod
def assemble(asmcode, offset=0):
''' Assemble an EVM program
:param asmcode: an evm assembler program
:param offset: offset of the first instruction in the bytecode(optional)
:type asmcode: str
:return: the hex representation of the bytecode
Example use::
>>> EVMAsm.assemble( """PUSH1 0x60
BLOCKHASH
MSTORE
PUSH1 0x2
PUSH2 0x100
"""
)
...
"\x60\x60\x60\x40\x52\x60\x02\x61\x01\x00"
'''
return ''.join(map(lambda x:x.bytes, EVMAsm.assemble_all(asmcode, offset=offset)))
@staticmethod
def disassemble_hex(bytecode, offset=0):
''' Disassemble an EVM bytecode
:param bytecode: canonical representation of an evm bytecode (hexadecimal)
:param int offset: offset of the first instruction in the bytecode(optional)
:type bytecode: str
:return: the text representation of the aseembler code
Example use::
>>> EVMAsm.disassemble_hex("0x6060604052600261010")
...
PUSH1 0x60
BLOCKHASH
MSTORE
PUSH1 0x2
PUSH2 0x100
'''
if bytecode.startswith('0x'):
bytecode = bytecode[2:]
bytecode = bytecode.decode('hex')
return EVMAsm.disassemble(bytecode, offset=offset)
@staticmethod
def assemble_hex(asmcode, offset=0):
''' Assemble an EVM program
:param asmcode: an evm assembler program
:param offset: offset of the first instruction in the bytecode(optional)
:type asmcode: str
:return: the hex representation of the bytecode
Example use::
>>> EVMAsm.assemble_hex( """PUSH1 0x60
BLOCKHASH
MSTORE
PUSH1 0x2
PUSH2 0x100
"""
)
...
"0x6060604052600261010"
'''
return '0x' + EVMAsm.assemble(asmcode, offset=offset).encode('hex')
# thanks to https://github.com/themadinventor/ida-xtensa/issues/12 for showing all the ida7 sdk changes
# and thanks quarsklab for an IDP overview at https://blog.quarkslab.com/ida-processor-module.html
class EVMProcessor(idaapi.processor_t):
id = 0x8000 + 0x6576
flag = PR_ADJSEGS | PRN_HEX | PR_ASSEMBLE
cnbits = 8
dnbits = 8
psnames = ["evm"]
plnames = ["EVM"]
segreg_size = 0
instruc_start = 0
reg_names = ["SP"]
assembler = {
"header": [".evm"],
"flag": AS_NCHRE | ASH_HEXF0 | ASD_DECF0 | ASO_OCTF0 | ASB_BINF0 | AS_NOTAB,
"uflag": 0,
"name": "evm assembler",
"origin": ".org",
"end": ".end",
"cmnt": ";",
"ascsep": "'",
"accsep": "'",
"esccodes": "\"'",
"a_ascii": ".ascii",
"a_byte": ".byte",
"a_word": ".word",
"a_dword": ".dword",
"a_bss": "dfs %s",
"a_seg": "seg",
"a_curip": "PC",
"a_public": "",
"a_weak": "",
"a_extrn": ".extern",
"a_comdef": "",
"a_align": ".align",
"lbrace": "(",
"rbrace": ")",
"a_mod": "%",
"a_band": "&",
"a_bor": "|",
"a_xor": "^",
"a_bnot": "~",
"a_shl": "<<",
"a_shr": ">>",
"a_sizeof_fmt": "size %s",
}
def trace_sp(self, insn):
pass
@staticmethod
def get_prototype(num):
hash_str = '0x%x' %(num, )
function_prototype = known_hashes.knownHashes.get(hash_str, '').encode('ascii','ignore')
return function_prototype
def notify_emu(self, insn):
feature = insn.get_canon_feature()
#print "emulating", insn.get_canon_mnem(), hex(feature)
mnemonic = insn.get_canon_mnem()
if mnemonic == "PUSH4":
function_prototype = self.get_prototype(self.get_operand(insn[0]))
if function_prototype:
ida_bytes.set_cmt(insn.ea, function_prototype, True)
elif mnemonic == "CALLI":
# operand 0 is the address
# operand 1 is the function hash
add_cref(insn.ea, insn.ea + insn.size, fl_JN) # false branch is following instruction
addr = insn[0].addr
add_cref(insn.ea, addr, fl_CN) # true branch is stored in operand index 0
ida_bytes.set_cmt(insn.ea, "JUMPI", True)
jump_hash = insn[1].value
function_prototype = self.get_prototype(jump_hash)
label = '%s (0x%x)' %(function_prototype, jump_hash)
if not ida_lines.get_extra_cmt(addr, ida_lines.E_PREV + 0): # don't dup
ida_lines.add_extra_cmt(addr, True, label)
if function_prototype == '':
function_prototype = 'func_0x%x' % (jump_hash, )
set_name(addr, function_prototype, SN_NOCHECK|SN_NOWARN|SN_FORCE)
elif mnemonic == "JUMPI":
# add ref to next instruction for false branch
add_cref(insn.ea, insn.ea + insn.size, fl_JN)
# maybe we have a simple puch
prev_insn = idautils.DecodePreviousInstruction(insn.ea)
if prev_insn:
if prev_insn.get_canon_mnem().startswith("PUSH"):
jump_addr = self.get_operand(prev_insn[0])
add_cref(insn.ea, jump_addr, fl_JN)
elif mnemonic == "JUMP":
prev_insn = idautils.DecodePreviousInstruction(insn.ea)
if prev_insn:
# TODO: implement stack modeling to resolve actual top value of stack
if prev_insn.get_canon_mnem().startswith("PUSH"):
jump_addr = self.get_operand(prev_insn[0])
#print "found jump to", hex(jump_addr)
add_cref(insn.ea, jump_addr, fl_JN)
# TODO: adjust function boundary to include all code
#func = get_func(insn.ea)
#if func:
# #print "appending new tail"
# #append_func_tail(func, jump_addr, BADADDR)
# #reanalyze_function(func)
flows = (feature & CF_STOP) == 0
if flows:
add_cref(insn.ea, insn.ea + insn.size, fl_F)
if may_trace_sp():
if flows:
self.trace_sp(insn)
else:
idc.recalc_spd(insn.ea)
return True
def notify_func_bounds(self, code, func_ea, max_func_end_ea):
"""
find_func_bounds() finished its work
The module may fine tune the function bounds
args:
possible code - one of FIND_FUNC_XXX (check find_func_bounds)
func_ea - func start ea
max_func_end_ea (from the kernel's point of view)
returns: possible_return_code
"""
#print hex(func_ea), hex(max_func_end_ea), code
#print print_insn_mnem(max_func_end_ea-1)
#append_func_tail(func, jump_addr, BADADDR)
#reanalyze_function(func)
return FIND_FUNC_OK
@staticmethod
def get_operand(op):
operand = 0
if op.type == o_idpspec0:
# re-read all of the bytes from instruction
buf = ida_bytes.get_bytes(op.addr, op.specval) # specval stores number of bytes for operand
for i in range(len(buf)):
operand <<= 8
operand |= ord(buf[i])
elif op.type == o_near:
operand = op.addr
return operand
def notify_out_operand(self, ctx, op):
if op.type == o_idpspec0:
operand = self.get_operand(op)
ctx.out_line("0x%x" %(operand, ))
elif op.type == o_near:
operand_value = self.get_operand(op)
r = ctx.out_name_expr(op, operand_value, BADADDR)
if not r:
ctx.out_line("0x%x" %(operand_value, ))
return True
def notify_out_insn(self, ctx):
ctx.out_mnemonic()
if ctx.insn[0].type == o_idpspec0:
ctx.out_char(" ")
ctx.out_one_operand(0)
elif ctx.insn[0].type == o_near:
ctx.out_char(" ")
ctx.out_one_operand(0)
ctx.set_gen_cmt()
ctx.flush_outbuf()
return
def notify_ana(self, insn):
ea = insn.ea
bytes_left = chunk_start(ea) + chunk_size(ea) - ea
bytecode = ida_bytes.get_bytes(insn.ea, bytes_left) # get remaining bytes in chunk
try:
instruction = EVMAsm.disassemble_one(bytecode)
except Exception as e:
print e
return
insn.size = instruction.size
#initialize operands to voids
operands = [insn[i] for i in xrange(1, 6)]
for o in operands:
o.type = o_void
# set the instruction
insn.itype = self.instruction_index.get(instruction._opcode, 0)
#detect "CALLI"
if instruction.name == "JUMPI":
# TODO: check for 4 prev instructions where there is a DUP2 in the middle
#decode 3 previous instructions
prev_insns = [insn]
for i in range(3):
prev_insns.append(idautils.DecodePreviousInstruction(prev_insns[-1].ea))
prev_insns.pop(0) # remove orig
# sanity check previous instructions
for i in prev_insns:
#print i.get_canon_mnem(),
if i.ea == ida_idaapi.BADADDR:
print 'ERROR'
if (prev_insns[0].get_canon_mnem().startswith("PUSH2") and
prev_insns[1].get_canon_mnem().startswith("EQ") and
prev_insns[2].get_canon_mnem().startswith("PUSH4")):
# switch it to a "CALLI"
insn.itype = 1 # CALLI instruction index
jump_target = self.get_operand(prev_insns[0][0]) # operand on the push2
insn[0].type = o_near
insn[0].addr = jump_target
function_hash = self.get_operand(prev_insns[2][0]) # operand on the push4
insn[1].type = o_imm
insn[1].value = function_hash
# set the target to be a function
AutoMark(jump_target, AU_PROC)
if instruction.has_operand:
insn[0].type = o_idpspec0 # custom type
insn[0].dtype = dt_byte32
insn[0].addr = ea + 1 # operand is located after opcode
insn[0].specval = instruction.operand_size
#o.specval = str(instruction.operand)
# return the instruction size here
return insn.size
def notify_assemble(self, ea, cs, ip, use32, line):
try:
asm = EVMAsm.assemble_one(line, 0)
except Exception as e:
print "Error trying to assemble '%s': %s" %(line, e)
return None
return asm.bytes
def __init__(self):
processor_t.__init__(self)
self.reg_names = ["SP", "virtualCS", "virtualDS"]
self.reg_first_sreg = self.reg_names.index("virtualCS")
self.reg_code_sreg = self.reg_names.index("virtualCS")
self.reg_last_sreg = self.reg_names.index("virtualDS")
self.reg_data_sreg = self.reg_names.index("virtualDS")
self.instruc = [] # why is there no T in this?
self.instruction_index = {}
#put first instruction as invalid
self.instruc.append({'name':"INVALID", 'feature':0})
self.instruction_index[0x100] = 0
self.instruc.append({'name':"CALLI", 'feature':CF_USE2|CF_STOP|CF_CALL}) # pseudo instruction
self.instruction_index[0x101] = 1
i = len(self.instruc)
for (mnemonic, info) in EVMAsm._get_reverse_table().iteritems(): #_table.iteritems():
features = 0 # initially zero
if info[2] != 0: # has immediate
features |= CF_USE1
if mnemonic in ('RETURN', 'STOP', 'INVALID', 'JUMP', 'JUMPI', 'SELFDESTRUCT', 'REVERT'):
features |= CF_STOP
if mnemonic in ('JUMP', 'JUMPI'):
features |= CF_JUMP
self.instruc.append({'name': mnemonic, 'feature':features})
self.instruction_index[info[0]] = i # look up index into instruc from opcode
if mnemonic == 'RETURN':
self.icode_return = i
i += 1
self.instruc_end = len(self.instruc)
def PROCESSOR_ENTRY():
return EVMProcessor()