diff --git a/CHANGELOG.md b/CHANGELOG.md index a405edf5..0156cecd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # 1.9.1 (Unreleased) +* Fix a hang due to incorrect jump-table boundaries inferred from irrelevant register correlations to the index register + # 1.9.0 * Stop generating debian metapackages and packages with the version attached diff --git a/examples/asm_examples/ex_relative_jump_tables4/Makefile b/examples/asm_examples/ex_relative_jump_tables4/Makefile new file mode 100644 index 00000000..cd3634c9 --- /dev/null +++ b/examples/asm_examples/ex_relative_jump_tables4/Makefile @@ -0,0 +1,10 @@ + +all: ex_original.s + gcc ex_original.s -o ex + @./ex > out.txt +clean: + rm -f ex out.txt + rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb +check: + ./ex > /tmp/res.txt + @ diff out.txt /tmp/res.txt && echo TEST OK diff --git a/examples/asm_examples/ex_relative_jump_tables4/ex_original.s b/examples/asm_examples/ex_relative_jump_tables4/ex_original.s new file mode 100644 index 00000000..3c80b542 --- /dev/null +++ b/examples/asm_examples/ex_relative_jump_tables4/ex_original.s @@ -0,0 +1,312 @@ +// Similar to ex_relative_jump_tables except that this example uses +// `cmov` in computing the value for the bound variable, and the `cmov` +// is associated with ambiguous last defs (from the two incoming edges). +// +// To prevent potential overhead, Ddisasm uses a conservative way of +// finding `jump_table_max` by not creating `value_reg_limit` when there +// are multiple correlated reg relations. +// +// This example is to make sure that Ddisasm is not too aggressive in +// finding `jump_table_max` by considering all the ambiguous last defs. +// +// Note that if Ddisasm is aggressive, it will find `jump_table_max` +// for `jump_table_A`, and identify entries for `jump_table_B` +// in this example. +// However, we have observed a hang in spec2006/tonto, etc. 
+ + .text + .intel_syntax noprefix + .file "ex.c" + +# -- Begin function one + .globl one + .p2align 4, 0x90 + .type one,@function +one: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end0: +.size one, .Lfunc_end0-one +# -- End function + +# -- Begin function two + .globl two + .p2align 4, 0x90 + .type two,@function +two: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.1] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end1: +.size two, .Lfunc_end1-two +# -- End function + +# -- Begin function three + .globl three + .p2align 4, 0x90 + .type three,@function +three: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.2] + call puts@PLT + lea eax, [rbx + 1] + pop rbx + ret +.Lfunc_end2: +.size three, .Lfunc_end2-three +# -- End function + +# -- Begin function four + .globl four + .p2align 4, 0x90 + .type four,@function +four: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.3] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end3: +.size four, .Lfunc_end3-four +# -- End function + +# -- Begin function five + .globl five + .p2align 4, 0x90 + .type five,@function +five: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.4] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end4: +.size five, .Lfunc_end4-five +# -- End function + +# -- Begin function six + .globl six + .p2align 4, 0x90 + .type six,@function +six: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.5] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end5: +.size six, .Lfunc_end5-six +# -- End function + +# -- Begin function def + .globl def + .p2align 4, 0x90 + .type def,@function +def: + push rbx + mov ebx, edi + lea rdi, [rip + .L.str.6] + call puts@PLT + mov eax, ebx + pop rbx + ret +.Lfunc_end6: +.size def, .Lfunc_end6-def +# -- End function + +# -- Begin function fun + .globl fun + .p2align 4, 0x90 + .type fun,@function +fun: + push rbp + push r9 + push r10 + push r12 + push r13 + push rbx + mov rbp, rsp + mov r13d, 
esi + mov ebx, edi + cmp ebx, r13d + jge .LBB5_10 +.LBB5_2: + lea r9, [rip + .jump_table_A] + lea eax, [rbx - 1] + cmp eax, 1 + ja .LBB5_9 + jbe .target1 + jmp .target2 +.target1: + mov edi, ebx + call one + test rbx, 1 + jnz .L_odd1 + mov r12, 33 + jmp .L_end1 +.L_odd1: + mov r12, 34 +.L_end1: + lea rax, dword ptr [r12-32] + test rax, rax + cmove rax, r12 + cmp al, 4 + jbe .L_jump1 + jmp .LBB5_9 +.L_jump1: + sub r12, 32 + lea r10, [rip + .jump_table_B] + movsxd rax, dword ptr [r9 + 4*r12] + add rax, r9 + jmp rax + .p2align 4, 0x90 +.target2: + mov edi, ebx + call two + lea r10, [rip + .jump_table_B] + test rbx, 1 + jnz .L_odd2 + mov r12, 0 + jmp .L_end2 +.L_odd2: + mov r12, 1 +.L_end2: + movsxd rax, dword ptr [r9 + 4*r12] + add rax, r9 + jmp rax + .p2align 4, 0x90 +.jump_table_target3: + mov edi, ebx + call three + test rbx, 1 + jnz .L_odd3 + mov r12, 32 + jmp .L_end3 +.L_odd3: + mov r12, 33 +.L_end3: + sub r12, 32 + movsxd rax, dword ptr [r10 + 4*r12] + add rax, r10 + jmp rax + .p2align 4, 0x90 +.jump_table_target4: + mov edi, ebx + call four + jmp .LBB5_9 + .p2align 4, 0x90 +.jump_table_target5: + mov edi, ebx + call five + jmp .LBB5_9 + .p2align 4, 0x90 +.jump_table_target6: + mov edi, ebx + call six +.LBB5_9: + add ebx, 1 + cmp r13d, ebx + jne .LBB5_2 +.LBB5_10: + pop rbx + pop r13 + pop r12 + pop r10 + pop r9 + pop rbp + ret +.Lfunc_end8: + .size fun, .Lfunc_end8-fun + .section .rodata,"a",@progbits + .p2align 2 + +// here we have tables of relative offsets (symbol minus symbol) +.jump_table_A: + .long .target1-.jump_table_A + .long .jump_table_target3-.jump_table_A + .long .jump_table_target4-.jump_table_A +.jump_table_B: + .long .jump_table_target5-.jump_table_B + .long .jump_table_target6-.jump_table_B +# -- End function + + .text +# -- Begin function main + .globl main + .p2align 4, 0x90 + .type main,@function +main: + push rax + lea rdi, [rip + .L.str.7] + call puts@PLT + mov edi, 1 + mov esi, 6 + call fun + xor eax, eax + pop rcx + ret +.Lfunc_end7: + 
.size main, .Lfunc_end7-main +# -- End function + + + .type .L.str,@object # @.str + .section .rodata.str1.1,"aMS",@progbits,1 +.L.str: + .asciz "one" + .size .L.str, 4 + + .type .L.str.1,@object # @.str.1 +.L.str.1: + .asciz "two" + .size .L.str.1, 4 + + .type .L.str.2,@object # @.str.2 +.L.str.2: + .asciz "three" + .size .L.str.2, 6 + + .type .L.str.3,@object # @.str.3 +.L.str.3: + .asciz "four" + .size .L.str.3, 5 + + .type .L.str.4,@object # @.str.4 +.L.str.4: + .asciz "five" + .size .L.str.4, 5 + + .type .L.str.5,@object # @.str.5 +.L.str.5: + .asciz "six" + .size .L.str.5, 5 + + .type .L.str.6,@object # @.str.6 +.L.str.6: + .asciz "last" + .size .L.str.6, 5 + + .type .L.str.7,@object # @.str.7 +.L.str.7: + .asciz "!!!Hello World!!!" + .size .L.str.7, 18 + + + .ident "clang version 6.0.0 (tags/RELEASE_600/final)" + .section ".note.GNU-stack","",@progbits diff --git a/src/datalog/boundary_value_analysis.dl b/src/datalog/boundary_value_analysis.dl index 2d81cc55..1794ee8a 100644 --- a/src/datalog/boundary_value_analysis.dl +++ b/src/datalog/boundary_value_analysis.dl @@ -266,10 +266,13 @@ correlated_live_reg(Block,DstReg,SrcReg,Offset):- // simple arithmetic operation derives DstReg from SrcReg at OpEA limit_reg_op(OpEA,DstReg,SrcReg,Offset), reg_def_use.last_def_in_block(Block,OpEA,DstReg), + !reg_def_use.ambiguous_last_def_in_block(Block,DstReg), ( // SrcReg defined in block, same def live at end of block and OpEA reg_def_use.block_last_def(OpEA,SrcFrom,SrcReg), - reg_def_use.last_def_in_block(Block,SrcFrom,SrcReg) + !reg_def_use.ambiguous_block_last_def(OpEA,SrcReg), + reg_def_use.last_def_in_block(Block,SrcFrom,SrcReg), + !reg_def_use.ambiguous_last_def_in_block(Block,SrcReg) ; // SrcReg not defined in block !reg_def_use.last_def_in_block(Block,_,SrcReg) diff --git a/src/datalog/use_def_analysis.dl b/src/datalog/use_def_analysis.dl index 1a120468..7c048272 100644 --- a/src/datalog/use_def_analysis.dl +++ b/src/datalog/use_def_analysis.dl @@ -155,6 +155,8 @@ 
analysis for both registers and stack variables. /** The last address prior to EA where Var was defined within the block + + Multiple are possible in conditional cases. */ .decl block_last_def(EA:address,EA_def:address,Var:T) @@ -168,6 +170,17 @@ analysis for both registers and stack variables. ea_propagates_def(EA,Var), local_next(EA,EA_next). + /** + Indicates at EA, Var has multiple possible last definitions in the block + due to conditional def(s). + */ + .decl ambiguous_block_last_def(EA:address,Var:T) + + ambiguous_block_last_def(EA,Var):- + block_last_def(EA,EA_def,Var), + block_last_def(EA,EA_other,Var), + EA_def != EA_other. + /** The last definition(s) of in a given block. @@ -186,6 +199,17 @@ analysis for both registers and stack variables. block_last_def(BlockEnd,EA_def,Var) ). + /** + Indicates at the end of Block, Var has multiple possible last definitions + in the block due to conditional def(s). + */ + .decl ambiguous_last_def_in_block(Block:address,Var:T) + + ambiguous_last_def_in_block(EA,Var):- + last_def_in_block(EA,EA_def,Var), + last_def_in_block(EA,EA_other,Var), + EA_def != EA_other. + /** A is referenced in a block */ diff --git a/tests/cfg_test.py b/tests/cfg_test.py index 7349cfcd..f6fb8765 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -45,6 +45,34 @@ def test_relative_jump_tables(self): dest_blocks = [e.target for e in jumping_block.outgoing_edges] self.assertEqual(set(dest_blocks), set(expected_dest_blocks)) + @unittest.skipUnless( + platform.system() == "Linux", "This test is linux only." + ) + def test_relative_jump_table_with_cmov(self): + """ + Make sure that the jump-table is not resolved when jump-table + boundary cannot be conservatively found due to multiple correlations + between the index register and the correlated register. 
+ """ + + binary = Path("ex") + with cd(ex_asm_dir / "ex_relative_jump_tables4"): + self.assertTrue(compile("gcc", "g++", "-O0", [])) + ir_library = disassemble(binary).ir() + m = ir_library.modules[0] + + # check that the jump_table entry targets do not have + # any incoming edge. + jt_target5_sym = next(m.symbols_named(".jump_table_target5")) + assert isinstance(jt_target5_sym.referent, gtirb.CodeBlock) + jt_target5_block = jt_target5_sym.referent + self.assertEqual(len(list(jt_target5_block.incoming_edges)), 0) + + jt_target6_sym = next(m.symbols_named(".jump_table_target6")) + assert isinstance(jt_target6_sym.referent, gtirb.CodeBlock) + jt_target6_block = jt_target6_sym.referent + self.assertEqual(len(list(jt_target6_block.incoming_edges)), 0) + @unittest.skipUnless( platform.system() == "Linux", "This test is linux only." )