From 4a625dd7a8f145c83a571f9c284d7e3c899bed27 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Tue, 30 Apr 2024 11:29:56 -0400 Subject: [PATCH 1/6] Add alignment for instructions that require alignment --- CHANGELOG.md | 2 + .../ex_aligned_data_in_code/Makefile | 12 ++++++ .../ex_aligned_data_in_code/ex_original.s | 43 +++++++++++++++++++ src/datalog/arch/arch.dl | 9 ++++ src/datalog/arch/intel/arch_x86.dl | 18 ++++++++ src/datalog/main.dl | 20 +++++++++ tests/linux-elf-x64.yaml | 7 +++ 7 files changed, 111 insertions(+) create mode 100644 examples/asm_examples/ex_aligned_data_in_code/Makefile create mode 100644 examples/asm_examples/ex_aligned_data_in_code/ex_original.s diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d519e17..7bebc535 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ * Infer jump table boundaries from comparisons of registers correlated to the index register. * Relax constraints for inferring jump table boundaries from comparisons of indirect operands * Fix bug where a relative jump table starting with consecutive zero offsets was truncated at the first non-zero value. 
+* Add alignment for instructions that require alignment (e.g., some SIMD + instructions) # 1.8.0 diff --git a/examples/asm_examples/ex_aligned_data_in_code/Makefile b/examples/asm_examples/ex_aligned_data_in_code/Makefile new file mode 100644 index 00000000..bbb195de --- /dev/null +++ b/examples/asm_examples/ex_aligned_data_in_code/Makefile @@ -0,0 +1,12 @@ +CC="gcc" +CFLAGS= + +all: ex_original.s + $(CC) ex_original.s $(CFLAGS) -o ex + @./ex > out.txt +clean: + rm -f ex out.txt + rm -fr ex.unstripped ex.s *.old* dl_files *.gtirb +check: + ./ex > /tmp/res.txt + @ diff out.txt /tmp/res.txt && echo TEST OK diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s new file mode 100644 index 00000000..34c35114 --- /dev/null +++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s @@ -0,0 +1,43 @@ +# This example is to demonostrate that data-in-code is properly aligned. +# Otherwise, it may cause a segmentation fault due to alignment requirement +# violation. + + .section .text + +.globl main +.type main, @function +main: + call print_message1 + + # Load data into XMM register using movdqa: `data` needs to be aligned. 
+ movdqa data(%rip), %xmm0 + + call print_message2 + + xorq %rax, %rax + + ret + +.type print_message1, @function +print_message1: + lea message1(%rip), %rdi + call printf + ret + +.align 16 +.type print_message2, @function +print_message2: + lea message2(%rip), %rdi + call printf + ret + +message1: + .ascii "Performing SIMD operations...\n" + .byte 0 +message2: + .ascii "SIMD operations completed.\n" + .byte 0 + +.align 16 +data: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index 3b0165a2..49cbdff3 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -187,6 +187,15 @@ cmp_zero_operation(""):- .decl call_operation_op_index(Operation:symbol,operand_index:operand_index) + +/** +Operations that require alignment. +*/ +.decl operation_alignment_required(Operation:symbol) + +operation_alignment_required(""):- + false. + // =========================================================================== // Address-targeted instruction properties // =========================================================================== diff --git a/src/datalog/arch/intel/arch_x86.dl b/src/datalog/arch/intel/arch_x86.dl index 5925a4b3..0009e829 100644 --- a/src/datalog/arch/intel/arch_x86.dl +++ b/src/datalog/arch/intel/arch_x86.dl @@ -273,4 +273,22 @@ simple_data_load(EA,Data,Size):- instruction_memory_access_size(EA,MemIndex,Size), Size != 0. +// The following AVX instructions always require alignment +// (See Table 14-23 in https://cdrdv2.intel.com/v1/dl/getContent/671200): +operation_alignment_required("MOVDQA"). +operation_alignment_required("MOVAPS"). +operation_alignment_required("MOVAPD"). +operation_alignment_required("MOVNTPS"). +operation_alignment_required("MOVNTPD"). +operation_alignment_required("MOVNTDQ"). +operation_alignment_required("MOVNTDQA"). + +operation_alignment_required("VMOVDQA"). +operation_alignment_required("VMOVAPS"). +operation_alignment_required("VMOVAPD"). 
+operation_alignment_required("VMOVNTPS"). +operation_alignment_required("VMOVNTPD"). +operation_alignment_required("VMOVNTDQ"). +operation_alignment_required("VMOVNTDQA"). + } diff --git a/src/datalog/main.dl b/src/datalog/main.dl index d67ca456..ed3192e2 100644 --- a/src/datalog/main.dl +++ b/src/datalog/main.dl @@ -696,6 +696,26 @@ Information about alignment in bits for a given address alignment(0,0):- false. +// Data in code needs to be aligned sometimes: e.g., SIMD instructions +// with alignment requirement. +// For simplicity, the amount of alignment is determined based on the +// given address here. +// NOTE: This might introduce alignments larger than necessary. +// TODO: Typically, such an operation is associated with a fixed size of +// alignment, which can be inferred by the operand size. +alignment(DataEA, AlignInBits):- + pc_relative_operand(EA,_,DataEA), + instruction_get_operation(EA,Operation), + arch.operation_alignment_required(Operation), + data_in_code(Begin,End), + DataEA >= Begin, + DataEA < End, + ( + DataEA % 32 = 0, AlignInBits = 32; + DataEA % 32 != 0, DataEA % 16 = 0, AlignInBits = 16; + DataEA % 32 != 0, DataEA % 16 != 0, DataEA % 8 = 0, AlignInBits = 8 + ). + ////////////////////////////////////////////////////////////////////////////////// // Operations to abstract features of instructions diff --git a/tests/linux-elf-x64.yaml b/tests/linux-elf-x64.yaml index bfb7a61e..86a40083 100644 --- a/tests/linux-elf-x64.yaml +++ b/tests/linux-elf-x64.yaml @@ -512,6 +512,9 @@ tests: <<: *assembly binary: fun.so + - name: ex_aligned_data_in_code + <<: *assembly + # ---------------------------------------------------------------------------- # Assembly examples. 
(stripped) # ---------------------------------------------------------------------------- @@ -597,6 +600,10 @@ tests: <<: *test-strip-default binary: fun.so + - name: ex_aligned_data_in_code + <<: *assembly + <<: *test-strip-default + # ---------------------------------------------------------------------------- # Relocatable ELF objects (.o). # ---------------------------------------------------------------------------- From 3ba4d1d951240f2427cbf4789ee4c048d8e4d28b Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Wed, 1 May 2024 10:59:58 -0400 Subject: [PATCH 2/6] Add arch.alignment_required instead - Replace arch.operation_alignment_required with arch.alignment_required, which has alignment amount inferred from memory operand - Revise the unit test --- CHANGELOG.md | 4 +- .../ex_aligned_data_in_code/Makefile | 4 +- .../ex_aligned_data_in_code/ex_original.s | 49 +++++++++++++++---- src/datalog/arch/arch.dl | 16 +++--- src/datalog/arch/intel/arch_x86.dl | 14 +++++- src/datalog/main.dl | 13 +---- tests/linux-elf-x64.yaml | 7 --- tests/misc_test.py | 20 ++++++++ 8 files changed, 84 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bebc535..3810ed82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,8 +25,8 @@ * Infer jump table boundaries from comparisons of registers correlated to the index register. * Relax constraints for inferring jump table boundaries from comparisons of indirect operands * Fix bug where a relative jump table starting with consecutive zero offsets was truncated at the first non-zero value. 
-* Add alignment for instructions that require alignment (e.g., some SIMD - instructions) +* Add alignment for instructions that require explicitly aligned memory + (e.g., some SIMD instructions) # 1.8.0 diff --git a/examples/asm_examples/ex_aligned_data_in_code/Makefile b/examples/asm_examples/ex_aligned_data_in_code/Makefile index bbb195de..c3181abd 100644 --- a/examples/asm_examples/ex_aligned_data_in_code/Makefile +++ b/examples/asm_examples/ex_aligned_data_in_code/Makefile @@ -1,8 +1,6 @@ -CC="gcc" -CFLAGS= all: ex_original.s - $(CC) ex_original.s $(CFLAGS) -o ex + gcc ex_original.s -o ex @./ex > out.txt clean: rm -f ex out.txt diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s index 34c35114..e9baa28a 100644 --- a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s +++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s @@ -1,6 +1,8 @@ -# This example is to demonostrate that data-in-code is properly aligned. -# Otherwise, it may cause a segmentation fault due to alignment requirement -# violation. +# This example is to demonstrate that data-in-code is properly aligned +# when it is referenced by instructions that require explicitly aligned memory. +# If not properly aligned, it may cause a segmentation fault due to alignment +# requirement violation. +# See Table 15-6 in https://cdrdv2.intel.com/v1/dl/getContent/671200. .section .text @@ -9,8 +11,17 @@ main: call print_message1 - # Load data into XMM register using movdqa: `data` needs to be aligned. - movdqa data(%rip), %xmm0 + # Load data into XMM register using movdqa: `data128` needs to be aligned. + movdqa data128(%rip), %xmm0 + + # Load data into YMM register using movdqa: `data256` needs to be aligned. + vmovapd data256(%rip), %ymm0 + + # Load data into ZMM register using vmovaps: `data512` needs to be aligned.
+ vmovaps data512(%rip), %zmm0 + + # Load data into ZMM register using vmovups: `data512u` does not need to be aligned. + vmovups data512u(%rip), %zmm1 call print_message2 @@ -30,6 +41,30 @@ print_message2: lea message2(%rip), %rdi call printf ret + .zero 3 + +.align 16 +data128: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +.align 32 +data256: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +.align 64 +data512: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + + .zero 3 +data512u: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + + .section .data message1: .ascii "Performing SIMD operations...\n" @@ -37,7 +72,3 @@ message1: message2: .ascii "SIMD operations completed.\n" .byte 0 - -.align 16 -data: - .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index 49cbdff3..9baee8ef 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -188,14 +188,6 @@ cmp_zero_operation(""):- .decl call_operation_op_index(Operation:symbol,operand_index:operand_index) -/** -Operations that require alignment. -*/ -.decl operation_alignment_required(Operation:symbol) - -operation_alignment_required(""):- - false. - // =========================================================================== // Address-targeted instruction properties // =========================================================================== @@ -553,6 +545,14 @@ invalid(), etc. 
instruction_at(Instruction,Instruction):- instruction(Instruction,_,_,_,_,_,_,_,_,_). +/** +Instruction at EA requires alignment on the referenced memory. +*/ +.decl alignment_required(EA:address,AlignInBits:unsigned) + +alignment_required(0,0):- + false. + /** The size of a register, in bytes. */ diff --git a/src/datalog/arch/intel/arch_x86.dl b/src/datalog/arch/intel/arch_x86.dl index 0009e829..87a50206 100644 --- a/src/datalog/arch/intel/arch_x86.dl +++ b/src/datalog/arch/intel/arch_x86.dl @@ -273,8 +273,12 @@ simple_data_load(EA,Data,Size):- instruction_memory_access_size(EA,MemIndex,Size), Size != 0. -// The following AVX instructions always require alignment -// (See Table 14-23 in https://cdrdv2.intel.com/v1/dl/getContent/671200): +// Operation that requires aligned memory +.decl operation_alignment_required(Operation:symbol) + +// The following SSE/AVX instructions require explicitly aligned memory +// (See Table 15-6 in https://cdrdv2.intel.com/v1/dl/getContent/671200): + operation_alignment_required("MOVDQA"). operation_alignment_required("MOVAPS"). operation_alignment_required("MOVAPD"). @@ -291,4 +295,10 @@ operation_alignment_required("VMOVNTPD"). operation_alignment_required("VMOVNTDQ"). operation_alignment_required("VMOVNTDQA"). +alignment_required(EA,AlignInBits):- + instruction_get_operation(EA,Operation), + operation_alignment_required(Operation), + instruction_get_op(EA,MemIndex,Op), + instruction_memory_access_size(EA,MemIndex,AlignInBits). + } diff --git a/src/datalog/main.dl b/src/datalog/main.dl index ed3192e2..c4a3d618 100644 --- a/src/datalog/main.dl +++ b/src/datalog/main.dl @@ -698,23 +698,12 @@ alignment(0,0):- false. // Data in code needs to be aligned sometimes: e.g., SIMD instructions // with alignment requirement. -// For simplicity, the amount of alignment is determined based on the -// given address here. -// NOTE: This might introduce alignments larger than necessary.
-// TODO: Typically, such an operation is associated with a fixed size of -// alignment, which can be inferred by the operand size. alignment(DataEA, AlignInBits):- pc_relative_operand(EA,_,DataEA), - instruction_get_operation(EA,Operation), - arch.operation_alignment_required(Operation), data_in_code(Begin,End), DataEA >= Begin, DataEA < End, - ( - DataEA % 32 = 0, AlignInBits = 32; - DataEA % 32 != 0, DataEA % 16 = 0, AlignInBits = 16; - DataEA % 32 != 0, DataEA % 16 != 0, DataEA % 8 = 0, AlignInBits = 8 - ). + arch.alignment_required(EA,AlignInBits). ////////////////////////////////////////////////////////////////////////////////// // Operations to abstract features of instructions diff --git a/tests/linux-elf-x64.yaml b/tests/linux-elf-x64.yaml index 86a40083..bfb7a61e 100644 --- a/tests/linux-elf-x64.yaml +++ b/tests/linux-elf-x64.yaml @@ -512,9 +512,6 @@ tests: <<: *assembly binary: fun.so - - name: ex_aligned_data_in_code - <<: *assembly - # ---------------------------------------------------------------------------- # Assembly examples. (stripped) # ---------------------------------------------------------------------------- @@ -600,10 +597,6 @@ tests: <<: *test-strip-default binary: fun.so - - name: ex_aligned_data_in_code - <<: *assembly - <<: *test-strip-default - # ---------------------------------------------------------------------------- # Relocatable ELF objects (.o). # ---------------------------------------------------------------------------- diff --git a/tests/misc_test.py b/tests/misc_test.py index fbe5560f..c697a53b 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -436,6 +436,26 @@ def test_soname(self): self.assertEqual(m.aux_data["elfSoname"].data, binary) + @unittest.skipUnless( + platform.system() == "Linux", "This test is linux only." + ) + def test_aligned_data_in_code(self): + """ + Test that alignment directives are correctly generated for + data_in_code referenced by instructions that require aligned memory. 
+ """ + binary = "ex" + with cd(ex_asm_dir / "ex_aligned_data_in_code"): + self.assertTrue(compile("gcc", "g++", "-O0", [])) + ir = disassemble(Path(binary)).ir() + m = ir.modules[0] + + alignments = m.aux_data["alignment"].data.items() + alignment_list = [alignment for uuid, alignment in alignments] + self.assertEqual(alignment_list.count(16), 2) + self.assertEqual(alignment_list.count(32), 1) + self.assertEqual(alignment_list.count(64), 2) + class RawGtirbTests(unittest.TestCase): @unittest.skipUnless( From f9a736924c0ce9a4ea6df549a9874fa55f4e9cac Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Wed, 1 May 2024 14:09:01 -0400 Subject: [PATCH 3/6] Minor cleanups --- src/datalog/arch/intel/arch_x86.dl | 3 +-- src/datalog/main.dl | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/datalog/arch/intel/arch_x86.dl b/src/datalog/arch/intel/arch_x86.dl index 87a50206..c4082033 100644 --- a/src/datalog/arch/intel/arch_x86.dl +++ b/src/datalog/arch/intel/arch_x86.dl @@ -298,7 +298,6 @@ operation_alignment_required("VMOVNTDQA"). alignment_required(EA,AlignInBits):- instruction_get_operation(EA,Operation), operation_alignment_required(Operation), - instruction_get_op(EA,MemIndex,Op), - instruction_memory_access_size(EA,MemIndex,AlignInBits). + instruction_memory_access_size(EA,_,AlignInBits). } diff --git a/src/datalog/main.dl b/src/datalog/main.dl index c4a3d618..a6a9605a 100644 --- a/src/datalog/main.dl +++ b/src/datalog/main.dl @@ -696,8 +696,8 @@ Information about alignment in bits for a given address alignment(0,0):- false. -// Data in code needs to be aligned sometimes: e.g., SIMD instructions -// with alignment requirement. 
+// Data in code needs to be aligned when referenced by instruction that +// requires aligned memory: e.g., some SIMD instructions alignment(DataEA, AlignInBits):- pc_relative_operand(EA,_,DataEA), data_in_code(Begin,End), From 2b5bf1bb1caaf04063f5f47255ccf3e1a960ab89 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 2 May 2024 12:21:44 -0400 Subject: [PATCH 4/6] Add handling of composite_data_access case --- CHANGELOG.md | 2 +- .../ex_aligned_data_in_code/ex_original.s | 14 +++++++++++--- src/datalog/main.dl | 9 ++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3810ed82..4a844a07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,7 +25,7 @@ * Infer jump table boundaries from comparisons of registers correlated to the index register. * Relax constraints for inferring jump table boundaries from comparisons of indirect operands * Fix bug where a relative jump table starting with consecutive zero offsets was truncated at the first non-zero value. -* Add alignment for instructions that require explicitly aligned memory +* Add alignment for x86-64 instructions that require explicitly aligned memory (e.g., some SIMD instructions) # 1.8.0 diff --git a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s index e9baa28a..368f2e94 100644 --- a/examples/asm_examples/ex_aligned_data_in_code/ex_original.s +++ b/examples/asm_examples/ex_aligned_data_in_code/ex_original.s @@ -11,8 +11,13 @@ main: call print_message1 - # Load data into XMM register using movdqa: `data128` needs to be aligned. - movdqa data128(%rip), %xmm0 + # Load data into XMM register using movdqa: `data128.1` needs to be aligned. + movdqa data128.1(%rip), %xmm0 + + # A pair of instructions from an access to `data128.2`, which needs to + # be aligned. + lea data128.2(%rip), %rax + movdqa 0(%rax), %xmm1 # Load data into YMM register using movdqa: `data256` needs to be aligned. 
vmovapd data256(%rip), %ymm0 @@ -44,7 +49,10 @@ print_message2: .zero 3 .align 16 -data128: +data128.1: + .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +.align 16 +data128.2: .byte 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 .align 32 data256: diff --git a/src/datalog/main.dl b/src/datalog/main.dl index a6a9605a..f2446937 100644 --- a/src/datalog/main.dl +++ b/src/datalog/main.dl @@ -699,11 +699,14 @@ alignment(0,0):- false. // Data in code needs to be aligned when referenced by instruction that // requires aligned memory: e.g., some SIMD instructions alignment(DataEA, AlignInBits):- - pc_relative_operand(EA,_,DataEA), + arch.alignment_required(EA,AlignInBits), + ( + pc_relative_operand(EA,_,DataEA); + composite_data_access(_,EA,DataEA,AlignInBits) + ), data_in_code(Begin,End), DataEA >= Begin, - DataEA < End, - arch.alignment_required(EA,AlignInBits). + DataEA < End. ////////////////////////////////////////////////////////////////////////////////// // Operations to abstract features of instructions From e096caa2b9b94d9ae6828cc72e871e878196cb5e Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 2 May 2024 12:25:43 -0400 Subject: [PATCH 5/6] Update the unit test baseline --- tests/misc_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/misc_test.py b/tests/misc_test.py index c697a53b..3053456b 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -452,7 +452,7 @@ def test_aligned_data_in_code(self): alignments = m.aux_data["alignment"].data.items() alignment_list = [alignment for uuid, alignment in alignments] - self.assertEqual(alignment_list.count(16), 2) + self.assertEqual(alignment_list.count(16), 3) self.assertEqual(alignment_list.count(32), 1) self.assertEqual(alignment_list.count(64), 2) From ae3186a715642bf13d395cb7e8ed51ba27d3a1e9 Mon Sep 17 00:00:00 2001 From: Junghee Lim Date: Thu, 2 May 2024 16:03:50 -0400 Subject: [PATCH 6/6] Add some comment --- tests/misc_test.py | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/tests/misc_test.py b/tests/misc_test.py index 3053456b..a04e7562 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -452,8 +452,12 @@ def test_aligned_data_in_code(self): alignments = m.aux_data["alignment"].data.items() alignment_list = [alignment for uuid, alignment in alignments] + + # alignment=16: `data128.1`, `data128.2`, and `main` self.assertEqual(alignment_list.count(16), 3) + # alignment=32: `data256` self.assertEqual(alignment_list.count(32), 1) + # alignment=64: `data512` and `_start` self.assertEqual(alignment_list.count(64), 2)