diff --git a/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S b/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S index 744c630646c..8f847749e34 100644 --- a/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S +++ b/generated-src/ios-aarch64/crypto/fipsmodule/sha1-armv8.S @@ -8,23 +8,13 @@ .text - -.private_extern _OPENSSL_armcap_P -.globl _sha1_block_data_order -.private_extern _sha1_block_data_order +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw .align 6 -_sha1_block_data_order: +_sha1_block_data_order_nohw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P -#else - adrp x16,_OPENSSL_armcap_P@PAGE -#endif - ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] - tst w16,#ARMV8_SHA1 - b.ne Lv8_entry stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -1081,12 +1071,13 @@ Loop: ldr x29,[sp],#96 ret +.globl _sha1_block_data_order_hw +.private_extern _sha1_block_data_order_hw .align 6 -sha1_block_armv8: +_sha1_block_data_order_hw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -Lv8_entry: stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S b/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S index 05c5dc0834f..0f886deb1e6 100644 --- a/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S +++ b/generated-src/ios-aarch64/crypto/fipsmodule/sha256-armv8.S @@ -50,24 +50,11 @@ .text - -.private_extern _OPENSSL_armcap_P -.globl _sha256_block_data_order -.private_extern _sha256_block_data_order +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw .align 6 -_sha256_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P -#else - adrp x16,_OPENSSL_armcap_P@PAGE -#endif - ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] - tst w16,#ARMV8_SHA256 - b.ne Lv8_entry -#endif +_sha256_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1060,10 +1047,11 @@ LK256: .align 2 .text #ifndef __KERNEL__ +.globl _sha256_block_data_order_hw +.private_extern _sha256_block_data_order_hw .align 6 -sha256_block_armv8: -Lv8_entry: +_sha256_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,_BORINGSSL_function_hit@PAGE @@ -1072,6 +1060,7 @@ Lv8_entry: strb w10, [x9,#6] // kFlag_sha256_hw #endif // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S b/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S index 65dadcaf468..16e09c8c18f 100644 --- a/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S +++ b/generated-src/ios-aarch64/crypto/fipsmodule/sha512-armv8.S @@ -50,24 +50,11 @@ .text - -.private_extern _OPENSSL_armcap_P -.globl _sha512_block_data_order -.private_extern _sha512_block_data_order +.globl _sha512_block_data_order_nohw +.private_extern _sha512_block_data_order_nohw .align 6 -_sha512_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P -#else - adrp x16,_OPENSSL_armcap_P@PAGE -#endif - ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] - tst w16,#ARMV8_SHA512 - b.ne Lv8_entry -#endif +_sha512_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1084,10 +1071,11 @@ LK512: .align 2 .text #ifndef __KERNEL__ +.globl _sha512_block_data_order_hw +.private_extern _sha512_block_data_order_hw .align 6 -sha512_block_armv8: -Lv8_entry: +_sha512_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,_BORINGSSL_function_hit@PAGE @@ -1095,6 +1083,8 @@ Lv8_entry: mov w10, #1 strb w10, [x9,#8] // kFlag_sha512_hw #endif + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S b/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S index a18a8a55788..f2df2dd30c9 100644 --- a/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S +++ b/generated-src/linux-aarch64/crypto/fipsmodule/sha1-armv8.S @@ -8,23 +8,13 @@ .text - -.hidden OPENSSL_armcap_P -.globl sha1_block_data_order -.hidden sha1_block_data_order -.type sha1_block_data_order,%function +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,%function .align 6 -sha1_block_data_order: +sha1_block_data_order_nohw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA1 - b.ne .Lv8_entry stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -1080,13 +1070,14 @@ sha1_block_data_order: ldp x27,x28,[sp,#80] ldr x29,[sp],#96 ret -.size sha1_block_data_order,.-sha1_block_data_order -.type sha1_block_armv8,%function +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,%function .align 6 -sha1_block_armv8: +sha1_block_data_order_hw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -.Lv8_entry: stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1213,7 +1204,7 @@ sha1_block_armv8: ldr x29,[sp],#16 ret -.size sha1_block_armv8,.-sha1_block_armv8 +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw .section .rodata .align 6 .Lconst: diff --git a/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S b/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S index 564f166f318..b66f6ae4080 100644 --- a/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S +++ b/generated-src/linux-aarch64/crypto/fipsmodule/sha256-armv8.S @@ -50,24 +50,11 @@ .text - -.hidden OPENSSL_armcap_P -.globl sha256_block_data_order -.hidden sha256_block_data_order -.type sha256_block_data_order,%function +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,%function .align 6 -sha256_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA256 - b.ne .Lv8_entry -#endif +sha256_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1031,7 +1018,7 @@ sha256_block_data_order: ldp x29,x30,[sp],#128 AARCH64_VALIDATE_LINK_REGISTER ret -.size sha256_block_data_order,.-sha256_block_data_order +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw .section .rodata .align 6 @@ -1060,10 +1047,11 @@ sha256_block_data_order: .align 2 .text #ifndef __KERNEL__ -.type sha256_block_armv8,%function +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,%function .align 6 -sha256_block_armv8: -.Lv8_entry: +sha256_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,BORINGSSL_function_hit @@ -1072,6 +1060,7 @@ sha256_block_armv8: strb w10, [x9,#6] // kFlag_sha256_hw #endif // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1206,6 +1195,6 @@ sha256_block_armv8: ldr x29,[sp],#16 ret -.size sha256_block_armv8,.-sha256_block_armv8 +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw #endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S b/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S index 4e5563cd118..6aa688f8ae7 100644 --- a/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S +++ b/generated-src/linux-aarch64/crypto/fipsmodule/sha512-armv8.S @@ -50,24 +50,11 @@ .text - -.hidden OPENSSL_armcap_P -.globl sha512_block_data_order -.hidden sha512_block_data_order -.type sha512_block_data_order,%function +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,%function .align 6 -sha512_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA512 - b.ne .Lv8_entry -#endif +sha512_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1031,7 +1018,7 @@ sha512_block_data_order: ldp x29,x30,[sp],#128 AARCH64_VALIDATE_LINK_REGISTER ret -.size sha512_block_data_order,.-sha512_block_data_order +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw .section .rodata .align 6 @@ -1084,10 +1071,11 @@ sha512_block_data_order: .align 2 .text #ifndef __KERNEL__ -.type sha512_block_armv8,%function +.globl sha512_block_data_order_hw +.hidden sha512_block_data_order_hw +.type sha512_block_data_order_hw,%function .align 6 -sha512_block_armv8: -.Lv8_entry: +sha512_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,BORINGSSL_function_hit @@ -1095,6 +1083,8 @@ sha512_block_armv8: mov w10, #1 strb w10, [x9,#8] // kFlag_sha512_hw #endif + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 @@ -1608,6 +1598,6 @@ sha512_block_armv8: ldr x29,[sp],#16 ret -.size sha512_block_armv8,.-sha512_block_armv8 +.size sha512_block_data_order_hw,.-sha512_block_data_order_hw #endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_AARCH64) && defined(__ELF__) diff --git a/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S b/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S index eac7a00fbc9..caea7a015b5 100644 --- a/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S +++ b/generated-src/linux-x86_64/crypto/chacha/chacha-x86_64.S @@ -6,9 +6,6 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P - .section .rodata .align 64 .Lzero: @@ -40,19 +37,13 @@ .long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text -.globl ChaCha20_ctr32 -.hidden ChaCha20_ctr32 -.type ChaCha20_ctr32,@function +.globl ChaCha20_ctr32_nohw +.hidden ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,@function .align 64 -ChaCha20_ctr32: +ChaCha20_ctr32_nohw: .cfi_startproc _CET_ENDBR - cmpq $0,%rdx - je .Lno_data - movq OPENSSL_ia32cap_P+4(%rip),%r10 - testl $512,%r10d - jnz .LChaCha20_ssse3 - pushq %rbx .cfi_adjust_cfa_offset 8 .cfi_offset rbx,-16 @@ -329,18 +320,16 @@ _CET_ENDBR .Lno_data: .byte 0xf3,0xc3 .cfi_endproc -.size ChaCha20_ctr32,.-ChaCha20_ctr32 -.type ChaCha20_ssse3,@function +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw +.globl ChaCha20_ctr32_ssse3 +.hidden ChaCha20_ctr32_ssse3 +.type ChaCha20_ctr32_ssse3,@function .align 32 -ChaCha20_ssse3: -.LChaCha20_ssse3: +ChaCha20_ctr32_ssse3: .cfi_startproc +_CET_ENDBR movq %rsp,%r9 .cfi_def_cfa_register r9 - cmpq $128,%rdx - ja .LChaCha20_4x - -.Ldo_sse3_after_all: subq $64+8,%rsp movdqa .Lsigma(%rip),%xmm0 movdqu (%rcx),%xmm1 @@ -466,26 +455,17 @@ ChaCha20_ssse3: .Lssse3_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size ChaCha20_ssse3,.-ChaCha20_ssse3 -.type ChaCha20_4x,@function +.size ChaCha20_ctr32_ssse3,.-ChaCha20_ctr32_ssse3 +.globl ChaCha20_ctr32_ssse3_4x +.hidden ChaCha20_ctr32_ssse3_4x +.type ChaCha20_ctr32_ssse3_4x,@function .align 32 -ChaCha20_4x: -.LChaCha20_4x: +ChaCha20_ctr32_ssse3_4x: .cfi_startproc +_CET_ENDBR movq %rsp,%r9 .cfi_def_cfa_register r9 movq %r10,%r11 - shrq $32,%r10 - testq $32,%r10 - jnz .LChaCha20_8x - cmpq $192,%rdx - ja .Lproceed4x - - andq $71303168,%r11 - cmpq $4194304,%r11 - je .Ldo_sse3_after_all - -.Lproceed4x: subq $0x140+8,%rsp movdqa .Lsigma(%rip),%xmm11 movdqu (%rcx),%xmm15 @@ -1018,12 +998,14 @@ ChaCha20_4x: .L4x_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size ChaCha20_4x,.-ChaCha20_4x -.type ChaCha20_8x,@function +.size ChaCha20_ctr32_ssse3_4x,.-ChaCha20_ctr32_ssse3_4x +.globl ChaCha20_ctr32_avx2 +.hidden ChaCha20_ctr32_avx2 +.type ChaCha20_ctr32_avx2,@function .align 32 -ChaCha20_8x: -.LChaCha20_8x: +ChaCha20_ctr32_avx2: .cfi_startproc +_CET_ENDBR movq %rsp,%r9 .cfi_def_cfa_register r9 subq $0x280+8,%rsp @@ -1624,5 +1606,5 @@ ChaCha20_8x: .L8x_epilogue: .byte 0xf3,0xc3 .cfi_endproc -.size ChaCha20_8x,.-ChaCha20_8x +.size ChaCha20_ctr32_avx2,.-ChaCha20_ctr32_avx2 #endif diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S b/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S index 1b64f02014a..9596e290240 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S @@ -5,36 +5,14 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P -.globl sha1_block_data_order -.hidden sha1_block_data_order -.type sha1_block_data_order,@function +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,@function .align 16 -sha1_block_data_order: +sha1_block_data_order_nohw: .cfi_startproc _CET_ENDBR - leaq OPENSSL_ia32cap_P(%rip),%r10 - movl 0(%r10),%r9d - movl 4(%r10),%r8d - movl 8(%r10),%r10d - testl $512,%r8d - jz .Lialu - testl $536870912,%r10d - jnz _shaext_shortcut - andl $296,%r10d - cmpl $296,%r10d - je _avx2_shortcut - andl $268435456,%r8d - andl $1073741824,%r9d - orl %r9d,%r8d - cmpl $1342177280,%r8d - je _avx_shortcut - jmp _ssse3_shortcut - -.align 16 -.Lialu: movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx @@ -1264,12 +1242,14 @@ _CET_ENDBR .Lepilogue: .byte 0xf3,0xc3 .cfi_endproc -.size sha1_block_data_order,.-sha1_block_data_order -.type sha1_block_data_order_shaext,@function +.size sha1_block_data_order_nohw,.-sha1_block_data_order_nohw +.globl sha1_block_data_order_hw +.hidden sha1_block_data_order_hw +.type sha1_block_data_order_hw,@function .align 32 -sha1_block_data_order_shaext: -_shaext_shortcut: +sha1_block_data_order_hw: .cfi_startproc +_CET_ENDBR movdqu (%rdi),%xmm0 movd 16(%rdi),%xmm1 movdqa K_XX_XX+160(%rip),%xmm3 @@ -1434,12 +1414,14 @@ _shaext_shortcut: movd %xmm1,16(%rdi) .byte 0xf3,0xc3 .cfi_endproc -.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext +.size sha1_block_data_order_hw,.-sha1_block_data_order_hw +.globl sha1_block_data_order_ssse3 +.hidden sha1_block_data_order_ssse3 .type sha1_block_data_order_ssse3,@function .align 16 sha1_block_data_order_ssse3: -_ssse3_shortcut: .cfi_startproc +_CET_ENDBR movq %rsp,%r11 .cfi_def_cfa_register %r11 pushq %rbx @@ -2623,11 +2605,13 @@ _ssse3_shortcut: .byte 0xf3,0xc3 .cfi_endproc .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.globl sha1_block_data_order_avx +.hidden sha1_block_data_order_avx .type sha1_block_data_order_avx,@function .align 16 sha1_block_data_order_avx: -_avx_shortcut: .cfi_startproc +_CET_ENDBR movq %rsp,%r11 .cfi_def_cfa_register %r11 pushq %rbx @@ -3751,11 +3735,13 @@ _avx_shortcut: .byte 0xf3,0xc3 .cfi_endproc .size sha1_block_data_order_avx,.-sha1_block_data_order_avx +.globl sha1_block_data_order_avx2 +.hidden sha1_block_data_order_avx2 .type sha1_block_data_order_avx2,@function .align 16 sha1_block_data_order_avx2: -_avx2_shortcut: .cfi_startproc +_CET_ENDBR movq %rsp,%r11 .cfi_def_cfa_register %r11 pushq %rbx diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S b/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S index 554d3e4d19e..468c4e13467 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S @@ -6,28 +6,13 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P -.globl sha256_block_data_order -.hidden sha256_block_data_order -.type sha256_block_data_order,@function +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,@function .align 16 -sha256_block_data_order: +sha256_block_data_order_nohw: .cfi_startproc _CET_ENDBR - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - testl $536870912,%r11d - jnz .Lshaext_shortcut - andl $1073741824,%r9d - andl $268435968,%r10d - orl %r9d,%r10d - cmpl $1342177792,%r10d - je .Lavx_shortcut - testl $512,%r10d - jnz .Lssse3_shortcut movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx @@ -1733,7 +1718,7 @@ _CET_ENDBR .Lepilogue: .byte 0xf3,0xc3 .cfi_endproc -.size sha256_block_data_order,.-sha256_block_data_order +.size sha256_block_data_order_nohw,.-sha256_block_data_order_nohw .section .rodata .align 64 .type K256,@object @@ -1779,16 +1764,18 @@ K256: .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text -.type sha256_block_data_order_shaext,@function +.globl sha256_block_data_order_hw +.hidden sha256_block_data_order_hw +.type sha256_block_data_order_hw,@function .align 64 -sha256_block_data_order_shaext: -.Lshaext_shortcut: +sha256_block_data_order_hw: .cfi_startproc #ifdef BORINGSSL_DISPATCH_TEST .extern BORINGSSL_function_hit .hidden BORINGSSL_function_hit movb $1,BORINGSSL_function_hit+6(%rip) #endif +_CET_ENDBR leaq K256+128(%rip),%rcx movdqu (%rdi),%xmm1 movdqu 16(%rdi),%xmm2 @@ -1992,12 +1979,14 @@ sha256_block_data_order_shaext: movdqu %xmm2,16(%rdi) .byte 0xf3,0xc3 .cfi_endproc -.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext +.size sha256_block_data_order_hw,.-sha256_block_data_order_hw +.globl sha256_block_data_order_ssse3 +.hidden sha256_block_data_order_ssse3 .type sha256_block_data_order_ssse3,@function .align 64 sha256_block_data_order_ssse3: .cfi_startproc -.Lssse3_shortcut: +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx @@ -3106,11 +3095,13 @@ sha256_block_data_order_ssse3: .byte 0xf3,0xc3 .cfi_endproc .size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 +.globl sha256_block_data_order_avx +.hidden sha256_block_data_order_avx .type sha256_block_data_order_avx,@function .align 64 sha256_block_data_order_avx: .cfi_startproc -.Lavx_shortcut: +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S b/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S index 66a60d73d0e..117d55eed48 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/sha512-x86_64.S @@ -6,24 +6,13 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__ELF__) .text -.extern OPENSSL_ia32cap_P -.hidden OPENSSL_ia32cap_P -.globl sha512_block_data_order -.hidden sha512_block_data_order -.type sha512_block_data_order,@function +.globl sha512_block_data_order_nohw +.hidden sha512_block_data_order_nohw +.type sha512_block_data_order_nohw,@function .align 16 -sha512_block_data_order: +sha512_block_data_order_nohw: .cfi_startproc _CET_ENDBR - leaq OPENSSL_ia32cap_P(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - andl $1073741824,%r9d - andl $268435968,%r10d - orl %r9d,%r10d - cmpl $1342177792,%r10d - je .Lavx_shortcut movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx @@ -1729,7 +1718,7 @@ _CET_ENDBR .Lepilogue: .byte 0xf3,0xc3 .cfi_endproc -.size sha512_block_data_order,.-sha512_block_data_order +.size sha512_block_data_order_nohw,.-sha512_block_data_order_nohw .section .rodata .align 64 .type K512,@object @@ -1819,11 +1808,13 @@ K512: .quad 0x0001020304050607,0x08090a0b0c0d0e0f .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text +.globl sha512_block_data_order_avx +.hidden sha512_block_data_order_avx .type sha512_block_data_order_avx,@function .align 64 sha512_block_data_order_avx: .cfi_startproc -.Lavx_shortcut: +_CET_ENDBR movq %rsp,%rax .cfi_def_cfa_register %rax pushq %rbx diff --git a/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S b/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S index b80364b7a42..c2c9b4bf386 100644 --- a/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S +++ b/generated-src/mac-x86_64/crypto/chacha/chacha-x86_64.S @@ -6,8 +6,6 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - - .section __DATA,__const .p2align 6 L$zero: @@ -39,19 +37,13 @@ L$sixteen: .long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 .byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text -.globl _ChaCha20_ctr32 -.private_extern _ChaCha20_ctr32 +.globl _ChaCha20_ctr32_nohw +.private_extern _ChaCha20_ctr32_nohw .p2align 6 -_ChaCha20_ctr32: +_ChaCha20_ctr32_nohw: _CET_ENDBR - cmpq $0,%rdx - je L$no_data - movq _OPENSSL_ia32cap_P+4(%rip),%r10 - testl $512,%r10d - jnz L$ChaCha20_ssse3 - pushq %rbx pushq %rbp @@ -323,17 +315,15 @@ L$no_data: .byte 0xf3,0xc3 +.globl _ChaCha20_ctr32_ssse3 +.private_extern _ChaCha20_ctr32_ssse3 .p2align 5 -ChaCha20_ssse3: -L$ChaCha20_ssse3: +_ChaCha20_ctr32_ssse3: +_CET_ENDBR movq %rsp,%r9 - cmpq $128,%rdx - ja L$ChaCha20_4x - -L$do_sse3_after_all: subq $64+8,%rsp movdqa L$sigma(%rip),%xmm0 movdqu (%rcx),%xmm1 @@ -460,25 +450,16 @@ L$ssse3_epilogue: .byte 0xf3,0xc3 +.globl _ChaCha20_ctr32_ssse3_4x +.private_extern _ChaCha20_ctr32_ssse3_4x .p2align 5 -ChaCha20_4x: -L$ChaCha20_4x: +_ChaCha20_ctr32_ssse3_4x: +_CET_ENDBR movq %rsp,%r9 movq %r10,%r11 - shrq $32,%r10 - testq $32,%r10 - jnz L$ChaCha20_8x - cmpq $192,%rdx - ja L$proceed4x - - andq $71303168,%r11 - cmpq $4194304,%r11 - je L$do_sse3_after_all - -L$proceed4x: subq $0x140+8,%rsp movdqa L$sigma(%rip),%xmm11 movdqu (%rcx),%xmm15 @@ -1012,11 +993,13 @@ L$4x_epilogue: .byte 0xf3,0xc3 +.globl _ChaCha20_ctr32_avx2 +.private_extern _ChaCha20_ctr32_avx2 .p2align 5 -ChaCha20_8x: -L$ChaCha20_8x: +_ChaCha20_ctr32_avx2: +_CET_ENDBR movq %rsp,%r9 subq $0x280+8,%rsp diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S b/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S index 51bdebbf6fa..c2da422bbe2 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S @@ -6,34 +6,13 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - -.globl _sha1_block_data_order -.private_extern _sha1_block_data_order +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw .p2align 4 -_sha1_block_data_order: +_sha1_block_data_order_nohw: _CET_ENDBR - leaq _OPENSSL_ia32cap_P(%rip),%r10 - movl 0(%r10),%r9d - movl 4(%r10),%r8d - movl 8(%r10),%r10d - testl $512,%r8d - jz L$ialu - testl $536870912,%r10d - jnz _shaext_shortcut - andl $296,%r10d - cmpl $296,%r10d - je _avx2_shortcut - andl $268435456,%r8d - andl $1073741824,%r9d - orl %r9d,%r8d - cmpl $1342177280,%r8d - je _avx_shortcut - jmp _ssse3_shortcut - -.p2align 4 -L$ialu: movq %rsp,%rax pushq %rbx @@ -1264,11 +1243,13 @@ L$epilogue: .byte 0xf3,0xc3 +.globl _sha1_block_data_order_hw +.private_extern _sha1_block_data_order_hw .p2align 5 -sha1_block_data_order_shaext: -_shaext_shortcut: +_sha1_block_data_order_hw: +_CET_ENDBR movdqu (%rdi),%xmm0 movd 16(%rdi),%xmm1 movdqa K_XX_XX+160(%rip),%xmm3 @@ -1434,11 +1415,13 @@ L$oop_shaext: .byte 0xf3,0xc3 +.globl _sha1_block_data_order_ssse3 +.private_extern _sha1_block_data_order_ssse3 .p2align 4 -sha1_block_data_order_ssse3: -_ssse3_shortcut: +_sha1_block_data_order_ssse3: +_CET_ENDBR movq %rsp,%r11 pushq %rbx @@ -2622,11 +2605,13 @@ L$epilogue_ssse3: .byte 0xf3,0xc3 +.globl _sha1_block_data_order_avx +.private_extern _sha1_block_data_order_avx .p2align 4 -sha1_block_data_order_avx: -_avx_shortcut: +_sha1_block_data_order_avx: +_CET_ENDBR movq %rsp,%r11 pushq %rbx @@ -3750,11 +3735,13 @@ L$epilogue_avx: .byte 0xf3,0xc3 +.globl _sha1_block_data_order_avx2 +.private_extern _sha1_block_data_order_avx2 .p2align 4 -sha1_block_data_order_avx2: -_avx2_shortcut: +_sha1_block_data_order_avx2: +_CET_ENDBR movq %rsp,%r11 pushq %rbx diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S b/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S index a7307cf7775..31e0cb39953 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S @@ -6,27 +6,13 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - -.globl _sha256_block_data_order -.private_extern _sha256_block_data_order +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw .p2align 4 -_sha256_block_data_order: +_sha256_block_data_order_nohw: _CET_ENDBR - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - testl $536870912,%r11d - jnz L$shaext_shortcut - andl $1073741824,%r9d - andl $268435968,%r10d - orl %r9d,%r10d - cmpl $1342177792,%r10d - je L$avx_shortcut - testl $512,%r10d - jnz L$ssse3_shortcut movq %rsp,%rax pushq %rbx @@ -1778,15 +1764,17 @@ K256: .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text +.globl _sha256_block_data_order_hw +.private_extern _sha256_block_data_order_hw .p2align 6 -sha256_block_data_order_shaext: -L$shaext_shortcut: +_sha256_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST movb $1,_BORINGSSL_function_hit+6(%rip) #endif +_CET_ENDBR leaq K256+128(%rip),%rcx movdqu (%rdi),%xmm1 movdqu 16(%rdi),%xmm2 @@ -1991,11 +1979,13 @@ L$oop_shaext: .byte 0xf3,0xc3 +.globl _sha256_block_data_order_ssse3 +.private_extern _sha256_block_data_order_ssse3 .p2align 6 -sha256_block_data_order_ssse3: +_sha256_block_data_order_ssse3: -L$ssse3_shortcut: +_CET_ENDBR movq %rsp,%rax pushq %rbx @@ -3104,11 +3094,13 @@ L$epilogue_ssse3: .byte 0xf3,0xc3 +.globl _sha256_block_data_order_avx +.private_extern _sha256_block_data_order_avx .p2align 6 -sha256_block_data_order_avx: +_sha256_block_data_order_avx: -L$avx_shortcut: +_CET_ENDBR movq %rsp,%rax pushq %rbx diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S b/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S index 9adc2029143..5b873695c85 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S @@ -6,23 +6,13 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && defined(__APPLE__) .text - -.globl _sha512_block_data_order -.private_extern _sha512_block_data_order +.globl _sha512_block_data_order_nohw +.private_extern _sha512_block_data_order_nohw .p2align 4 -_sha512_block_data_order: +_sha512_block_data_order_nohw: _CET_ENDBR - leaq _OPENSSL_ia32cap_P(%rip),%r11 - movl 0(%r11),%r9d - movl 4(%r11),%r10d - movl 8(%r11),%r11d - andl $1073741824,%r9d - andl $268435968,%r10d - orl %r9d,%r10d - cmpl $1342177792,%r10d - je L$avx_shortcut movq %rsp,%rax pushq %rbx @@ -1818,11 +1808,13 @@ K512: .quad 0x0001020304050607,0x08090a0b0c0d0e0f .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .text +.globl _sha512_block_data_order_avx +.private_extern _sha512_block_data_order_avx .p2align 6 -sha512_block_data_order_avx: +_sha512_block_data_order_avx: -L$avx_shortcut: +_CET_ENDBR movq %rsp,%rax pushq %rbx diff --git a/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S b/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S index f5082a0d1b7..f8c8b861218 100644 --- a/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S +++ b/generated-src/win-aarch64/crypto/fipsmodule/sha1-armv8.S @@ -8,25 +8,15 @@ .text +.globl sha1_block_data_order_nohw - -.globl sha1_block_data_order - -.def sha1_block_data_order +.def sha1_block_data_order_nohw .type 32 .endef .align 6 -sha1_block_data_order: +sha1_block_data_order_nohw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA1 - b.ne Lv8_entry stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -1083,14 +1073,15 @@ Loop: ldr x29,[sp],#96 ret -.def sha1_block_armv8 +.globl sha1_block_data_order_hw + +.def sha1_block_data_order_hw .type 32 .endef .align 6 -sha1_block_armv8: +sha1_block_data_order_hw: // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. AARCH64_VALID_CALL_TARGET -Lv8_entry: stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S b/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S index 493fc36bfa0..b77f9e8346e 100644 --- a/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S +++ b/generated-src/win-aarch64/crypto/fipsmodule/sha256-armv8.S @@ -50,26 +50,13 @@ .text +.globl sha256_block_data_order_nohw - -.globl sha256_block_data_order - -.def sha256_block_data_order +.def sha256_block_data_order_nohw .type 32 .endef .align 6 -sha256_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA256 - b.ne Lv8_entry -#endif +sha256_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1062,12 +1049,13 @@ LK256: .align 2 .text #ifndef __KERNEL__ -.def sha256_block_armv8 +.globl sha256_block_data_order_hw + +.def sha256_block_data_order_hw .type 32 .endef .align 6 -sha256_block_armv8: -Lv8_entry: +sha256_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,BORINGSSL_function_hit @@ -1076,6 +1064,7 @@ Lv8_entry: strb w10, [x9,#6] // kFlag_sha256_hw #endif // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S b/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S index cbb173e68d1..fcfefb0b29d 100644 --- a/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S +++ b/generated-src/win-aarch64/crypto/fipsmodule/sha512-armv8.S @@ -50,26 +50,13 @@ .text +.globl sha512_block_data_order_nohw - -.globl sha512_block_data_order - -.def sha512_block_data_order +.def sha512_block_data_order_nohw .type 32 .endef .align 6 -sha512_block_data_order: - AARCH64_VALID_CALL_TARGET -#ifndef __KERNEL__ -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp x16,:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp x16,OPENSSL_armcap_P -#endif - ldr w16,[x16,:lo12:OPENSSL_armcap_P] - tst w16,#ARMV8_SHA512 - b.ne Lv8_entry -#endif +sha512_block_data_order_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-128]! add x29,sp,#0 @@ -1086,12 +1073,13 @@ LK512: .align 2 .text #ifndef __KERNEL__ -.def sha512_block_armv8 +.globl sha512_block_data_order_hw + +.def sha512_block_data_order_hw .type 32 .endef .align 6 -sha512_block_armv8: -Lv8_entry: +sha512_block_data_order_hw: #ifdef BORINGSSL_DISPATCH_TEST adrp x9,BORINGSSL_function_hit @@ -1099,6 +1087,8 @@ Lv8_entry: mov w10, #1 strb w10, [x9,#8] // kFlag_sha512_hw #endif + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. + AARCH64_VALID_CALL_TARGET stp x29,x30,[sp,#-16]! add x29,sp,#0 diff --git a/generated-src/win-x86_64/crypto/chacha/chacha-x86_64.asm b/generated-src/win-x86_64/crypto/chacha/chacha-x86_64.asm index 13e8f61f77c..b2a66c7439f 100644 --- a/generated-src/win-x86_64/crypto/chacha/chacha-x86_64.asm +++ b/generated-src/win-x86_64/crypto/chacha/chacha-x86_64.asm @@ -12,8 +12,6 @@ default rel section .text code align=64 -EXTERN OPENSSL_ia32cap_P - section .rdata rdata align=8 ALIGN 64 $L$zero: @@ -50,14 +48,14 @@ $L$sixteen: DB 108,46,111,114,103,62,0 section .text -global ChaCha20_ctr32 +global ChaCha20_ctr32_nohw ALIGN 64 -ChaCha20_ctr32: +ChaCha20_ctr32_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_ChaCha20_ctr32: +$L$SEH_begin_ChaCha20_ctr32_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -67,12 +65,6 @@ $L$SEH_begin_ChaCha20_ctr32: _CET_ENDBR - cmp rdx,0 - je NEAR $L$no_data - mov r10,QWORD[((OPENSSL_ia32cap_P+4))] - test r10d,512 - jnz NEAR $L$ChaCha20_ssse3 - push rbx push rbp @@ -345,14 +337,15 @@ $L$no_data: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_ChaCha20_ctr32: +$L$SEH_end_ChaCha20_ctr32_nohw: +global ChaCha20_ctr32_ssse3 ALIGN 32 -ChaCha20_ssse3: +ChaCha20_ctr32_ssse3: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_ChaCha20_ssse3: +$L$SEH_begin_ChaCha20_ctr32_ssse3: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -360,14 +353,10 @@ $L$SEH_begin_ChaCha20_ssse3: mov r8,QWORD[40+rsp] -$L$ChaCha20_ssse3: +_CET_ENDBR mov r9,rsp - cmp rdx,128 - ja NEAR $L$ChaCha20_4x - -$L$do_sse3_after_all: sub rsp,64+40 movaps XMMWORD[(-40)+r9],xmm6 movaps XMMWORD[(-24)+r9],xmm7 @@ -500,14 +489,15 @@ $L$ssse3_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_ChaCha20_ssse3: +$L$SEH_end_ChaCha20_ctr32_ssse3: +global ChaCha20_ctr32_ssse3_4x ALIGN 32 -ChaCha20_4x: +ChaCha20_ctr32_ssse3_4x: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_ChaCha20_4x: +$L$SEH_begin_ChaCha20_ctr32_ssse3_4x: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -515,22 +505,11 @@ $L$SEH_begin_ChaCha20_4x: mov r8,QWORD[40+rsp] -$L$ChaCha20_4x: +_CET_ENDBR mov r9,rsp mov r11,r10 - shr r10,32 - test r10,32 - jnz NEAR $L$ChaCha20_8x - cmp rdx,192 - ja NEAR $L$proceed4x - - and r11,71303168 - cmp r11,4194304 - je NEAR $L$do_sse3_after_all - -$L$proceed4x: sub rsp,0x140+168 movaps XMMWORD[(-168)+r9],xmm6 movaps XMMWORD[(-152)+r9],xmm7 @@ -1086,14 +1065,15 @@ $L$4x_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_ChaCha20_4x: +$L$SEH_end_ChaCha20_ctr32_ssse3_4x: +global ChaCha20_ctr32_avx2 ALIGN 32 -ChaCha20_8x: +ChaCha20_ctr32_avx2: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_ChaCha20_8x: +$L$SEH_begin_ChaCha20_ctr32_avx2: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -1101,8 +1081,8 @@ $L$SEH_begin_ChaCha20_8x: mov r8,QWORD[40+rsp] -$L$ChaCha20_8x: +_CET_ENDBR mov r9,rsp sub rsp,0x280+168 @@ -1726,7 +1706,7 @@ $L$8x_epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_ChaCha20_8x: +$L$SEH_end_ChaCha20_ctr32_avx2: EXTERN __imp_RtlVirtualUnwind ALIGN 16 @@ -1895,36 +1875,36 @@ full_handler: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase - DD $L$SEH_end_ChaCha20_ctr32 wrt ..imagebase - DD $L$SEH_info_ChaCha20_ctr32 wrt ..imagebase - - DD $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase - DD $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase - DD $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase - - DD $L$SEH_begin_ChaCha20_4x wrt ..imagebase - DD $L$SEH_end_ChaCha20_4x wrt ..imagebase - DD $L$SEH_info_ChaCha20_4x wrt ..imagebase - DD $L$SEH_begin_ChaCha20_8x wrt ..imagebase - DD $L$SEH_end_ChaCha20_8x wrt ..imagebase - DD $L$SEH_info_ChaCha20_8x wrt ..imagebase + DD $L$SEH_begin_ChaCha20_ctr32_nohw wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_nohw wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_nohw wrt ..imagebase + + DD $L$SEH_begin_ChaCha20_ctr32_ssse3 wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_ssse3 wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_ssse3 wrt ..imagebase + + DD $L$SEH_begin_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_ssse3_4x wrt ..imagebase + DD $L$SEH_begin_ChaCha20_ctr32_avx2 wrt ..imagebase + DD $L$SEH_end_ChaCha20_ctr32_avx2 wrt ..imagebase + DD $L$SEH_info_ChaCha20_ctr32_avx2 wrt ..imagebase section .xdata rdata align=8 ALIGN 8 -$L$SEH_info_ChaCha20_ctr32: +$L$SEH_info_ChaCha20_ctr32_nohw: DB 9,0,0,0 DD se_handler wrt ..imagebase -$L$SEH_info_ChaCha20_ssse3: +$L$SEH_info_ChaCha20_ctr32_ssse3: DB 9,0,0,0 DD ssse3_handler wrt ..imagebase DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase -$L$SEH_info_ChaCha20_4x: +$L$SEH_info_ChaCha20_ctr32_ssse3_4x: DB 9,0,0,0 DD full_handler wrt ..imagebase DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase -$L$SEH_info_ChaCha20_8x: +$L$SEH_info_ChaCha20_ctr32_avx2: DB 9,0,0,0 DD full_handler wrt ..imagebase DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase diff --git a/generated-src/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm b/generated-src/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm index bb9a775471a..987fe122d65 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm @@ -11,16 +11,15 @@ default rel %include "openssl/boringssl_prefix_symbols_nasm.inc" section .text code align=64 -EXTERN OPENSSL_ia32cap_P -global sha1_block_data_order +global sha1_block_data_order_nohw ALIGN 16 -sha1_block_data_order: +sha1_block_data_order_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_sha1_block_data_order: +$L$SEH_begin_sha1_block_data_order_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -28,26 +27,6 @@ $L$SEH_begin_sha1_block_data_order: _CET_ENDBR - lea r10,[OPENSSL_ia32cap_P] - mov r9d,DWORD[r10] - mov r8d,DWORD[4+r10] - mov r10d,DWORD[8+r10] - test r8d,512 - jz NEAR $L$ialu - test r10d,536870912 - jnz NEAR _shaext_shortcut - and r10d,296 - cmp r10d,296 - je NEAR _avx2_shortcut - and r8d,268435456 - and r9d,1073741824 - or r8d,r9d - cmp r8d,1342177280 - je NEAR _avx_shortcut - jmp NEAR _ssse3_shortcut - -ALIGN 16 -$L$ialu: mov rax,rsp push rbx @@ -1279,21 +1258,22 @@ $L$epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha1_block_data_order: +$L$SEH_end_sha1_block_data_order_nohw: +global sha1_block_data_order_hw ALIGN 32 -sha1_block_data_order_shaext: +sha1_block_data_order_hw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_sha1_block_data_order_shaext: +$L$SEH_begin_sha1_block_data_order_hw: mov rdi,rcx mov rsi,rdx mov rdx,r8 -_shaext_shortcut: +_CET_ENDBR lea rsp,[((-72))+rsp] movaps XMMWORD[(-8-64)+rax],xmm6 movaps XMMWORD[(-8-48)+rax],xmm7 @@ -1472,7 +1452,8 @@ $L$epilogue_shaext: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha1_block_data_order_shaext: +$L$SEH_end_sha1_block_data_order_hw: +global sha1_block_data_order_ssse3 ALIGN 16 sha1_block_data_order_ssse3: @@ -1485,8 +1466,8 @@ $L$SEH_begin_sha1_block_data_order_ssse3: mov rdx,r8 -_ssse3_shortcut: +_CET_ENDBR mov r11,rsp push rbx @@ -2685,6 +2666,7 @@ $L$epilogue_ssse3: DB 0F3h,0C3h ;repret $L$SEH_end_sha1_block_data_order_ssse3: +global sha1_block_data_order_avx ALIGN 16 sha1_block_data_order_avx: @@ -2697,8 +2679,8 @@ $L$SEH_begin_sha1_block_data_order_avx: mov rdx,r8 -_avx_shortcut: +_CET_ENDBR mov r11,rsp push rbx @@ -3837,6 +3819,7 @@ $L$epilogue_avx: DB 0F3h,0C3h ;repret $L$SEH_end_sha1_block_data_order_avx: +global sha1_block_data_order_avx2 ALIGN 16 sha1_block_data_order_avx2: @@ -3849,8 +3832,8 @@ $L$SEH_begin_sha1_block_data_order_avx2: mov rdx,r8 -_avx2_shortcut: +_CET_ENDBR mov r11,rsp push rbx @@ -5742,12 +5725,12 @@ $L$common_seh_tail: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_sha1_block_data_order wrt ..imagebase - DD $L$SEH_end_sha1_block_data_order wrt ..imagebase - DD $L$SEH_info_sha1_block_data_order wrt ..imagebase - DD $L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase - DD $L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase - DD $L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_nohw wrt ..imagebase + DD $L$SEH_begin_sha1_block_data_order_hw wrt ..imagebase + DD $L$SEH_end_sha1_block_data_order_hw wrt ..imagebase + DD $L$SEH_info_sha1_block_data_order_hw wrt ..imagebase DD $L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase @@ -5759,10 +5742,10 @@ ALIGN 4 DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase section .xdata rdata align=8 ALIGN 8 -$L$SEH_info_sha1_block_data_order: +$L$SEH_info_sha1_block_data_order_nohw: DB 9,0,0,0 DD se_handler wrt ..imagebase -$L$SEH_info_sha1_block_data_order_shaext: +$L$SEH_info_sha1_block_data_order_hw: DB 9,0,0,0 DD shaext_handler wrt ..imagebase $L$SEH_info_sha1_block_data_order_ssse3: diff --git a/generated-src/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm b/generated-src/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm index e7d366a1323..d43cd0094fe 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm @@ -12,15 +12,14 @@ default rel section .text code align=64 -EXTERN OPENSSL_ia32cap_P -global sha256_block_data_order +global sha256_block_data_order_nohw ALIGN 16 -sha256_block_data_order: +sha256_block_data_order_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_sha256_block_data_order: +$L$SEH_begin_sha256_block_data_order_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -28,19 +27,6 @@ $L$SEH_begin_sha256_block_data_order: _CET_ENDBR - lea r11,[OPENSSL_ia32cap_P] - mov r9d,DWORD[r11] - mov r10d,DWORD[4+r11] - mov r11d,DWORD[8+r11] - test r11d,536870912 - jnz NEAR $L$shaext_shortcut - and r9d,1073741824 - and r10d,268435968 - or r10d,r9d - cmp r10d,1342177792 - je NEAR $L$avx_shortcut - test r10d,512 - jnz NEAR $L$ssse3_shortcut mov rax,rsp push rbx @@ -1748,7 +1734,7 @@ $L$epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha256_block_data_order: +$L$SEH_end_sha256_block_data_order_nohw: section .rdata rdata align=8 ALIGN 64 @@ -1799,24 +1785,25 @@ K256: DB 111,114,103,62,0 section .text +global sha256_block_data_order_hw ALIGN 64 -sha256_block_data_order_shaext: +sha256_block_data_order_hw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_sha256_block_data_order_shaext: +$L$SEH_begin_sha256_block_data_order_hw: mov rdi,rcx mov rsi,rdx mov rdx,r8 -$L$shaext_shortcut: %ifdef BORINGSSL_DISPATCH_TEST EXTERN BORINGSSL_function_hit mov BYTE[((BORINGSSL_function_hit+6))],1 %endif +_CET_ENDBR lea rsp,[((-88))+rsp] movaps XMMWORD[(-8-80)+rax],xmm6 movaps XMMWORD[(-8-64)+rax],xmm7 @@ -2036,7 +2023,8 @@ $L$epilogue_shaext: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha256_block_data_order_shaext: +$L$SEH_end_sha256_block_data_order_hw: +global sha256_block_data_order_ssse3 ALIGN 64 sha256_block_data_order_ssse3: @@ -2050,7 +2038,7 @@ $L$SEH_begin_sha256_block_data_order_ssse3: -$L$ssse3_shortcut: +_CET_ENDBR mov rax,rsp push rbx @@ -3169,6 +3157,7 @@ $L$epilogue_ssse3: DB 0F3h,0C3h ;repret $L$SEH_end_sha256_block_data_order_ssse3: +global sha256_block_data_order_avx ALIGN 64 sha256_block_data_order_avx: @@ -3182,7 +3171,7 @@ $L$SEH_begin_sha256_block_data_order_avx: -$L$avx_shortcut: +_CET_ENDBR mov rax,rsp push rbx @@ -4393,12 +4382,12 @@ shaext_handler: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase - DD $L$SEH_end_sha256_block_data_order wrt ..imagebase - DD $L$SEH_info_sha256_block_data_order wrt ..imagebase - DD $L$SEH_begin_sha256_block_data_order_shaext wrt ..imagebase - DD $L$SEH_end_sha256_block_data_order_shaext wrt ..imagebase - DD $L$SEH_info_sha256_block_data_order_shaext wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_nohw wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_hw wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_hw wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_hw wrt ..imagebase DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase @@ -4407,11 +4396,11 @@ ALIGN 4 DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase section .xdata rdata align=8 ALIGN 8 -$L$SEH_info_sha256_block_data_order: +$L$SEH_info_sha256_block_data_order_nohw: DB 9,0,0,0 DD se_handler wrt ..imagebase DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase -$L$SEH_info_sha256_block_data_order_shaext: +$L$SEH_info_sha256_block_data_order_hw: DB 9,0,0,0 DD shaext_handler wrt ..imagebase $L$SEH_info_sha256_block_data_order_ssse3: diff --git a/generated-src/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm b/generated-src/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm index 2cfb76b7620..f81b4630bd4 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm @@ -12,15 +12,14 @@ default rel section .text code align=64 -EXTERN OPENSSL_ia32cap_P -global sha512_block_data_order +global sha512_block_data_order_nohw ALIGN 16 -sha512_block_data_order: +sha512_block_data_order_nohw: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp -$L$SEH_begin_sha512_block_data_order: +$L$SEH_begin_sha512_block_data_order_nohw: mov rdi,rcx mov rsi,rdx mov rdx,r8 @@ -28,15 +27,6 @@ $L$SEH_begin_sha512_block_data_order: _CET_ENDBR - lea r11,[OPENSSL_ia32cap_P] - mov r9d,DWORD[r11] - mov r10d,DWORD[4+r11] - mov r11d,DWORD[8+r11] - and r9d,1073741824 - and r10d,268435968 - or r10d,r9d - cmp r10d,1342177792 - je NEAR $L$avx_shortcut mov rax,rsp push rbx @@ -1744,7 +1734,7 @@ $L$epilogue: mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret -$L$SEH_end_sha512_block_data_order: +$L$SEH_end_sha512_block_data_order_nohw: section .rdata rdata align=8 ALIGN 64 @@ -1839,6 +1829,7 @@ K512: DB 111,114,103,62,0 section .text +global sha512_block_data_order_avx ALIGN 64 sha512_block_data_order_avx: @@ -1852,7 +1843,7 @@ $L$SEH_begin_sha512_block_data_order_avx: -$L$avx_shortcut: +_CET_ENDBR mov rax,rsp push rbx @@ -3125,15 +3116,15 @@ $L$in_prologue: section .pdata rdata align=4 ALIGN 4 - DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase - DD $L$SEH_end_sha512_block_data_order wrt ..imagebase - DD $L$SEH_info_sha512_block_data_order wrt ..imagebase + DD $L$SEH_begin_sha512_block_data_order_nohw wrt ..imagebase + DD $L$SEH_end_sha512_block_data_order_nohw wrt ..imagebase + DD $L$SEH_info_sha512_block_data_order_nohw wrt ..imagebase DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase section .xdata rdata align=8 ALIGN 8 -$L$SEH_info_sha512_block_data_order: +$L$SEH_info_sha512_block_data_order_nohw: DB 9,0,0,0 DD se_handler wrt ..imagebase DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase