diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5d433204d5da08..0adf3cfb34c949 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1804,9 +1804,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
   EVT VT = Node->getValueType(0);
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
+  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
+    return SDValue();
+
   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
-  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
-      !(TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) || VT.isScalableVector()))
+  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+      !VT.isScalableVector())
     return SDValue();
 
   SDLoc DL(Node);
@@ -1821,8 +1824,12 @@ SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
   EVT VT = Node->getValueType(0);
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
-  // FIXME: We shouldn't restrict this to scalable vectors.
-  if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || !VT.isScalableVector())
+  if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
+    return SDValue();
+
+  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+      !VT.isScalableVector())
     return SDValue();
 
   SDLoc DL(Node);
@@ -1837,10 +1844,14 @@ SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
   EVT VT = Node->getValueType(0);
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
-  // FIXME: We shouldn't restrict this to scalable vectors.
   if (VT != Node->getOperand(1).getValueType() ||
       !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
-      !TLI.isOperationLegalOrCustom(ISD::OR, IntVT) || !VT.isScalableVector())
+      !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
+    return SDValue();
+
+  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+  if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
+      !VT.isScalableVector())
     return SDValue();
 
   SDLoc DL(Node);
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 7030e5435f723e..8d40a9ef54dca9 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -508,21 +508,24 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_copysign(
 ; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0];
 ; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
-; CHECK-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
-; CHECK-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
-; CHECK-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
-; CHECK-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
-; CHECK-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
-; CHECK-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
-; CHECK-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
-; CHECK-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
-; CHECK-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
-; CHECK-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
-; CHECK-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
-; CHECK-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
-; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
+; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+; SM80-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
+; SM80-DAG: abs.bf16 [[AW1:%rs[0-9]+]], [[A1]];
+; SM80-DAG: neg.bf16 [[AY1:%rs[0-9]+]], [[AW1]];
+; SM80-DAG: shr.u16 [[BS1:%rs[0-9]+]], [[B1]], 15;
+; SM80-DAG: and.b16 [[BR1:%rs[0-9]+]], [[BS1]], 1;
+; SM80-DAG: setp.eq.b16 [[P1:%p[0-9]+]], [[BR1]], 1;
+; SM80-DAG: selp.b16 [[RS1:%rs[0-9]+]], [[AY1]], [[AW1]], [[P1]]
+; SM80-DAG: abs.bf16 [[AW0:%rs[0-9]+]], [[A0]];
+; SM80-DAG: neg.bf16 [[AY0:%rs[0-9]+]], [[AW0]];
+; SM80-DAG: shr.u16 [[BS0:%rs[0-9]+]], [[B0]], 15;
+; SM80-DAG: and.b16 [[BR0:%rs[0-9]+]], [[BS0]], 1;
+; SM80-DAG: setp.eq.b16 [[P0:%p[0-9]+]], [[BR0]], 1;
+; SM80-DAG: selp.b16 [[RS0:%rs[0-9]+]], [[AY0]], [[AW0]], [[P0]]
+; SM80-DAG: mov.b32 [[R:%r[0-9]+]], {[[RS0]], [[RS1]]}
+; SM90-DAG: and.b32 [[R1:%r[0-9]+]], [[B]], -2147450880;
+; SM90-DAG: and.b32 [[R2:%r[0-9]+]], [[A]], 2147450879;
+; SM90-DAG: or.b32 [[R:%r[0-9]+]], [[R2]], [[R1]];
 ; CHECK: st.param.b32 [func_retval0+0], [[R]];
 ; CHECK: ret;
 define <2 x bfloat> @test_copysign(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 464b3a754804fe..b41f63b783d390 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -1184,14 +1184,15 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
 
 ; CHECK-LABEL: test_fabs(
 ; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0];
-; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
+; CHECK-NOF16: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
+;
CHECK-NOF16-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; +; CHECK-NOF16-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; +; CHECK-NOF16-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]]; +; CHECK-NOF16-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-F16: and.b32 [[R:%r[0-9]+]], [[A]], 2147450879; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fabs(<2 x half> %a) #0 { @@ -1244,15 +1245,18 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-LABEL: test_copysign( ; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] -; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; -; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; -; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; -; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; -; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; -; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; +; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; +; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; +; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; +; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880; +; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879; +; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R1]], [[R0]] ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { @@ -1263,18 +1267,24 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-LABEL: test_copysign_f32( ; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; -; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; -; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; -; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648; -; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648; -; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; } -; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; } -; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; -; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; +; CHECK-NOF16-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]]; +; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-NOF16-DAG: and.b32 
[[BX0:%r[0-9]+]], [[BI0]], -2147483648; +; CHECK-NOF16-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648; +; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; } +; CHECK-NOF16-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; } +; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; +; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; +; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-F16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[B1]]; +; CHECK-F16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[B0]]; +; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]}; +; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880; +; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879; +; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]] ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { @@ -1286,20 +1296,26 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-LABEL: test_copysign_f64( ; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; -; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; -; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; -; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; -; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; -; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; -; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; -; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; -; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; -; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; -; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; +; CHECK-NOF16-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; +; CHECK-NOF16-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-NOF16-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-NOF16-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; +; CHECK-NOF16-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; +; CHECK-NOF16-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; +; CHECK-NOF16-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; +; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; +; CHECK-NOF16-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; +; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; +; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; +; CHECK-NOF16-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK-F16-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[B1]]; +; CHECK-F16-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[B0]]; +; CHECK-F16-DAG: mov.b32 [[R2:%r[0-9]+]], {[[R1]], [[R0]]}; +; CHECK-F16-DAG: and.b32 [[R3:%r[0-9]+]], [[R2]], -2147450880; +; CHECK-F16-DAG: and.b32 [[R4:%r[0-9]+]], [[A]], 2147450879; +; CHECK-F16-DAG: or.b32 [[R:%r[0-9]+]], [[R4]], [[R3]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { @@ -1311,16 +1327,22 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_copysign_extended( ; 
CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] -; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; -; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; -; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; -; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; -; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; -; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]]; -; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]]; +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-NOF16-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-NOF16-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; +; CHECK-NOF16-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; +; CHECK-NOF16-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; +; CHECK-NOF16-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; +; CHECK-NOF16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R0]]; +; CHECK-NOF16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R1]]; +; CHECK-F16-DAG: and.b32 [[R0:%r[0-9]+]], [[B]], -2147450880; +; CHECK-F16-DAG: and.b32 [[R1:%r[0-9]+]], [[A]], 2147450879; +; CHECK-F16-DAG: or.b32 [[R2:%r[0-9]+]], [[R1]], [[R0]] +; CHECK-F16-DAG: mov.b32 {[[R3:%rs[0-9]+]], [[R4:%rs[0-9]+]]}, [[R2]] +; CHECK-F16-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[R3]] +; CHECK-F16-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[R4]] ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; ; CHECK: ret; define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { diff --git a/llvm/test/CodeGen/PowerPC/vec_abs.ll b/llvm/test/CodeGen/PowerPC/vec_abs.ll index 50dcfc3faf62e9..b900f0ea29c4a6 100644 --- a/llvm/test/CodeGen/PowerPC/vec_abs.ll +++ b/llvm/test/CodeGen/PowerPC/vec_abs.ll @@ -19,10 +19,9 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 ; CHECK: xvabssp ; CHECK: blr -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs +; CHECK-NOVSX: vspltisb +; CHECK-NOVSX: vslw +; CHECK-NOVSX: vandc ; CHECK-NOVSX: blr define <4 x float> @test2_float(<4 x float> %aa) #0 { @@ -40,11 +39,8 @@ define <4 x float> @test2_float(<4 x float> %aa) #0 { ; CHECK: xvnabssp ; CHECK: blr ; CHECK-NOVSX: vspltisb -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs -; CHECK-NOVSX: fabs -; CHECK-NOVSX: vxor +; CHECK-NOVSX: vslw +; CHECK-NOVSX: vor ; CHECK-NOVSX: blr define <2 x double> @test_double(<2 x double> %aa) #0 { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index d665d23dec68a4..69faf269ae3db6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -508,85 +508,15 @@ define void @fabs_v8f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-RV32-LABEL: fabs_v8f16: -; ZVFHMIN-RV32: # %bb.0: -; ZVFHMIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) -; ZVFHMIN-RV32-NEXT: lui a2, 8 -; ZVFHMIN-RV32-NEXT: lhu a3, 
0(sp) -; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) -; ZVFHMIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) -; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV32-NEXT: ret -; -; ZVFHMIN-RV64-LABEL: fabs_v8f16: -; ZVFHMIN-RV64: # %bb.0: -; ZVFHMIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) -; ZVFHMIN-RV64-NEXT: lui a2, 8 -; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) -; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) -; ZVFHMIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) -; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: fabs_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) store <8 x half> %b, ptr %x @@ -603,89 +533,15 @@ define void @fabs_v6f16(ptr %x) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-RV32-LABEL: fabs_v6f16: -; ZVFHMIN-RV32: # %bb.0: -; ZVFHMIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV32-NEXT: lhu a1, 2(sp) -; 
ZVFHMIN-RV32-NEXT: lui a2, 8 -; ZVFHMIN-RV32-NEXT: lhu a3, 0(sp) -; ZVFHMIN-RV32-NEXT: addi a2, a2, -1 -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: lhu a4, 4(sp) -; ZVFHMIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a3 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-RV32-NEXT: lhu a1, 6(sp) -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV32-NEXT: lhu a3, 10(sp) -; ZVFHMIN-RV32-NEXT: lhu a4, 8(sp) -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-RV32-NEXT: lhu a1, 12(sp) -; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-RV32-NEXT: lhu a4, 14(sp) -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV32-NEXT: and a2, a4, a2 -; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV32-NEXT: ret -; -; ZVFHMIN-RV64-LABEL: fabs_v6f16: -; ZVFHMIN-RV64: # %bb.0: -; ZVFHMIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-RV64-NEXT: lhu a1, 2(sp) -; ZVFHMIN-RV64-NEXT: lui a2, 8 -; ZVFHMIN-RV64-NEXT: lhu a3, 0(sp) -; ZVFHMIN-RV64-NEXT: addiw a2, a2, -1 -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: lhu a4, 4(sp) -; ZVFHMIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a3 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-RV64-NEXT: lhu a1, 6(sp) -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-RV64-NEXT: lhu a3, 10(sp) -; ZVFHMIN-RV64-NEXT: lhu a4, 8(sp) -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-RV64-NEXT: lhu a1, 12(sp) -; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-RV64-NEXT: lhu a4, 14(sp) -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-RV64-NEXT: and a2, a4, a2 -; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: fabs_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a) store <6 x half> %b, ptr %x @@ -741,255 +597,18 @@ define void @copysign_v8f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_v8f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32 -; 
ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa1, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 26(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_v8f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 16(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa1, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 26(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, 
fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v8f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v8f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 
32 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) @@ -1008,263 +627,20 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_v6f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 22(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa1, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 26(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_v6f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1) -; 
ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 16(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 20(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 22(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa1, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 26(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 24(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa3, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_v6f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a2, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, a5, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x 
v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_v6f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a2, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a2, a5, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 26(sp) -; 
ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b) @@ -1325,199 +701,19 @@ define void @copysign_vf_v8f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v8f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: 
fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v8f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v8f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a3, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v8f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a3, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; 
ZVFHMIN-LABEL: copysign_vf_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vmv.v.x v9, a1 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a2, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a2 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer @@ -1535,211 +731,25 @@ define void @copysign_vf_v6f16(ptr %x, half %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_vf_v6f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.h fa5, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_vf_v6f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa4 
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa0 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.h fa5, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_vf_v6f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a1, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a3, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i 
v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_vf_v6f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a4, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a1, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a3, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a3, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_vf_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: fmv.x.h a1, fa5 +; ZVFHMIN-NEXT: li a2, 192 +; ZVFHMIN-NEXT: vmv.s.x v0, a2 +; ZVFHMIN-NEXT: fmv.x.h a2, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vmv.v.x v9, a2 +; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = insertelement <6 x half> poison, half %y, i32 0 %c = shufflevector <6 x half> %b, <6 x half> 
poison, <6 x i32> zeroinitializer @@ -1798,261 +808,19 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v8f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 16(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 20(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 22(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 26(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 24(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 28(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 30(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v8f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 16(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; 
ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 20(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 22(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 26(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 24(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 28(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 30(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v8f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) -; 
ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v8f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; 
ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_neg_v8f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: addi a2, a1, -1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a2 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y %c = fneg <8 x half> %b @@ -2071,269 +839,21 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_v6f16: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 16(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa3, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 20(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 22(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 26(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 24(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa2 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 28(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 30(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV32-NEXT: 
vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_v6f16: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 18(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 16(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa3, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 20(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 22(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 26(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a4, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 24(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa4, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa2 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 28(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 30(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v9, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_v6f16: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma 
-; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, a3, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a5, 18(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 16(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 20(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a7, 22(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 26(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 24(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a5, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 28(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 30(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_v6f16: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a3, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a1, a3, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a5, 18(sp) -; 
ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 16(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a6, a7, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a5, a5, a6 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 20(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a7, 22(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a7, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 26(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 24(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a4, a5, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a6, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a7, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a5, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 28(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 30(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a5, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a6, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu -; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_neg_v6f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a1) +; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 +; ZVFHMIN-NEXT: addi a2, a1, -1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a2 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y %c = fneg <6 x half> %b @@ -2396,169 +916,20 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, 
8 -; ZVFHMIN-ZFH-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v4f16_v4f32: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; 
ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v4f16_v4f32: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a2, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a2 +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vxor.vx v9, v10, a1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <4 x half>, 
ptr %x %b = load <4 x float>, ptr %y %c = fneg <4 x float> %b @@ -2582,177 +953,22 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; -; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-ZFH-RV32: # %bb.0: -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFH-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8 -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 8(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 12(sp) -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV32-NEXT: ret -; -; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-ZFH-RV64: # %bb.0: -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFH-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8 -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 8(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 12(sp) -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa4, fa3 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa2, fa1 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp) -; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp) -; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3 -; ZVFHMIN-ZFH-RV64-NEXT: fsgnj.h fa5, fa5, fa4 -; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5 -; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, 
e16, mf4, ta, ma -; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFH-RV64-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-ZFHIN-RV32: # %bb.0: -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, sp, 8 -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, a1, -1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a4, 10(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a4, 4(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 12(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: lh a6, 14(sp) -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV32-NEXT: ret -; -; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32: -; ZVFHMIN-ZFHIN-RV64: # %bb.0: -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16 -; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v9, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1) -; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a1, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: addi a2, sp, 8 -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a2) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 2(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a3, a1, -1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a4, 10(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lui a5, 1048568 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a6, 0(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a7, 8(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a2, a4 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a7, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a4, 4(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 12(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: 
vslide1down.vx v8, v8, a2 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a4, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: lhu a2, 6(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: lh a6, 14(sp) -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a4, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a3 -; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a6, a5 -; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1 -; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma -; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0) -; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16 -; ZVFHMIN-ZFHIN-RV64-NEXT: ret +; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle32.v v9, (a1) +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a2, a1, -1 +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a2 +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vxor.vx v9, v10, a1 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 +; ZVFHMIN-NEXT: vse16.v v8, (a0) +; ZVFHMIN-NEXT: ret %a = load <3 x half>, ptr %x %b = load <3 x float>, ptr %y %c = fneg <3 x float> %b @@ -5954,3 +4170,10 @@ define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) { store <2 x double> %d, ptr %x ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; ZVFHMIN-RV32: {{.*}} +; ZVFHMIN-RV64: {{.*}} +; ZVFHMIN-ZFH-RV32: {{.*}} +; ZVFHMIN-ZFH-RV64: {{.*}} +; ZVFHMIN-ZFHIN-RV32: {{.*}} +; ZVFHMIN-ZFHIN-RV64: {{.*}} diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index d747da76a45fae..ad8921d2f7b026 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s -; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s -; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16 +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP +; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16 +; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) { ; CHECK-LABEL: sqrt_float32_t: @@ -1091,107 +1091,123 @@ entry: } define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) { -; CHECK-LABEL: copysign_float32_t: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; 
CHECK-NEXT: vmov r12, r1, d2 -; CHECK-NEXT: vmov r2, lr, d3 -; CHECK-NEXT: vmov r3, r0, d0 -; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: lsrs r1, r1, #31 -; CHECK-NEXT: bfi r0, r1, #31, #1 -; CHECK-NEXT: lsrs r1, r2, #31 -; CHECK-NEXT: bfi r4, r1, #31, #1 -; CHECK-NEXT: lsr.w r1, lr, #31 -; CHECK-NEXT: bfi r5, r1, #31, #1 -; CHECK-NEXT: lsr.w r1, r12, #31 -; CHECK-NEXT: bfi r3, r1, #31, #1 -; CHECK-NEXT: vmov s2, r4 -; CHECK-NEXT: vmov s3, r5 -; CHECK-NEXT: vmov s1, r0 -; CHECK-NEXT: vmov s0, r3 -; CHECK-NEXT: pop {r4, r5, r7, pc} +; FULLFP16-LABEL: copysign_float32_t: +; FULLFP16: @ %bb.0: @ %entry +; FULLFP16-NEXT: .save {r4, r5, r7, lr} +; FULLFP16-NEXT: push {r4, r5, r7, lr} +; FULLFP16-NEXT: vmov r12, r1, d2 +; FULLFP16-NEXT: vmov r2, lr, d3 +; FULLFP16-NEXT: vmov r3, r0, d0 +; FULLFP16-NEXT: vmov r4, r5, d1 +; FULLFP16-NEXT: lsrs r1, r1, #31 +; FULLFP16-NEXT: bfi r0, r1, #31, #1 +; FULLFP16-NEXT: lsrs r1, r2, #31 +; FULLFP16-NEXT: bfi r4, r1, #31, #1 +; FULLFP16-NEXT: lsr.w r1, lr, #31 +; FULLFP16-NEXT: bfi r5, r1, #31, #1 +; FULLFP16-NEXT: lsr.w r1, r12, #31 +; FULLFP16-NEXT: bfi r3, r1, #31, #1 +; FULLFP16-NEXT: vmov s2, r4 +; FULLFP16-NEXT: vmov s3, r5 +; FULLFP16-NEXT: vmov s1, r0 +; FULLFP16-NEXT: vmov s0, r3 +; FULLFP16-NEXT: pop {r4, r5, r7, pc} +; +; MVEFP-LABEL: copysign_float32_t: +; MVEFP: @ %bb.0: @ %entry +; MVEFP-NEXT: vmov.i32 q2, #0x80000000 +; MVEFP-NEXT: vbic.i32 q0, #0x80000000 +; MVEFP-NEXT: vand q1, q1, q2 +; MVEFP-NEXT: vorr q0, q0, q1 +; MVEFP-NEXT: bx lr entry: %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2) ret <4 x float> %0 } define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) { -; CHECK-LABEL: copysign_float16_t: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 -; CHECK-NEXT: vmovx.f16 s8, s4 -; CHECK-NEXT: vstr.16 s8, [sp, #24] -; CHECK-NEXT: vstr.16 s4, [sp, #28] -; CHECK-NEXT: vmovx.f16 s4, s5 -; CHECK-NEXT: vstr.16 s4, [sp, #16] -; CHECK-NEXT: vmovx.f16 s4, s6 -; CHECK-NEXT: vstr.16 s5, [sp, #20] -; CHECK-NEXT: vstr.16 s4, [sp, #8] -; CHECK-NEXT: vmovx.f16 s4, s7 -; CHECK-NEXT: vstr.16 s6, [sp, #12] -; CHECK-NEXT: vstr.16 s4, [sp] -; CHECK-NEXT: vstr.16 s7, [sp, #4] -; CHECK-NEXT: ldrb.w r0, [sp, #25] -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vabs.f16 s4, s4 -; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s6, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #29] -; CHECK-NEXT: vabs.f16 s4, s0 -; CHECK-NEXT: vneg.f16 s0, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s0, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #17] -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vabs.f16 s4, s4 -; CHECK-NEXT: vins.f16 s0, s6 -; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s6, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #21] -; CHECK-NEXT: vabs.f16 s4, s1 -; CHECK-NEXT: vneg.f16 s1, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s1, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #9] -; CHECK-NEXT: vmovx.f16 s4, s2 -; CHECK-NEXT: vabs.f16 s4, s4 -; CHECK-NEXT: vins.f16 s1, s6 -; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s6, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #13] -; CHECK-NEXT: vabs.f16 s4, s2 -; CHECK-NEXT: vneg.f16 s2, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s2, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #1] -; 
CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vabs.f16 s4, s4 -; CHECK-NEXT: vins.f16 s2, s6 -; CHECK-NEXT: vneg.f16 s6, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s6, s4 -; CHECK-NEXT: ldrb.w r0, [sp, #5] -; CHECK-NEXT: vabs.f16 s4, s3 -; CHECK-NEXT: vneg.f16 s3, s4 -; CHECK-NEXT: lsls r0, r0, #24 -; CHECK-NEXT: it pl -; CHECK-NEXT: vmovpl.f32 s3, s4 -; CHECK-NEXT: vins.f16 s3, s6 -; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: bx lr +; FULLFP16-LABEL: copysign_float16_t: +; FULLFP16: @ %bb.0: @ %entry +; FULLFP16-NEXT: .pad #32 +; FULLFP16-NEXT: sub sp, #32 +; FULLFP16-NEXT: vmovx.f16 s8, s4 +; FULLFP16-NEXT: vstr.16 s8, [sp, #24] +; FULLFP16-NEXT: vstr.16 s4, [sp, #28] +; FULLFP16-NEXT: vmovx.f16 s4, s5 +; FULLFP16-NEXT: vstr.16 s4, [sp, #16] +; FULLFP16-NEXT: vmovx.f16 s4, s6 +; FULLFP16-NEXT: vstr.16 s5, [sp, #20] +; FULLFP16-NEXT: vstr.16 s4, [sp, #8] +; FULLFP16-NEXT: vmovx.f16 s4, s7 +; FULLFP16-NEXT: vstr.16 s6, [sp, #12] +; FULLFP16-NEXT: vstr.16 s4, [sp] +; FULLFP16-NEXT: vstr.16 s7, [sp, #4] +; FULLFP16-NEXT: ldrb.w r0, [sp, #25] +; FULLFP16-NEXT: vmovx.f16 s4, s0 +; FULLFP16-NEXT: vabs.f16 s4, s4 +; FULLFP16-NEXT: vneg.f16 s6, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s6, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #29] +; FULLFP16-NEXT: vabs.f16 s4, s0 +; FULLFP16-NEXT: vneg.f16 s0, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s0, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #17] +; FULLFP16-NEXT: vmovx.f16 s4, s1 +; FULLFP16-NEXT: vabs.f16 s4, s4 +; FULLFP16-NEXT: vins.f16 s0, s6 +; FULLFP16-NEXT: vneg.f16 s6, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s6, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #21] +; FULLFP16-NEXT: vabs.f16 s4, s1 +; FULLFP16-NEXT: vneg.f16 s1, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s1, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #9] +; FULLFP16-NEXT: vmovx.f16 s4, s2 +; FULLFP16-NEXT: vabs.f16 s4, s4 +; FULLFP16-NEXT: vins.f16 s1, s6 +; FULLFP16-NEXT: vneg.f16 s6, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s6, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #13] +; FULLFP16-NEXT: vabs.f16 s4, s2 +; FULLFP16-NEXT: vneg.f16 s2, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s2, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #1] +; FULLFP16-NEXT: vmovx.f16 s4, s3 +; FULLFP16-NEXT: vabs.f16 s4, s4 +; FULLFP16-NEXT: vins.f16 s2, s6 +; FULLFP16-NEXT: vneg.f16 s6, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s6, s4 +; FULLFP16-NEXT: ldrb.w r0, [sp, #5] +; FULLFP16-NEXT: vabs.f16 s4, s3 +; FULLFP16-NEXT: vneg.f16 s3, s4 +; FULLFP16-NEXT: lsls r0, r0, #24 +; FULLFP16-NEXT: it pl +; FULLFP16-NEXT: vmovpl.f32 s3, s4 +; FULLFP16-NEXT: vins.f16 s3, s6 +; FULLFP16-NEXT: add sp, #32 +; FULLFP16-NEXT: bx lr +; +; MVEFP-LABEL: copysign_float16_t: +; MVEFP: @ %bb.0: @ %entry +; MVEFP-NEXT: vmov.i16 q2, #0x8000 +; MVEFP-NEXT: vbic.i16 q0, #0x8000 +; MVEFP-NEXT: vand q1, q1, q2 +; MVEFP-NEXT: vorr q0, q0, q1 +; MVEFP-NEXT: bx lr entry: %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2) ret <8 x half> %0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll index 1d6e073271efa2..4660e1bce1ee65 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ 
b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -354,7 +354,11 @@ define <2 x i64> @rotr_v2i64(<2 x i64> %x, <2 x i64> %y) { ; ============================================================================== ; CHECK-LABEL: copysign_v4f32: -; CHECK: f32.copysign +; CHECK: v128.const +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.const +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.or declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) define <4 x float> @copysign_v4f32(<4 x float> %x, <4 x float> %y) { %v = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %y) @@ -454,7 +458,11 @@ define <4 x float> @round_v4f32(<4 x float> %x) { ; ============================================================================== ; CHECK-LABEL: copysign_v2f64: -; CHECK: f64.copysign +; CHECK: v128.const +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.const +; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.or declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) define <2 x double> @copysign_v2f64(<2 x double> %x, <2 x double> %y) { %v = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %y)