From b21e3282864c9f7ad656c64bc375f5869ef76d19 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 12 Feb 2024 10:25:13 +0100 Subject: [PATCH 01/59] [IR] Add ptradd fast path in accumulateConstantOffset() (NFC) For getelementptr i8 (aka ptradd) we can skip the whole logic and directly use the offset. As we're now canonicalizing to this form, it's pretty common and worth having a fast-path for. --- llvm/lib/IR/Operator.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 16a89534b4b3ec..caf8fe654a36dc 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -108,6 +108,15 @@ bool GEPOperator::accumulateConstantOffset( bool GEPOperator::accumulateConstantOffset( Type *SourceType, ArrayRef Index, const DataLayout &DL, APInt &Offset, function_ref ExternalAnalysis) { + // Fast path for canonical getelementptr i8 form. + if (SourceType->isIntegerTy(8) && !ExternalAnalysis) { + if (auto *CI = dyn_cast(Index.front())) { + Offset += CI->getValue().sextOrTrunc(Offset.getBitWidth()); + return true; + } + return false; + } + bool UsedExternalAnalysis = false; auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool { Index = Index.sextOrTrunc(Offset.getBitWidth()); From 7bc079c85219ad6e954fb6071cd108151203c85e Mon Sep 17 00:00:00 2001 From: Alexey Bataev <5361294+alexey-bataev@users.noreply.github.com> Date: Mon, 12 Feb 2024 07:09:49 -0500 Subject: [PATCH 02/59] [TTI]Fallback to SingleSrcPermute shuffle kind, if no direct estimation for extract subvector. Many targets do not have cost for extractsubvector shuffle kind, but have the costs for single source permute. If there are no costs estimation for extractsubvector, better to switchto single source permute for better cost estimation. Reviewers: RKSimon, davemgreen, arsenm Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/79837 --- .../AArch64/AArch64TargetTransformInfo.cpp | 7 + .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 7 + .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 + .../lib/Target/X86/X86TargetTransformInfo.cpp | 2 + .../Analysis/CostModel/AArch64/reduce-fadd.ll | 48 +- .../CostModel/AArch64/reduce-minmax.ll | 8 +- .../X86/shuffle-extract_subvector-codesize.ll | 1338 ++++++++++++----- .../X86/shuffle-extract_subvector-latency.ll | 1338 ++++++++++++----- .../shuffle-extract_subvector-sizelatency.ll | 1338 ++++++++++++----- .../X86/shuffle-extract_subvector.ll | 1338 ++++++++++++----- .../dot-product-float.ll | 44 +- 11 files changed, 3799 insertions(+), 1676 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index cdd2750521d2c9..af0b6873d170dd 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3809,6 +3809,10 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); + // Treat extractsubvector as single op permutation. + bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector; + if (IsExtractSubvector && LT.second.isFixedLengthVector()) + Kind = TTI::SK_PermuteSingleSrc; // Check for broadcast loads, which are supported by the LD1R instruction. // In terms of code-size, the shuffle vector is free when a load + dup get @@ -3971,6 +3975,9 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } } + // Restore optimal kind. + if (IsExtractSubvector) + Kind = TTI::SK_ExtractSubvector; return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index ebe0b8551b236a..31077dbc0b2cc4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1129,6 +1129,10 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, int Index, VectorType *SubTp, ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp); + // Treat extractsubvector as single op permutation. + bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector; + if (IsExtractSubvector) + Kind = TTI::SK_PermuteSingleSrc; if (ST->hasVOP3PInsts()) { if (cast(VT)->getNumElements() == 2 && @@ -1146,6 +1150,9 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } } } + // Restore optimal kind. + if (IsExtractSubvector) + Kind = TTI::SK_ExtractSubvector; return BaseT::getShuffleCost(Kind, VT, Mask, CostKind, Index, SubTp); } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index a1b11016de3332..3be894ad3bef2c 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1214,6 +1214,10 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, int Index, VectorType *SubTp, ArrayRef Args) { Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); + // Treat extractsubvector as single op permutation. + bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector; + if (IsExtractSubvector) + Kind = TTI::SK_PermuteSingleSrc; if (ST->hasNEON()) { if (Kind == TTI::SK_Broadcast) { static const CostTblEntry NEONDupTbl[] = { @@ -1308,6 +1312,9 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, } } + // Restore optimal kind. + if (IsExtractSubvector) + Kind = TTI::SK_ExtractSubvector; int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy() ? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) : 1; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index cd40b1d3b09332..be1a0948308999 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1540,6 +1540,8 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, return ExtractCost + 2; // worst case pshufhw + pshufd } } + // If the extract subvector is not optimal, treat it as single op shuffle. + Kind = TTI::SK_PermuteSingleSrc; } // Subvector insertions are cheap if the subvectors are aligned. diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll index 737437282d9bd2..954a836e44f283 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll @@ -59,19 +59,19 @@ define void @fast_fp_reductions() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -82,19 +82,19 @@ define void @fast_fp_reductions() { ; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) -; FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) +; FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) ; FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -105,19 +105,19 @@ define void @fast_fp_reductions() { ; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16 = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %fadd_v8f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v11f16 = call fast half @llvm.vector.reduce.fadd.v11f16(half 0xH0000, <11 x half> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v13f16_reassoc = call reassoc half @llvm.vector.reduce.fadd.v13f16(half 0xH0000, <13 x half> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32 = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v8f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fadd_v13f32 = call fast float @llvm.vector.reduce.fadd.v13f32(float 0.000000e+00, <13 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v5f32_reassoc = call reassoc float @llvm.vector.reduce.fadd.v5f32(float 0.000000e+00, <5 x float> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64 = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_v2f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) -; BF16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64 = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %fadd_v4f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v7f64 = call fast double @llvm.vector.reduce.fadd.v7f64(double 0.000000e+00, <7 x double> undef) +; BF16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v9f64_reassoc = call reassoc double @llvm.vector.reduce.fadd.v9f64(double 0.000000e+00, <9 x double> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f8 = call reassoc bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR8000, <4 x bfloat> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) ; BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll index dcca118871418d..cf03026ceb75ad 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll @@ -165,11 +165,11 @@ define void @reduce_fmin16() { ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-F16-LABEL: 'reduce_fmin16' @@ -199,11 +199,11 @@ define void @reduce_fmax16() { ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V16f16m = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-F16-LABEL: 'reduce_fmax16' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll index 7e897f41ee456f..9a86fbe105ec29 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-codesize.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for extract_subector style shuffles. @@ -38,7 +38,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -51,7 +51,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -90,7 +90,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -102,7 +102,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -238,9 +238,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -262,9 +262,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -297,9 +297,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -321,9 +321,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -356,9 +356,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -380,9 +380,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -397,123 +397,300 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi16' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -533,9 +710,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -557,9 +734,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -592,9 +769,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -616,9 +793,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -709,9 +886,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -733,9 +910,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -828,9 +1005,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -852,9 +1029,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -947,9 +1124,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -971,9 +1148,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1050,243 +1227,600 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi8' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1304,9 +1838,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1328,9 +1862,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1423,9 +1957,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1447,9 +1981,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll index 94e26227539bc4..393dec82428b34 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-latency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for extract_subector style shuffles. @@ -38,7 +38,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -51,7 +51,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -90,7 +90,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -102,7 +102,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -238,9 +238,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -262,9 +262,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -297,9 +297,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -321,9 +321,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -356,9 +356,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -380,9 +380,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -397,123 +397,300 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi16' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -533,9 +710,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -557,9 +734,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -592,9 +769,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -616,9 +793,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -709,9 +886,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -733,9 +910,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -828,9 +1005,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -852,9 +1029,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -947,9 +1124,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -971,9 +1148,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1050,243 +1227,600 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi8' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1304,9 +1838,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1328,9 +1862,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1423,9 +1957,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1447,9 +1981,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll index 787a96f3bfde88..63bb07bf4fd894 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector-sizelatency.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for extract_subector style shuffles. @@ -38,7 +38,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -51,7 +51,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void @@ -90,7 +90,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -102,7 +102,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; @@ -238,9 +238,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -262,9 +262,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -297,9 +297,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -321,9 +321,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -356,9 +356,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -380,9 +380,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -397,123 +397,300 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi16' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -533,9 +710,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -557,9 +734,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -592,9 +769,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -616,9 +793,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -709,9 +886,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -733,9 +910,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -828,9 +1005,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -852,9 +1029,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -947,9 +1124,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -971,9 +1148,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1050,243 +1227,600 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SLM-LABEL: 'test_vXi8' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1304,9 +1838,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1328,9 +1862,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1423,9 +1957,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1447,9 +1981,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index 58e99c9ed65233..b521a759484670 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -2,15 +2,15 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s -check-prefixes=SSE,SSSE3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s -check-prefixes=SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s -check-prefixes=AVX,AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512VBMI ; ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SSE,SLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,GLM -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 ; ; Verify the cost model for extract_subector style shuffles. @@ -38,7 +38,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -51,7 +51,7 @@ define void @test_vXf64(<4 x double> %src256, <8 x double> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %V512_567u = shufflevector <8 x double> %src512, <8 x double> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -90,7 +90,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -102,7 +102,7 @@ define void @test_vXi64(<4 x i64> %src256, <8 x i64> %src512) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> undef, <2 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2345 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> undef, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -238,9 +238,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -262,9 +262,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -297,9 +297,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -321,9 +321,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -356,9 +356,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -380,9 +380,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -397,123 +397,300 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXi16' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi16' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX2-LABEL: 'test_vXi16' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi16' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi16' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi16' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'test_vXi16' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -533,9 +710,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -557,9 +734,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -592,9 +769,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> @@ -616,9 +793,9 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> @@ -709,9 +886,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -733,9 +910,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -828,9 +1005,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -852,9 +1029,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -947,9 +1124,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -971,9 +1148,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1050,243 +1227,600 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX1-LABEL: 'test_vXi8' +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; AVX512-LABEL: 'test_vXi8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; AVX2-LABEL: 'test_vXi8' +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512F-LABEL: 'test_vXi8' +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512BW-LABEL: 'test_vXi8' +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512VBMI-LABEL: 'test_vXi8' +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'test_vXi8' ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1304,9 +1838,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1328,9 +1862,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> @@ -1423,9 +1957,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> @@ -1447,9 +1981,9 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-float.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-float.ll index 4ea6d7f4283909..97faa23cb516de 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-float.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-float.ll @@ -88,46 +88,10 @@ declare <1 x float> @llvm.matrix.multiply.v1f32.v7f32.v7f32(<7 x float>, <7 x fl define <1 x double> @dotproduct_double_v6(<6 x double> %a, <6 x double> %b) { ; CHECK-LABEL: @dotproduct_double_v6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SPLIT:%.*]] = shufflevector <6 x double> [[A:%.*]], <6 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[SPLIT1:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <1 x i32> -; CHECK-NEXT: [[SPLIT2:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <1 x i32> -; CHECK-NEXT: [[SPLIT3:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <1 x i32> -; CHECK-NEXT: [[SPLIT4:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <1 x i32> -; CHECK-NEXT: [[SPLIT5:%.*]] = shufflevector <6 x double> [[A]], <6 x double> poison, <1 x i32> -; CHECK-NEXT: [[SPLIT6:%.*]] = shufflevector <6 x double> [[B:%.*]], <6 x double> poison, <6 x i32> -; CHECK-NEXT: [[BLOCK:%.*]] = shufflevector <1 x double> [[SPLIT]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x double> poison, double [[TMP0]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <1 x double> [[BLOCK]], [[SPLAT_SPLAT]] -; CHECK-NEXT: [[BLOCK7:%.*]] = shufflevector <1 x double> [[SPLIT1]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 1 -; CHECK-NEXT: [[SPLAT_SPLATINSERT8:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT9:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT8]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK7]], <1 x double> [[SPLAT_SPLAT9]], <1 x double> [[TMP1]]) -; CHECK-NEXT: [[BLOCK10:%.*]] = shufflevector <1 x double> [[SPLIT2]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 2 -; CHECK-NEXT: [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x double> poison, double [[TMP4]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT11]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK10]], <1 x double> [[SPLAT_SPLAT12]], <1 x double> [[TMP3]]) -; CHECK-NEXT: [[BLOCK13:%.*]] = shufflevector <1 x double> [[SPLIT3]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 3 -; CHECK-NEXT: [[SPLAT_SPLATINSERT14:%.*]] = insertelement <1 x double> poison, double [[TMP6]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT15:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT14]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK13]], <1 x double> [[SPLAT_SPLAT15]], <1 x double> [[TMP5]]) -; CHECK-NEXT: [[BLOCK16:%.*]] = shufflevector <1 x double> [[SPLIT4]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 4 -; CHECK-NEXT: [[SPLAT_SPLATINSERT17:%.*]] = insertelement <1 x double> poison, double [[TMP8]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT18:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT17]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK16]], <1 x double> [[SPLAT_SPLAT18]], <1 x double> [[TMP7]]) -; CHECK-NEXT: [[BLOCK19:%.*]] = shufflevector <1 x double> [[SPLIT5]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <6 x double> [[SPLIT6]], i64 5 -; CHECK-NEXT: [[SPLAT_SPLATINSERT20:%.*]] = insertelement <1 x double> poison, double [[TMP10]], i64 0 -; CHECK-NEXT: [[SPLAT_SPLAT21:%.*]] = shufflevector <1 x double> [[SPLAT_SPLATINSERT20]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call fast <1 x double> @llvm.fmuladd.v1f64(<1 x double> [[BLOCK19]], <1 x double> [[SPLAT_SPLAT21]], <1 x double> [[TMP9]]) -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <1 x double> [[TMP11]], <1 x double> poison, <1 x i32> zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <1 x double> poison, <1 x double> [[TMP12]], <1 x i32> -; CHECK-NEXT: ret <1 x double> [[TMP13]] +; CHECK-NEXT: [[TMP0:%.*]] = fmul <6 x double> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.vector.reduce.fadd.v6f64(double 0.000000e+00, <6 x double> [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <1 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT: ret <1 x double> [[TMP2]] ; entry: %c = tail call fast <1 x double> @llvm.matrix.multiply.v1f64.v6f64.v6f64(<6 x double> %a, <6 x double> %b, i32 1, i32 6, i32 1) From 268799cc59ef7af237278d058c6e2ee2c8eb227d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 12 Feb 2024 13:11:11 +0100 Subject: [PATCH 03/59] [clang][Interp][NFC] Refactor Pointer::toAPValue() Use early-out style rather than letting control flow through the entire function. --- clang/lib/AST/Interp/Pointer.cpp | 94 +++++++++++++------------------- 1 file changed, 39 insertions(+), 55 deletions(-) diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index 316a7ed5fa1bb8..dd8868d0b1ec22 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -83,63 +83,47 @@ void Pointer::operator=(Pointer &&P) { } APValue Pointer::toAPValue() const { - APValue::LValueBase Base; llvm::SmallVector Path; - CharUnits Offset; - bool IsNullPtr; - bool IsOnePastEnd; - - if (isZero()) { - Base = static_cast(nullptr); - IsNullPtr = true; - IsOnePastEnd = false; - Offset = CharUnits::Zero(); - } else { - // Build the lvalue base from the block. - const Descriptor *Desc = getDeclDesc(); - if (auto *VD = Desc->asValueDecl()) - Base = VD; - else if (auto *E = Desc->asExpr()) - Base = E; - else - llvm_unreachable("Invalid allocation type"); - - // Not a null pointer. - IsNullPtr = false; - - if (isUnknownSizeArray()) { - IsOnePastEnd = false; - Offset = CharUnits::Zero(); - } else if (Desc->asExpr()) { - // Pointer pointing to a an expression. - IsOnePastEnd = false; - Offset = CharUnits::Zero(); + + if (isZero()) + return APValue(static_cast(nullptr), CharUnits::Zero(), Path, + false, /*IsNullPtr=*/false); + + // Build the lvalue base from the block. + const Descriptor *Desc = getDeclDesc(); + APValue::LValueBase Base; + if (auto *VD = Desc->asValueDecl()) + Base = VD; + else if (auto *E = Desc->asExpr()) + Base = E; + else + llvm_unreachable("Invalid allocation type"); + + if (isUnknownSizeArray() || Desc->asExpr()) + return APValue(Base, CharUnits::Zero(), Path, false, false); + + // TODO: compute the offset into the object. + CharUnits Offset = CharUnits::Zero(); + bool IsOnePastEnd = isOnePastEnd(); + + // Build the path into the object. + Pointer Ptr = *this; + while (Ptr.isField() || Ptr.isArrayElement()) { + if (Ptr.isArrayElement()) { + Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getIndex())); + Ptr = Ptr.getArray(); } else { - // TODO: compute the offset into the object. - Offset = CharUnits::Zero(); - - // Build the path into the object. - Pointer Ptr = *this; - while (Ptr.isField() || Ptr.isArrayElement()) { - if (Ptr.isArrayElement()) { - Path.push_back(APValue::LValuePathEntry::ArrayIndex(Ptr.getIndex())); - Ptr = Ptr.getArray(); - } else { - // TODO: figure out if base is virtual - bool IsVirtual = false; - - // Create a path entry for the field. - const Descriptor *Desc = Ptr.getFieldDesc(); - if (const auto *BaseOrMember = Desc->asDecl()) { - Path.push_back(APValue::LValuePathEntry({BaseOrMember, IsVirtual})); - Ptr = Ptr.getBase(); - continue; - } - llvm_unreachable("Invalid field type"); - } + // TODO: figure out if base is virtual + bool IsVirtual = false; + + // Create a path entry for the field. + const Descriptor *Desc = Ptr.getFieldDesc(); + if (const auto *BaseOrMember = Desc->asDecl()) { + Path.push_back(APValue::LValuePathEntry({BaseOrMember, IsVirtual})); + Ptr = Ptr.getBase(); + continue; } - - IsOnePastEnd = isOnePastEnd(); + llvm_unreachable("Invalid field type"); } } @@ -149,7 +133,7 @@ APValue Pointer::toAPValue() const { // Just invert the order of the elements. std::reverse(Path.begin(), Path.end()); - return APValue(Base, Offset, Path, IsOnePastEnd, IsNullPtr); + return APValue(Base, Offset, Path, IsOnePastEnd, /*IsNullPtr=*/false); } std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const { From 0940f9083e68bda78bcbb323c2968a4294092e21 Mon Sep 17 00:00:00 2001 From: Alexey Bataev <5361294+alexey-bataev@users.noreply.github.com> Date: Mon, 12 Feb 2024 07:41:42 -0500 Subject: [PATCH 04/59] [SLP]Add support for strided loads. Added basic support for strided loads support in SLP vectorizer. Supports constant strides only. If the strided load must be reversed, applies -stride to avoid extra reverse shuffle. Reviewers: preames, lukel97 Reviewed By: preames Pull Request: https://github.com/llvm/llvm-project/pull/80310 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 288 ++++----- .../SLPVectorizer/RISCV/complex-loads.ll | 546 +++++++++--------- .../RISCV/strided-loads-vectorized.ll | 131 +---- .../strided-loads-with-external-use-ptr.ll | 4 +- .../SLPVectorizer/RISCV/strided-loads.ll | 13 +- .../X86/gep-nodes-with-non-gep-inst.ll | 2 +- .../X86/remark_gather-load-redux-cost.ll | 2 +- 7 files changed, 456 insertions(+), 530 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c0b7298f78005d..c94fb71ab220ba 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -175,6 +175,15 @@ static cl::opt RootLookAheadMaxDepth( "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for searching best rooting option")); +static cl::opt MinProfitableStridedLoads( + "slp-min-strided-loads", cl::init(2), cl::Hidden, + cl::desc("The minimum number of loads, which should be considered strided, " + "if the stride is > 1 or is runtime value")); + +static cl::opt MaxProfitableLoadStride( + "slp-max-stride", cl::init(8), cl::Hidden, + cl::desc("The maximum stride, considered to be profitable.")); + static cl::opt ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -2575,7 +2584,7 @@ class BoUpSLP { enum EntryState { Vectorize, ScatterVectorize, - PossibleStridedVectorize, + StridedVectorize, NeedToGather }; EntryState State; @@ -2753,8 +2762,8 @@ class BoUpSLP { case ScatterVectorize: dbgs() << "ScatterVectorize\n"; break; - case PossibleStridedVectorize: - dbgs() << "PossibleStridedVectorize\n"; + case StridedVectorize: + dbgs() << "StridedVectorize\n"; break; case NeedToGather: dbgs() << "NeedToGather\n"; @@ -3680,7 +3689,7 @@ template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { if (Entry->State == TreeEntry::NeedToGather) return "color=red"; if (Entry->State == TreeEntry::ScatterVectorize || - Entry->State == TreeEntry::PossibleStridedVectorize) + Entry->State == TreeEntry::StridedVectorize) return "color=blue"; return ""; } @@ -3842,12 +3851,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { namespace { /// Tracks the state we can represent the loads in the given sequence. -enum class LoadsState { - Gather, - Vectorize, - ScatterVectorize, - PossibleStridedVectorize -}; +enum class LoadsState { Gather, Vectorize, ScatterVectorize, StridedVectorize }; } // anonymous namespace static bool arePointersCompatible(Value *Ptr1, Value *Ptr2, @@ -3878,6 +3882,14 @@ static Align computeCommonAlignment(ArrayRef VL) { return CommonAlignment; } +/// Check if \p Order represents reverse order. +static bool isReverseOrder(ArrayRef Order) { + unsigned Sz = Order.size(); + return !Order.empty() && all_of(enumerate(Order), [&](const auto &Pair) { + return Pair.value() == Sz || Sz - Pair.index() - 1 == Pair.value(); + }); +} + /// Checks if the given array of loads can be represented as a vectorized, /// scatter or just simple gather. static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, @@ -3900,7 +3912,8 @@ static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, // Make sure all loads in the bundle are simple - we can't vectorize // atomic or volatile loads. PointerOps.clear(); - PointerOps.resize(VL.size()); + const unsigned Sz = VL.size(); + PointerOps.resize(Sz); auto *POIter = PointerOps.begin(); for (Value *V : VL) { auto *L = cast(V); @@ -3911,12 +3924,12 @@ static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, } Order.clear(); + auto *VecTy = FixedVectorType::get(ScalarTy, Sz); // Check the order of pointer operands or that all pointers are the same. bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order); if (IsSorted || all_of(PointerOps, [&](Value *P) { return arePointersCompatible(P, PointerOps.front(), TLI); })) { - bool IsPossibleStrided = false; if (IsSorted) { Value *Ptr0; Value *PtrN; @@ -3930,30 +3943,71 @@ static LoadsState canVectorizeLoads(ArrayRef VL, const Value *VL0, std::optional Diff = getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE); // Check that the sorted loads are consecutive. - if (static_cast(*Diff) == VL.size() - 1) + if (static_cast(*Diff) == Sz - 1) return LoadsState::Vectorize; // Simple check if not a strided access - clear order. - IsPossibleStrided = *Diff % (VL.size() - 1) == 0; + bool IsPossibleStrided = *Diff % (Sz - 1) == 0; + // Try to generate strided load node if: + // 1. Target with strided load support is detected. + // 2. The number of loads is greater than MinProfitableStridedLoads, + // or the potential stride <= MaxProfitableLoadStride and the + // potential stride is power-of-2 (to avoid perf regressions for the very + // small number of loads) and max distance > number of loads, or potential + // stride is -1. + // 3. The loads are ordered, or number of unordered loads <= + // MaxProfitableUnorderedLoads, or loads are in reversed order. + // (this check is to avoid extra costs for very expensive shuffles). + if (IsPossibleStrided && (((Sz > MinProfitableStridedLoads || + (static_cast(std::abs(*Diff)) <= + MaxProfitableLoadStride * Sz && + isPowerOf2_32(std::abs(*Diff)))) && + static_cast(std::abs(*Diff)) > Sz) || + *Diff == -(static_cast(Sz) - 1))) { + int Stride = *Diff / static_cast(Sz - 1); + if (*Diff == Stride * static_cast(Sz - 1)) { + Align Alignment = + cast(Order.empty() ? VL.front() : VL[Order.front()]) + ->getAlign(); + if (TTI.isLegalStridedLoadStore(VecTy, Alignment)) { + // Iterate through all pointers and check if all distances are + // unique multiple of Dist. + SmallSet Dists; + for (Value *Ptr : PointerOps) { + int Dist = 0; + if (Ptr == PtrN) + Dist = *Diff; + else if (Ptr != Ptr0) + Dist = *getPointersDiff(ScalarTy, Ptr0, ScalarTy, Ptr, DL, SE); + // If the strides are not the same or repeated, we can't + // vectorize. + if (((Dist / Stride) * Stride) != Dist || + !Dists.insert(Dist).second) + break; + } + if (Dists.size() == Sz) + return LoadsState::StridedVectorize; + } + } + } } // TODO: need to improve analysis of the pointers, if not all of them are // GEPs or have > 2 operands, we end up with a gather node, which just // increases the cost. Loop *L = LI.getLoopFor(cast(VL0)->getParent()); bool ProfitableGatherPointers = - static_cast(count_if(PointerOps, [L](Value *V) { - return L && L->isLoopInvariant(V); - })) <= VL.size() / 2 && VL.size() > 2; + static_cast(count_if( + PointerOps, + [L](Value *V) { return L && L->isLoopInvariant(V); })) <= Sz / 2 && + Sz > 2; if (ProfitableGatherPointers || all_of(PointerOps, [IsSorted](Value *P) { auto *GEP = dyn_cast(P); return (IsSorted && !GEP && doesNotNeedToBeScheduled(P)) || (GEP && GEP->getNumOperands() == 2); })) { Align CommonAlignment = computeCommonAlignment(VL); - auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); if (TTI.isLegalMaskedGather(VecTy, CommonAlignment) && !TTI.forceScalarizeMaskedGather(VecTy, CommonAlignment)) - return IsPossibleStrided ? LoadsState::PossibleStridedVectorize - : LoadsState::ScatterVectorize; + return LoadsState::ScatterVectorize; } } @@ -4160,7 +4214,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return std::move(ResOrder); } if ((TE.State == TreeEntry::Vectorize || - TE.State == TreeEntry::PossibleStridedVectorize) && + TE.State == TreeEntry::StridedVectorize) && (isa(TE.getMainOp()) || (TopToBottom && isa(TE.getMainOp()))) && !TE.isAltShuffle()) @@ -4418,7 +4472,7 @@ void BoUpSLP::reorderTopToBottom() { } VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get()); if (!(TE->State == TreeEntry::Vectorize || - TE->State == TreeEntry::PossibleStridedVectorize) || + TE->State == TreeEntry::StridedVectorize) || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); if (TE->State == TreeEntry::Vectorize && @@ -4442,9 +4496,6 @@ void BoUpSLP::reorderTopToBottom() { MapVector> OrdersUses; - // Last chance orders - scatter vectorize. Try to use their orders if no - // other orders or the order is counted already. - SmallVector StridedVectorizeOrders; SmallPtrSet VisitedOps; for (const TreeEntry *OpTE : OrderedEntries) { // No need to reorder this nodes, still need to extend and to use shuffle, @@ -4491,11 +4542,6 @@ void BoUpSLP::reorderTopToBottom() { if (Order.empty()) continue; } - // Postpone scatter orders. - if (OpTE->State == TreeEntry::PossibleStridedVectorize) { - StridedVectorizeOrders.push_back(Order); - continue; - } // Stores actually store the mask, not the order, need to invert. if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() && OpTE->getOpcode() == Instruction::Store && !Order.empty()) { @@ -4512,22 +4558,6 @@ void BoUpSLP::reorderTopToBottom() { ++OrdersUses.insert(std::make_pair(Order, 0)).first->second; } } - // Set order of the user node. - if (OrdersUses.empty()) { - if (StridedVectorizeOrders.empty()) - continue; - // Add (potentially!) strided vectorize orders. - for (OrdersType &Order : StridedVectorizeOrders) - ++OrdersUses.insert(std::make_pair(Order, 0)).first->second; - } else { - // Account (potentially!) strided vectorize orders only if it was used - // already. - for (OrdersType &Order : StridedVectorizeOrders) { - auto *It = OrdersUses.find(Order); - if (It != OrdersUses.end()) - ++It->second; - } - } // Choose the most used order. ArrayRef BestOrder = OrdersUses.front().first; unsigned Cnt = OrdersUses.front().second; @@ -4569,7 +4599,7 @@ void BoUpSLP::reorderTopToBottom() { continue; } if ((TE->State == TreeEntry::Vectorize || - TE->State == TreeEntry::PossibleStridedVectorize) && + TE->State == TreeEntry::StridedVectorize) && isa(TE->getMainOp()) && !TE->isAltShuffle()) { @@ -4610,10 +4640,6 @@ bool BoUpSLP::canReorderOperands( })) continue; if (TreeEntry *TE = getVectorizedOperand(UserTE, I)) { - // FIXME: Do not reorder (possible!) strided vectorized nodes, they - // require reordering of the operands, which is not implemented yet. - if (TE->State == TreeEntry::PossibleStridedVectorize) - return false; // Do not reorder if operand node is used by many user nodes. if (any_of(TE->UserTreeIndices, [UserTE](const EdgeInfo &EI) { return EI.UserTE != UserTE; })) @@ -4664,13 +4690,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { SmallVector NonVectorized; for (const std::unique_ptr &TE : VectorizableTree) { if (TE->State != TreeEntry::Vectorize && - TE->State != TreeEntry::PossibleStridedVectorize) + TE->State != TreeEntry::StridedVectorize) NonVectorized.push_back(TE.get()); if (std::optional CurrentOrder = getReorderingData(*TE, /*TopToBottom=*/false)) { OrderedEntries.insert(TE.get()); if (!(TE->State == TreeEntry::Vectorize || - TE->State == TreeEntry::PossibleStridedVectorize) || + TE->State == TreeEntry::StridedVectorize) || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); } @@ -4688,7 +4714,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { SmallVector Filtered; for (TreeEntry *TE : OrderedEntries) { if (!(TE->State == TreeEntry::Vectorize || - TE->State == TreeEntry::PossibleStridedVectorize || + TE->State == TreeEntry::StridedVectorize || (TE->State == TreeEntry::NeedToGather && GathersToOrders.count(TE))) || TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() || @@ -4733,9 +4759,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { MapVector> OrdersUses; - // Last chance orders - scatter vectorize. Try to use their orders if no - // other orders or the order is counted already. - SmallVector> StridedVectorizeOrders; // Do the analysis for each tree entry only once, otherwise the order of // the same node my be considered several times, though might be not // profitable. @@ -4757,11 +4780,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { Data.second, [OpTE](const std::pair &P) { return P.second == OpTE; }); - // Postpone scatter orders. - if (OpTE->State == TreeEntry::PossibleStridedVectorize) { - StridedVectorizeOrders.emplace_back(Order, NumOps); - continue; - } // Stores actually store the mask, not the order, need to invert. if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() && OpTE->getOpcode() == Instruction::Store && !Order.empty()) { @@ -4819,30 +4837,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { ++Res.first->second; } } - // If no orders - skip current nodes and jump to the next one, if any. - if (OrdersUses.empty()) { - if (StridedVectorizeOrders.empty() || - (Data.first->ReorderIndices.empty() && - Data.first->ReuseShuffleIndices.empty() && - !(IgnoreReorder && - Data.first == VectorizableTree.front().get()))) { - for (const std::pair &Op : Data.second) - OrderedEntries.remove(Op.second); - continue; - } - // Add (potentially!) strided vectorize orders. - for (std::pair &Pair : StridedVectorizeOrders) - OrdersUses.insert(std::make_pair(Pair.first, 0)).first->second += - Pair.second; - } else { - // Account (potentially!) strided vectorize orders only if it was used - // already. - for (std::pair &Pair : StridedVectorizeOrders) { - auto *It = OrdersUses.find(Pair.first); - if (It != OrdersUses.end()) - It->second += Pair.second; - } - } // Choose the best order. ArrayRef BestOrder = OrdersUses.front().first; unsigned Cnt = OrdersUses.front().second; @@ -4878,7 +4872,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { } // Gathers are processed separately. if (TE->State != TreeEntry::Vectorize && - TE->State != TreeEntry::PossibleStridedVectorize && + TE->State != TreeEntry::StridedVectorize && (TE->State != TreeEntry::ScatterVectorize || TE->ReorderIndices.empty())) continue; @@ -4910,7 +4904,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { Data.first->reorderOperands(Mask); if (!isa(Data.first->getMainOp()) || Data.first->isAltShuffle() || - Data.first->State == TreeEntry::PossibleStridedVectorize) { + Data.first->State == TreeEntry::StridedVectorize) { reorderScalars(Data.first->Scalars, Mask); reorderOrder(Data.first->ReorderIndices, MaskOrder, /*BottomOrder=*/true); @@ -4973,7 +4967,6 @@ void BoUpSLP::buildExternalUses( // instructions. If that is the case, the one in FoundLane will // be used. if (UseEntry->State == TreeEntry::ScatterVectorize || - UseEntry->State == TreeEntry::PossibleStridedVectorize || !doesInTreeUserNeedToExtract( Scalar, cast(UseEntry->Scalars.front()), TLI)) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U @@ -5331,8 +5324,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( return TreeEntry::Vectorize; case LoadsState::ScatterVectorize: return TreeEntry::ScatterVectorize; - case LoadsState::PossibleStridedVectorize: - return TreeEntry::PossibleStridedVectorize; + case LoadsState::StridedVectorize: + return TreeEntry::StridedVectorize; case LoadsState::Gather: #ifndef NDEBUG Type *ScalarTy = VL0->getType(); @@ -5753,8 +5746,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, BasicBlock *BB = nullptr; bool IsScatterVectorizeUserTE = UserTreeIdx.UserTE && - (UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize || - UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize); + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; bool AreAllSameInsts = (S.getOpcode() && allSameBlock(VL)) || (S.OpValue->getType()->isPointerTy() && IsScatterVectorizeUserTE && @@ -5851,8 +5843,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Special processing for sorted pointers for ScatterVectorize node with // constant indeces only. if (AreAllSameInsts && UserTreeIdx.UserTE && - (UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize || - UserTreeIdx.UserTE->State == TreeEntry::PossibleStridedVectorize) && + UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize && !(S.getOpcode() && allSameBlock(VL))) { assert(S.OpValue->getType()->isPointerTy() && count_if(VL, [](Value *V) { return isa(V); }) >= @@ -6049,18 +6040,17 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } TE->setOperandsInOrder(); break; - case TreeEntry::PossibleStridedVectorize: + case TreeEntry::StridedVectorize: // Vectorizing non-consecutive loads with `llvm.masked.gather`. if (CurrentOrder.empty()) { - TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S, + TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S, UserTreeIdx, ReuseShuffleIndicies); } else { - TE = newTreeEntry(VL, TreeEntry::PossibleStridedVectorize, Bundle, S, + TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S, UserTreeIdx, ReuseShuffleIndicies, CurrentOrder); } TE->setOperandsInOrder(); - buildTree_rec(PointerOps, Depth + 1, {TE, 0}); - LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n"); + LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n"); break; case TreeEntry::ScatterVectorize: // Vectorizing non-consecutive loads with `llvm.masked.gather`. @@ -7091,7 +7081,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { !isSplat(Gathers)) { InstructionCost BaseCost = R.getGatherCost(Gathers, !Root); SetVector VectorizedLoads; - SmallVector VectorizedStarts; + SmallVector> VectorizedStarts; SmallVector ScatterVectorized; unsigned StartIdx = 0; unsigned VF = VL.size() / 2; @@ -7115,12 +7105,16 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { switch (LS) { case LoadsState::Vectorize: case LoadsState::ScatterVectorize: - case LoadsState::PossibleStridedVectorize: + case LoadsState::StridedVectorize: // Mark the vectorized loads so that we don't vectorize them // again. // TODO: better handling of loads with reorders. - if (LS == LoadsState::Vectorize && CurrentOrder.empty()) - VectorizedStarts.push_back(Cnt); + if (((LS == LoadsState::Vectorize || + LS == LoadsState::StridedVectorize) && + CurrentOrder.empty()) || + (LS == LoadsState::StridedVectorize && + isReverseOrder(CurrentOrder))) + VectorizedStarts.emplace_back(Cnt, LS); else ScatterVectorized.push_back(Cnt); VectorizedLoads.insert(Slice.begin(), Slice.end()); @@ -7164,16 +7158,20 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { CostKind, TTI::OperandValueInfo(), LI); } auto *LoadTy = FixedVectorType::get(VL.front()->getType(), VF); - for (unsigned P : VectorizedStarts) { - auto *LI = cast(VL[P]); + for (const std::pair &P : VectorizedStarts) { + auto *LI = cast(VL[P.first]); Align Alignment = LI->getAlign(); GatherCost += - TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, - LI->getPointerAddressSpace(), CostKind, - TTI::OperandValueInfo(), LI); + P.second == LoadsState::Vectorize + ? TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, + LI->getPointerAddressSpace(), CostKind, + TTI::OperandValueInfo(), LI) + : TTI.getStridedMemoryOpCost( + Instruction::Load, LoadTy, LI->getPointerOperand(), + /*VariableMask=*/false, Alignment, CostKind, LI); // Estimate GEP cost. SmallVector PointerOps(VF); - for (auto [I, V] : enumerate(VL.slice(P, VF))) + for (auto [I, V] : enumerate(VL.slice(P.first, VF))) PointerOps[I] = cast(V)->getPointerOperand(); auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts(TTI, PointerOps, LI->getPointerOperand(), @@ -7913,8 +7911,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, } InstructionCost CommonCost = 0; SmallVector Mask; + bool IsReverseOrder = isReverseOrder(E->ReorderIndices); if (!E->ReorderIndices.empty() && - E->State != TreeEntry::PossibleStridedVectorize) { + (E->State != TreeEntry::StridedVectorize || !IsReverseOrder)) { SmallVector NewMask; if (E->getOpcode() == Instruction::Store) { // For stores the order is actually a mask. @@ -7932,7 +7931,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask); assert((E->State == TreeEntry::Vectorize || E->State == TreeEntry::ScatterVectorize || - E->State == TreeEntry::PossibleStridedVectorize) && + E->State == TreeEntry::StridedVectorize) && "Unhandled state"); assert(E->getOpcode() && ((allSameType(VL) && allSameBlock(VL)) || @@ -7952,7 +7951,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, } auto GetCastContextHint = [&](Value *V) { if (const TreeEntry *OpTE = getTreeEntry(V)) { - if (OpTE->State == TreeEntry::ScatterVectorize) + if (OpTE->State == TreeEntry::ScatterVectorize || + OpTE->State == TreeEntry::StridedVectorize) return TTI::CastContextHint::GatherScatter; if (OpTE->State == TreeEntry::Vectorize && OpTE->getOpcode() == Instruction::Load && !OpTE->isAltShuffle()) { @@ -8028,8 +8028,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // Calculate cost difference from vectorizing set of GEPs. // Negative value means vectorizing is profitable. auto GetGEPCostDiff = [=](ArrayRef Ptrs, Value *BasePtr) { - assert(E->State == TreeEntry::Vectorize && - "Entry state expected to be Vectorize here."); + assert((E->State == TreeEntry::Vectorize || + E->State == TreeEntry::StridedVectorize) && + "Entry state expected to be Vectorize or StridedVectorize here."); InstructionCost ScalarCost = 0; InstructionCost VecCost = 0; std::tie(ScalarCost, VecCost) = getGEPCosts( @@ -8382,10 +8383,14 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, VecLdCost = TTI->getMemoryOpCost( Instruction::Load, VecTy, LI0->getAlign(), LI0->getPointerAddressSpace(), CostKind, TTI::OperandValueInfo()); + } else if (E->State == TreeEntry::StridedVectorize) { + Align CommonAlignment = + computeCommonAlignment(UniqueValues.getArrayRef()); + VecLdCost = TTI->getStridedMemoryOpCost( + Instruction::Load, VecTy, LI0->getPointerOperand(), + /*VariableMask=*/false, CommonAlignment, CostKind); } else { - assert((E->State == TreeEntry::ScatterVectorize || - E->State == TreeEntry::PossibleStridedVectorize) && - "Unknown EntryState"); + assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState"); Align CommonAlignment = computeCommonAlignment(UniqueValues.getArrayRef()); VecLdCost = TTI->getGatherScatterOpCost( @@ -8398,8 +8403,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, InstructionCost Cost = GetCostDiff(GetScalarCost, GetVectorCost); // If this node generates masked gather load then it is not a terminal node. // Hence address operand cost is estimated separately. - if (E->State == TreeEntry::ScatterVectorize || - E->State == TreeEntry::PossibleStridedVectorize) + if (E->State == TreeEntry::ScatterVectorize) return Cost; // Estimate cost of GEPs since this tree node is a terminator. @@ -8608,7 +8612,7 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const { if (VectorizableTree[0]->State == TreeEntry::NeedToGather || (VectorizableTree[1]->State == TreeEntry::NeedToGather && VectorizableTree[0]->State != TreeEntry::ScatterVectorize && - VectorizableTree[0]->State != TreeEntry::PossibleStridedVectorize)) + VectorizableTree[0]->State != TreeEntry::StridedVectorize)) return false; return true; @@ -10579,11 +10583,6 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, bool PostponedPHIs) { ValueList &VL = E->getOperand(NodeIdx); - if (E->State == TreeEntry::PossibleStridedVectorize && - !E->ReorderIndices.empty()) { - SmallVector Mask(E->ReorderIndices.begin(), E->ReorderIndices.end()); - reorderScalars(VL, Mask); - } const unsigned VF = VL.size(); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. @@ -11157,6 +11156,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { return Vec; } + bool IsReverseOrder = isReverseOrder(E->ReorderIndices); auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy, bool IsSigned) { if (V->getType() != VecTy) @@ -11167,7 +11167,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { ArrayRef(reinterpret_cast(E->ReorderIndices.begin()), E->ReorderIndices.size()); ShuffleBuilder.add(V, Mask); - } else if (E->State == TreeEntry::PossibleStridedVectorize) { + } else if (E->State == TreeEntry::StridedVectorize && IsReverseOrder) { ShuffleBuilder.addOrdered(V, std::nullopt); } else { ShuffleBuilder.addOrdered(V, E->ReorderIndices); @@ -11177,7 +11177,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { assert((E->State == TreeEntry::Vectorize || E->State == TreeEntry::ScatterVectorize || - E->State == TreeEntry::PossibleStridedVectorize) && + E->State == TreeEntry::StridedVectorize) && "Unhandled state"); unsigned ShuffleOrOp = E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode(); @@ -11642,10 +11642,29 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *PO = LI->getPointerOperand(); if (E->State == TreeEntry::Vectorize) { NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign()); + } else if (E->State == TreeEntry::StridedVectorize) { + Value *Ptr0 = cast(E->Scalars.front())->getPointerOperand(); + Value *PtrN = cast(E->Scalars.back())->getPointerOperand(); + PO = IsReverseOrder ? PtrN : Ptr0; + std::optional Diff = getPointersDiff( + VL0->getType(), Ptr0, VL0->getType(), PtrN, *DL, *SE); + Type *StrideTy = DL->getIndexType(PO->getType()); + int Stride = *Diff / (static_cast(E->Scalars.size()) - 1); + Value *StrideVal = + ConstantInt::get(StrideTy, (IsReverseOrder ? -1 : 1) * Stride * + DL->getTypeAllocSize(ScalarTy)); + Align CommonAlignment = computeCommonAlignment(E->Scalars); + auto *Inst = Builder.CreateIntrinsic( + Intrinsic::experimental_vp_strided_load, + {VecTy, PO->getType(), StrideTy}, + {PO, StrideVal, Builder.getAllOnesMask(VecTy->getElementCount()), + Builder.getInt32(E->Scalars.size())}); + Inst->addParamAttr( + /*ArgNo=*/0, + Attribute::getWithAlignment(Inst->getContext(), CommonAlignment)); + NewLI = Inst; } else { - assert((E->State == TreeEntry::ScatterVectorize || - E->State == TreeEntry::PossibleStridedVectorize) && - "Unhandled state"); + assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state"); Value *VecPtr = vectorizeOperand(E, 0, PostponedPHIs); if (E->VectorizedValue) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); @@ -12069,8 +12088,11 @@ Value *BoUpSLP::vectorizeTree( [&](llvm::User *U) { TreeEntry *UseEntry = getTreeEntry(U); return UseEntry && - UseEntry->State == TreeEntry::Vectorize && - E->State == TreeEntry::Vectorize && + (UseEntry->State == TreeEntry::Vectorize || + UseEntry->State == + TreeEntry::StridedVectorize) && + (E->State == TreeEntry::Vectorize || + E->State == TreeEntry::StridedVectorize) && doesInTreeUserNeedToExtract( Scalar, cast(UseEntry->Scalars.front()), diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index dc5fb917886347..e167b6a47af592 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-80 | FileCheck %s +; RUN: opt -S -mtriple riscv64-unknown-linux-gnu < %s --passes=slp-vectorizer -mattr=+v -slp-threshold=-40 | FileCheck %s define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.ptr, ptr %add.ptr64) { ; CHECK-LABEL: define i32 @test( @@ -67,305 +67,303 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP48:%.*]] = extractelement <2 x i32> [[TMP47]], i32 0 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <2 x i32> [[TMP47]], i32 1 ; CHECK-NEXT: [[SUB59_2:%.*]] = sub i32 [[TMP48]], [[TMP49]] -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr i8, ptr null, i64 4 ; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr null, align 1 ; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2 ; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x ptr> , ptr [[ARRAYIDX20_3]], i32 1 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x ptr> , ptr [[ARRAYIDX20_3]], i32 0 ; CHECK-NEXT: [[TMP53:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP52]], i32 1, <2 x i1> , <2 x i8> poison) ; CHECK-NEXT: [[TMP54:%.*]] = zext <2 x i8> [[TMP53]] to <2 x i32> -; CHECK-NEXT: [[TMP55:%.*]] = insertelement <2 x ptr> , ptr [[ARRAYIDX22_3]], i32 1 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <2 x ptr> , ptr [[ARRAYIDX22_3]], i32 0 ; CHECK-NEXT: [[TMP56:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP55]], i32 1, <2 x i1> , <2 x i8> poison) ; CHECK-NEXT: [[TMP57:%.*]] = zext <2 x i8> [[TMP56]] to <2 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = sub <2 x i32> [[TMP54]], [[TMP57]] -; CHECK-NEXT: [[TMP59:%.*]] = insertelement <2 x ptr> , ptr [[ARRAYIDX3_3]], i32 0 -; CHECK-NEXT: [[TMP60:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP59]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP61:%.*]] = zext <2 x i8> [[TMP60]] to <2 x i32> -; CHECK-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP63:%.*]] = zext <2 x i8> [[TMP62]] to <2 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = sub <2 x i32> [[TMP61]], [[TMP63]] -; CHECK-NEXT: [[TMP65:%.*]] = shl <2 x i32> [[TMP64]], -; CHECK-NEXT: [[TMP66:%.*]] = add <2 x i32> [[TMP65]], [[TMP58]] -; CHECK-NEXT: [[TMP67:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP70:%.*]] = zext <2 x i8> [[TMP69]] to <2 x i32> -; CHECK-NEXT: [[TMP71:%.*]] = sub <2 x i32> [[TMP68]], [[TMP70]] -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i8> poison, i8 [[TMP50]], i32 0 -; CHECK-NEXT: [[TMP73:%.*]] = insertelement <2 x i8> [[TMP72]], i8 [[TMP51]], i32 1 -; CHECK-NEXT: [[TMP74:%.*]] = zext <2 x i8> [[TMP73]] to <2 x i32> -; CHECK-NEXT: [[TMP75:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP76:%.*]] = zext <2 x i8> [[TMP75]] to <2 x i32> -; CHECK-NEXT: [[TMP77:%.*]] = sub <2 x i32> [[TMP74]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = shl <2 x i32> [[TMP77]], -; CHECK-NEXT: [[TMP79:%.*]] = add <2 x i32> [[TMP78]], [[TMP71]] -; CHECK-NEXT: [[TMP80:%.*]] = sub <2 x i32> [[TMP66]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <2 x i32> [[TMP79]], <2 x i32> [[TMP46]], <2 x i32> -; CHECK-NEXT: [[TMP82:%.*]] = shufflevector <2 x i32> [[TMP66]], <2 x i32> [[TMP30]], <2 x i32> -; CHECK-NEXT: [[TMP83:%.*]] = add <2 x i32> [[TMP81]], [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <2 x i32> [[TMP79]], <2 x i32> [[TMP46]], <2 x i32> -; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <2 x i32> [[TMP66]], <2 x i32> [[TMP30]], <2 x i32> -; CHECK-NEXT: [[TMP86:%.*]] = add <2 x i32> [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = add <2 x i32> [[TMP86]], [[TMP83]] -; CHECK-NEXT: [[TMP88:%.*]] = sub <2 x i32> [[TMP83]], [[TMP86]] -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP80]], i32 0 -; CHECK-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP80]], i32 1 -; CHECK-NEXT: [[SUB59_3:%.*]] = sub i32 [[TMP89]], [[TMP90]] -; CHECK-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP87]], i32 0 -; CHECK-NEXT: [[TMP92:%.*]] = extractelement <2 x i32> [[TMP87]], i32 1 -; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[TMP91]], [[TMP92]] -; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[TMP92]], [[TMP91]] -; CHECK-NEXT: [[TMP93:%.*]] = extractelement <2 x i32> [[TMP54]], i32 0 -; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP93]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> , i32 2) +; CHECK-NEXT: [[TMP60:%.*]] = zext <2 x i8> [[TMP59]] to <2 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP62:%.*]] = zext <2 x i8> [[TMP61]] to <2 x i32> +; CHECK-NEXT: [[TMP63:%.*]] = sub <2 x i32> [[TMP60]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = shl <2 x i32> [[TMP63]], +; CHECK-NEXT: [[TMP65:%.*]] = add <2 x i32> [[TMP64]], [[TMP58]] +; CHECK-NEXT: [[TMP66:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP67:%.*]] = zext <2 x i8> [[TMP66]] to <2 x i32> +; CHECK-NEXT: [[TMP68:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP69:%.*]] = zext <2 x i8> [[TMP68]] to <2 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = sub <2 x i32> [[TMP67]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i8> poison, i8 [[TMP51]], i32 0 +; CHECK-NEXT: [[TMP72:%.*]] = insertelement <2 x i8> [[TMP71]], i8 [[TMP50]], i32 1 +; CHECK-NEXT: [[TMP73:%.*]] = zext <2 x i8> [[TMP72]] to <2 x i32> +; CHECK-NEXT: [[TMP74:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> getelementptr (i8, <2 x ptr> zeroinitializer, <2 x i64> ), i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP75:%.*]] = zext <2 x i8> [[TMP74]] to <2 x i32> +; CHECK-NEXT: [[TMP76:%.*]] = sub <2 x i32> [[TMP73]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = shl <2 x i32> [[TMP76]], +; CHECK-NEXT: [[TMP78:%.*]] = add <2 x i32> [[TMP77]], [[TMP70]] +; CHECK-NEXT: [[TMP79:%.*]] = sub <2 x i32> [[TMP65]], [[TMP78]] +; CHECK-NEXT: [[TMP80:%.*]] = shufflevector <2 x i32> [[TMP78]], <2 x i32> [[TMP46]], <2 x i32> +; CHECK-NEXT: [[TMP81:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> [[TMP30]], <2 x i32> +; CHECK-NEXT: [[TMP82:%.*]] = add <2 x i32> [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <2 x i32> [[TMP78]], <2 x i32> [[TMP46]], <2 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <2 x i32> [[TMP65]], <2 x i32> [[TMP30]], <2 x i32> +; CHECK-NEXT: [[TMP85:%.*]] = add <2 x i32> [[TMP83]], [[TMP84]] +; CHECK-NEXT: [[TMP86:%.*]] = add <2 x i32> [[TMP85]], [[TMP82]] +; CHECK-NEXT: [[TMP87:%.*]] = sub <2 x i32> [[TMP82]], [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = extractelement <2 x i32> [[TMP79]], i32 0 +; CHECK-NEXT: [[TMP89:%.*]] = extractelement <2 x i32> [[TMP79]], i32 1 +; CHECK-NEXT: [[SUB59_3:%.*]] = sub i32 [[TMP89]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = extractelement <2 x i32> [[TMP86]], i32 0 +; CHECK-NEXT: [[TMP91:%.*]] = extractelement <2 x i32> [[TMP86]], i32 1 +; CHECK-NEXT: [[ADD94:%.*]] = add i32 [[TMP90]], [[TMP91]] +; CHECK-NEXT: [[SUB102:%.*]] = sub i32 [[TMP91]], [[TMP90]] +; CHECK-NEXT: [[TMP92:%.*]] = extractelement <2 x i32> [[TMP54]], i32 1 +; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[TMP92]], 15 ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 ; CHECK-NEXT: [[MUL_I:%.*]] = mul i32 [[AND_I]], 65535 -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <2 x i32> [[TMP86]], i32 1 -; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP94]], 15 +; CHECK-NEXT: [[TMP93:%.*]] = extractelement <2 x i32> [[TMP85]], i32 1 +; CHECK-NEXT: [[SHR_I49:%.*]] = lshr i32 [[TMP93]], 15 ; CHECK-NEXT: [[AND_I50:%.*]] = and i32 [[SHR_I49]], 65537 ; CHECK-NEXT: [[MUL_I51:%.*]] = mul i32 [[AND_I50]], 65535 -; CHECK-NEXT: [[TMP95:%.*]] = extractelement <2 x i32> [[TMP88]], i32 0 -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <2 x i32> [[TMP88]], i32 1 -; CHECK-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP95]], [[TMP96]] -; CHECK-NEXT: [[TMP97:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20]], align 1 -; CHECK-NEXT: [[TMP98:%.*]] = zext <2 x i8> [[TMP97]] to <2 x i32> -; CHECK-NEXT: [[TMP99:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_2]], i32 0 -; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <2 x i32> [[TMP99]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_3]], i32 0 -; CHECK-NEXT: [[TMP102:%.*]] = shufflevector <2 x i32> [[TMP101]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP103:%.*]] = add <2 x i32> [[TMP100]], [[TMP102]] -; CHECK-NEXT: [[TMP104:%.*]] = sub <2 x i32> [[TMP100]], [[TMP102]] -; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <2 x i32> [[TMP103]], <2 x i32> [[TMP104]], <2 x i32> -; CHECK-NEXT: [[TMP106:%.*]] = load <2 x i8>, ptr [[PIX1]], align 1 -; CHECK-NEXT: [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <2 x i32> [[TMP107]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP109:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARRAYIDX22]], i32 1 -; CHECK-NEXT: [[TMP110:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP109]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP111:%.*]] = zext <2 x i8> [[TMP110]] to <2 x i32> -; CHECK-NEXT: [[TMP112:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP2]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP113:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32> -; CHECK-NEXT: [[TMP114:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP5]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP115:%.*]] = zext <2 x i8> [[TMP114]] to <2 x i32> -; CHECK-NEXT: [[TMP116:%.*]] = sub <2 x i32> [[TMP113]], [[TMP115]] -; CHECK-NEXT: [[TMP117:%.*]] = shl <2 x i32> [[TMP116]], -; CHECK-NEXT: [[TMP118:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP6]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP119:%.*]] = zext <2 x i8> [[TMP118]] to <2 x i32> -; CHECK-NEXT: [[TMP120:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP7]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP121:%.*]] = zext <2 x i8> [[TMP120]] to <2 x i32> -; CHECK-NEXT: [[TMP122:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP8]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP123:%.*]] = zext <2 x i8> [[TMP122]] to <2 x i32> -; CHECK-NEXT: [[TMP124:%.*]] = sub <2 x i32> [[TMP121]], [[TMP123]] -; CHECK-NEXT: [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], -; CHECK-NEXT: [[TMP126:%.*]] = shufflevector <2 x i32> [[TMP107]], <2 x i32> [[TMP98]], <2 x i32> -; CHECK-NEXT: [[TMP127:%.*]] = sub <2 x i32> [[TMP126]], [[TMP111]] -; CHECK-NEXT: [[TMP128:%.*]] = add <2 x i32> [[TMP117]], [[TMP127]] -; CHECK-NEXT: [[TMP129:%.*]] = shufflevector <2 x i32> [[TMP108]], <2 x i32> [[TMP98]], <2 x i32> -; CHECK-NEXT: [[TMP130:%.*]] = sub <2 x i32> [[TMP129]], [[TMP119]] -; CHECK-NEXT: [[TMP131:%.*]] = add <2 x i32> [[TMP125]], [[TMP130]] -; CHECK-NEXT: [[TMP132:%.*]] = extractelement <2 x i32> [[TMP128]], i32 1 -; CHECK-NEXT: [[TMP133:%.*]] = extractelement <2 x i32> [[TMP131]], i32 1 -; CHECK-NEXT: [[ADD46:%.*]] = add i32 [[TMP133]], [[TMP132]] -; CHECK-NEXT: [[TMP134:%.*]] = sub <2 x i32> [[TMP128]], [[TMP131]] -; CHECK-NEXT: [[TMP135:%.*]] = extractelement <2 x i32> [[TMP128]], i32 0 -; CHECK-NEXT: [[TMP136:%.*]] = extractelement <2 x i32> [[TMP131]], i32 0 -; CHECK-NEXT: [[ADD44:%.*]] = add i32 [[TMP136]], [[TMP135]] -; CHECK-NEXT: [[TMP137:%.*]] = lshr <2 x i32> [[TMP108]], -; CHECK-NEXT: [[TMP138:%.*]] = and <2 x i32> [[TMP137]], -; CHECK-NEXT: [[TMP139:%.*]] = mul <2 x i32> [[TMP138]], -; CHECK-NEXT: [[TMP140:%.*]] = extractelement <2 x i32> [[TMP134]], i32 0 -; CHECK-NEXT: [[TMP141:%.*]] = extractelement <2 x i32> [[TMP134]], i32 1 -; CHECK-NEXT: [[SUB59:%.*]] = sub i32 [[TMP140]], [[TMP141]] -; CHECK-NEXT: [[TMP142:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[TMP143:%.*]] = zext <2 x i8> [[TMP142]] to <2 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = extractelement <2 x i32> [[TMP87]], i32 0 +; CHECK-NEXT: [[TMP95:%.*]] = extractelement <2 x i32> [[TMP87]], i32 1 +; CHECK-NEXT: [[ADD94_2:%.*]] = add i32 [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP96:%.*]] = load <2 x i8>, ptr [[ARRAYIDX20]], align 1 +; CHECK-NEXT: [[TMP97:%.*]] = zext <2 x i8> [[TMP96]] to <2 x i32> +; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_2]], i32 0 +; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <2 x i32> [[TMP98]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP100:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_3]], i32 0 +; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <2 x i32> [[TMP100]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP102:%.*]] = add <2 x i32> [[TMP99]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = sub <2 x i32> [[TMP99]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = shufflevector <2 x i32> [[TMP102]], <2 x i32> [[TMP103]], <2 x i32> +; CHECK-NEXT: [[TMP105:%.*]] = load <2 x i8>, ptr [[PIX1]], align 1 +; CHECK-NEXT: [[TMP106:%.*]] = zext <2 x i8> [[TMP105]] to <2 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <2 x i32> [[TMP106]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[ARRAYIDX22]], i32 1 +; CHECK-NEXT: [[TMP109:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP108]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP110:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32> +; CHECK-NEXT: [[TMP111:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP2]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP112:%.*]] = zext <2 x i8> [[TMP111]] to <2 x i32> +; CHECK-NEXT: [[TMP113:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP5]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP114:%.*]] = zext <2 x i8> [[TMP113]] to <2 x i32> +; CHECK-NEXT: [[TMP115:%.*]] = sub <2 x i32> [[TMP112]], [[TMP114]] +; CHECK-NEXT: [[TMP116:%.*]] = shl <2 x i32> [[TMP115]], +; CHECK-NEXT: [[TMP117:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP6]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP118:%.*]] = zext <2 x i8> [[TMP117]] to <2 x i32> +; CHECK-NEXT: [[TMP119:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP7]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP120:%.*]] = zext <2 x i8> [[TMP119]] to <2 x i32> +; CHECK-NEXT: [[TMP121:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP8]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP122:%.*]] = zext <2 x i8> [[TMP121]] to <2 x i32> +; CHECK-NEXT: [[TMP123:%.*]] = sub <2 x i32> [[TMP120]], [[TMP122]] +; CHECK-NEXT: [[TMP124:%.*]] = shl <2 x i32> [[TMP123]], +; CHECK-NEXT: [[TMP125:%.*]] = shufflevector <2 x i32> [[TMP106]], <2 x i32> [[TMP97]], <2 x i32> +; CHECK-NEXT: [[TMP126:%.*]] = sub <2 x i32> [[TMP125]], [[TMP110]] +; CHECK-NEXT: [[TMP127:%.*]] = add <2 x i32> [[TMP116]], [[TMP126]] +; CHECK-NEXT: [[TMP128:%.*]] = shufflevector <2 x i32> [[TMP107]], <2 x i32> [[TMP97]], <2 x i32> +; CHECK-NEXT: [[TMP129:%.*]] = sub <2 x i32> [[TMP128]], [[TMP118]] +; CHECK-NEXT: [[TMP130:%.*]] = add <2 x i32> [[TMP124]], [[TMP129]] +; CHECK-NEXT: [[TMP131:%.*]] = extractelement <2 x i32> [[TMP127]], i32 1 +; CHECK-NEXT: [[TMP132:%.*]] = extractelement <2 x i32> [[TMP130]], i32 1 +; CHECK-NEXT: [[ADD46:%.*]] = add i32 [[TMP132]], [[TMP131]] +; CHECK-NEXT: [[TMP133:%.*]] = sub <2 x i32> [[TMP127]], [[TMP130]] +; CHECK-NEXT: [[TMP134:%.*]] = extractelement <2 x i32> [[TMP127]], i32 0 +; CHECK-NEXT: [[TMP135:%.*]] = extractelement <2 x i32> [[TMP130]], i32 0 +; CHECK-NEXT: [[ADD44:%.*]] = add i32 [[TMP135]], [[TMP134]] +; CHECK-NEXT: [[TMP136:%.*]] = lshr <2 x i32> [[TMP107]], +; CHECK-NEXT: [[TMP137:%.*]] = and <2 x i32> [[TMP136]], +; CHECK-NEXT: [[TMP138:%.*]] = mul <2 x i32> [[TMP137]], +; CHECK-NEXT: [[TMP139:%.*]] = extractelement <2 x i32> [[TMP133]], i32 0 +; CHECK-NEXT: [[TMP140:%.*]] = extractelement <2 x i32> [[TMP133]], i32 1 +; CHECK-NEXT: [[SUB59:%.*]] = sub i32 [[TMP139]], [[TMP140]] +; CHECK-NEXT: [[TMP141:%.*]] = load <2 x i8>, ptr [[ARRAYIDX8_1]], align 1 +; CHECK-NEXT: [[TMP142:%.*]] = zext <2 x i8> [[TMP141]] to <2 x i32> ; CHECK-NEXT: [[ADD_PTR644:%.*]] = getelementptr i8, ptr [[PIX2]], i64 [[IDX_EXT63]] ; CHECK-NEXT: [[ARRAYIDX22_1:%.*]] = getelementptr i8, ptr [[ADD_PTR644]], i64 2 -; CHECK-NEXT: [[TMP144:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR644]], i32 0 -; CHECK-NEXT: [[TMP145:%.*]] = insertelement <2 x ptr> [[TMP144]], ptr [[ARRAYIDX22_1]], i32 1 -; CHECK-NEXT: [[TMP146:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP145]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP147:%.*]] = zext <2 x i8> [[TMP146]] to <2 x i32> -; CHECK-NEXT: [[TMP148:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR3]], i32 0 -; CHECK-NEXT: [[TMP149:%.*]] = shufflevector <2 x ptr> [[TMP148]], <2 x ptr> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP150:%.*]] = getelementptr i8, <2 x ptr> [[TMP149]], <2 x i64> -; CHECK-NEXT: [[TMP151:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP150]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP152:%.*]] = zext <2 x i8> [[TMP151]] to <2 x i32> -; CHECK-NEXT: [[TMP153:%.*]] = shufflevector <2 x ptr> [[TMP145]], <2 x ptr> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP154:%.*]] = getelementptr i8, <2 x ptr> [[TMP153]], <2 x i64> -; CHECK-NEXT: [[TMP155:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP154]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP156:%.*]] = zext <2 x i8> [[TMP155]] to <2 x i32> -; CHECK-NEXT: [[TMP157:%.*]] = sub <2 x i32> [[TMP152]], [[TMP156]] -; CHECK-NEXT: [[TMP158:%.*]] = shl <2 x i32> [[TMP157]], -; CHECK-NEXT: [[TMP159:%.*]] = getelementptr i8, <2 x ptr> [[TMP153]], <2 x i64> -; CHECK-NEXT: [[TMP160:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP159]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP161:%.*]] = zext <2 x i8> [[TMP160]] to <2 x i32> -; CHECK-NEXT: [[TMP162:%.*]] = getelementptr i8, <2 x ptr> [[TMP149]], <2 x i64> -; CHECK-NEXT: [[TMP163:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP162]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP164:%.*]] = zext <2 x i8> [[TMP163]] to <2 x i32> -; CHECK-NEXT: [[TMP165:%.*]] = getelementptr i8, <2 x ptr> [[TMP153]], <2 x i64> -; CHECK-NEXT: [[TMP166:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP165]], i32 1, <2 x i1> , <2 x i8> poison) -; CHECK-NEXT: [[TMP167:%.*]] = zext <2 x i8> [[TMP166]] to <2 x i32> -; CHECK-NEXT: [[TMP168:%.*]] = sub <2 x i32> [[TMP164]], [[TMP167]] -; CHECK-NEXT: [[TMP169:%.*]] = shl <2 x i32> [[TMP168]], -; CHECK-NEXT: [[TMP170:%.*]] = insertelement <2 x i32> [[TMP143]], i32 [[CONV33_1]], i32 1 -; CHECK-NEXT: [[TMP171:%.*]] = sub <2 x i32> [[TMP170]], [[TMP161]] -; CHECK-NEXT: [[TMP172:%.*]] = add <2 x i32> [[TMP169]], [[TMP171]] -; CHECK-NEXT: [[TMP173:%.*]] = insertelement <2 x i32> [[TMP143]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP174:%.*]] = sub <2 x i32> [[TMP173]], [[TMP147]] -; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP158]], [[TMP174]] -; CHECK-NEXT: [[TMP176:%.*]] = add <2 x i32> [[TMP172]], [[TMP175]] -; CHECK-NEXT: [[TMP177:%.*]] = sub <2 x i32> [[TMP175]], [[TMP172]] -; CHECK-NEXT: [[TMP178:%.*]] = extractelement <2 x i32> [[TMP176]], i32 0 -; CHECK-NEXT: [[TMP179:%.*]] = extractelement <2 x i32> [[TMP176]], i32 1 -; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP178]], [[TMP179]] -; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> [[TMP134]], <2 x i32> -; CHECK-NEXT: [[TMP181:%.*]] = shufflevector <2 x i32> [[TMP177]], <2 x i32> [[TMP134]], <2 x i32> -; CHECK-NEXT: [[TMP182:%.*]] = add <2 x i32> [[TMP180]], [[TMP181]] -; CHECK-NEXT: [[TMP183:%.*]] = extractelement <2 x i32> [[TMP177]], i32 0 -; CHECK-NEXT: [[TMP184:%.*]] = extractelement <2 x i32> [[TMP177]], i32 1 -; CHECK-NEXT: [[SUB59_1:%.*]] = sub i32 [[TMP183]], [[TMP184]] -; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP179]], 15 +; CHECK-NEXT: [[TMP143:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR644]], i32 0 +; CHECK-NEXT: [[TMP144:%.*]] = insertelement <2 x ptr> [[TMP143]], ptr [[ARRAYIDX22_1]], i32 1 +; CHECK-NEXT: [[TMP145:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP144]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP146:%.*]] = zext <2 x i8> [[TMP145]] to <2 x i32> +; CHECK-NEXT: [[TMP147:%.*]] = insertelement <2 x ptr> poison, ptr [[ADD_PTR3]], i32 0 +; CHECK-NEXT: [[TMP148:%.*]] = shufflevector <2 x ptr> [[TMP147]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP149:%.*]] = getelementptr i8, <2 x ptr> [[TMP148]], <2 x i64> +; CHECK-NEXT: [[TMP150:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP149]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP151:%.*]] = zext <2 x i8> [[TMP150]] to <2 x i32> +; CHECK-NEXT: [[TMP152:%.*]] = shufflevector <2 x ptr> [[TMP144]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP153:%.*]] = getelementptr i8, <2 x ptr> [[TMP152]], <2 x i64> +; CHECK-NEXT: [[TMP154:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP153]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP155:%.*]] = zext <2 x i8> [[TMP154]] to <2 x i32> +; CHECK-NEXT: [[TMP156:%.*]] = sub <2 x i32> [[TMP151]], [[TMP155]] +; CHECK-NEXT: [[TMP157:%.*]] = shl <2 x i32> [[TMP156]], +; CHECK-NEXT: [[TMP158:%.*]] = getelementptr i8, <2 x ptr> [[TMP152]], <2 x i64> +; CHECK-NEXT: [[TMP159:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP158]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP160:%.*]] = zext <2 x i8> [[TMP159]] to <2 x i32> +; CHECK-NEXT: [[TMP161:%.*]] = getelementptr i8, <2 x ptr> [[TMP148]], <2 x i64> +; CHECK-NEXT: [[TMP162:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP161]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP163:%.*]] = zext <2 x i8> [[TMP162]] to <2 x i32> +; CHECK-NEXT: [[TMP164:%.*]] = getelementptr i8, <2 x ptr> [[TMP152]], <2 x i64> +; CHECK-NEXT: [[TMP165:%.*]] = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> [[TMP164]], i32 1, <2 x i1> , <2 x i8> poison) +; CHECK-NEXT: [[TMP166:%.*]] = zext <2 x i8> [[TMP165]] to <2 x i32> +; CHECK-NEXT: [[TMP167:%.*]] = sub <2 x i32> [[TMP163]], [[TMP166]] +; CHECK-NEXT: [[TMP168:%.*]] = shl <2 x i32> [[TMP167]], +; CHECK-NEXT: [[TMP169:%.*]] = insertelement <2 x i32> [[TMP142]], i32 [[CONV33_1]], i32 1 +; CHECK-NEXT: [[TMP170:%.*]] = sub <2 x i32> [[TMP169]], [[TMP160]] +; CHECK-NEXT: [[TMP171:%.*]] = add <2 x i32> [[TMP168]], [[TMP170]] +; CHECK-NEXT: [[TMP172:%.*]] = insertelement <2 x i32> [[TMP142]], i32 [[CONV_1]], i32 0 +; CHECK-NEXT: [[TMP173:%.*]] = sub <2 x i32> [[TMP172]], [[TMP146]] +; CHECK-NEXT: [[TMP174:%.*]] = add <2 x i32> [[TMP157]], [[TMP173]] +; CHECK-NEXT: [[TMP175:%.*]] = add <2 x i32> [[TMP171]], [[TMP174]] +; CHECK-NEXT: [[TMP176:%.*]] = sub <2 x i32> [[TMP174]], [[TMP171]] +; CHECK-NEXT: [[TMP177:%.*]] = extractelement <2 x i32> [[TMP175]], i32 0 +; CHECK-NEXT: [[TMP178:%.*]] = extractelement <2 x i32> [[TMP175]], i32 1 +; CHECK-NEXT: [[SUB51_1:%.*]] = sub i32 [[TMP177]], [[TMP178]] +; CHECK-NEXT: [[TMP179:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP133]], <2 x i32> +; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <2 x i32> [[TMP176]], <2 x i32> [[TMP133]], <2 x i32> +; CHECK-NEXT: [[TMP181:%.*]] = add <2 x i32> [[TMP179]], [[TMP180]] +; CHECK-NEXT: [[TMP182:%.*]] = extractelement <2 x i32> [[TMP176]], i32 0 +; CHECK-NEXT: [[TMP183:%.*]] = extractelement <2 x i32> [[TMP176]], i32 1 +; CHECK-NEXT: [[SUB59_1:%.*]] = sub i32 [[TMP182]], [[TMP183]] +; CHECK-NEXT: [[SHR_I54:%.*]] = lshr i32 [[TMP178]], 15 ; CHECK-NEXT: [[AND_I55:%.*]] = and i32 [[SHR_I54]], 65537 ; CHECK-NEXT: [[MUL_I56:%.*]] = mul i32 [[AND_I55]], 65535 -; CHECK-NEXT: [[TMP185:%.*]] = lshr <2 x i32> [[TMP143]], -; CHECK-NEXT: [[TMP186:%.*]] = and <2 x i32> [[TMP185]], -; CHECK-NEXT: [[TMP187:%.*]] = mul <2 x i32> [[TMP186]], -; CHECK-NEXT: [[TMP188:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_1]], i32 0 -; CHECK-NEXT: [[TMP189:%.*]] = shufflevector <2 x i32> [[TMP188]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP182]], i32 0 -; CHECK-NEXT: [[TMP191:%.*]] = extractelement <2 x i32> [[TMP182]], i32 1 -; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[TMP190]], [[TMP191]] -; CHECK-NEXT: [[TMP192:%.*]] = shufflevector <2 x i32> [[TMP33]], <2 x i32> [[TMP177]], <2 x i32> -; CHECK-NEXT: [[TMP193:%.*]] = lshr <2 x i32> [[TMP192]], -; CHECK-NEXT: [[TMP194:%.*]] = and <2 x i32> [[TMP193]], -; CHECK-NEXT: [[TMP195:%.*]] = mul <2 x i32> [[TMP194]], -; CHECK-NEXT: [[TMP196:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_1]], i32 0 -; CHECK-NEXT: [[TMP197:%.*]] = shufflevector <2 x i32> [[TMP196]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP198:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 -; CHECK-NEXT: [[TMP199:%.*]] = shufflevector <2 x i32> [[TMP198]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP200:%.*]] = insertelement <2 x i32> poison, i32 [[ADD44]], i32 0 -; CHECK-NEXT: [[TMP201:%.*]] = shufflevector <2 x i32> [[TMP200]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP202:%.*]] = insertelement <2 x i32> , i32 [[ADD46]], i32 1 -; CHECK-NEXT: [[TMP203:%.*]] = lshr <2 x i32> [[TMP201]], [[TMP202]] -; CHECK-NEXT: [[TMP204:%.*]] = sub <2 x i32> [[TMP201]], [[TMP202]] -; CHECK-NEXT: [[TMP205:%.*]] = shufflevector <2 x i32> [[TMP203]], <2 x i32> [[TMP204]], <2 x i32> -; CHECK-NEXT: [[TMP206:%.*]] = extractelement <2 x i32> [[TMP205]], i32 1 -; CHECK-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB51_1]], [[TMP206]] -; CHECK-NEXT: [[TMP207:%.*]] = insertelement <2 x i32> , i32 [[SUB51_1]], i32 1 -; CHECK-NEXT: [[TMP208:%.*]] = and <2 x i32> [[TMP205]], [[TMP207]] -; CHECK-NEXT: [[TMP209:%.*]] = sub <2 x i32> [[TMP205]], [[TMP207]] -; CHECK-NEXT: [[TMP210:%.*]] = shufflevector <2 x i32> [[TMP208]], <2 x i32> [[TMP209]], <2 x i32> -; CHECK-NEXT: [[TMP211:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 -; CHECK-NEXT: [[TMP212:%.*]] = shufflevector <2 x i32> [[TMP211]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP213:%.*]] = add <2 x i32> [[TMP212]], [[TMP199]] -; CHECK-NEXT: [[TMP214:%.*]] = sub <2 x i32> [[TMP212]], [[TMP199]] -; CHECK-NEXT: [[TMP215:%.*]] = shufflevector <2 x i32> [[TMP213]], <2 x i32> [[TMP214]], <2 x i32> -; CHECK-NEXT: [[TMP216:%.*]] = insertelement <2 x i32> [[TMP134]], i32 [[CONV_1]], i32 0 -; CHECK-NEXT: [[TMP217:%.*]] = lshr <2 x i32> [[TMP216]], -; CHECK-NEXT: [[TMP218:%.*]] = and <2 x i32> [[TMP217]], -; CHECK-NEXT: [[TMP219:%.*]] = mul <2 x i32> [[TMP218]], -; CHECK-NEXT: [[TMP220:%.*]] = shufflevector <2 x i32> [[TMP88]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP221:%.*]] = shufflevector <2 x i32> [[TMP220]], <2 x i32> [[TMP182]], <2 x i32> -; CHECK-NEXT: [[TMP222:%.*]] = shufflevector <2 x i32> [[TMP88]], <2 x i32> [[TMP182]], <2 x i32> -; CHECK-NEXT: [[TMP223:%.*]] = sub <2 x i32> [[TMP221]], [[TMP222]] -; CHECK-NEXT: [[TMP224:%.*]] = shufflevector <2 x i32> [[TMP47]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP225:%.*]] = insertelement <2 x i32> [[TMP224]], i32 [[ADD46]], i32 1 -; CHECK-NEXT: [[TMP226:%.*]] = insertelement <2 x i32> [[TMP47]], i32 [[ADD44]], i32 1 -; CHECK-NEXT: [[TMP227:%.*]] = add <2 x i32> [[TMP225]], [[TMP226]] -; CHECK-NEXT: [[TMP228:%.*]] = shufflevector <2 x i32> [[TMP80]], <2 x i32> [[TMP176]], <2 x i32> -; CHECK-NEXT: [[TMP229:%.*]] = shufflevector <2 x i32> [[TMP80]], <2 x i32> [[TMP176]], <2 x i32> -; CHECK-NEXT: [[TMP230:%.*]] = add <2 x i32> [[TMP228]], [[TMP229]] -; CHECK-NEXT: [[TMP231:%.*]] = extractelement <2 x i32> [[TMP227]], i32 0 -; CHECK-NEXT: [[TMP232:%.*]] = extractelement <2 x i32> [[TMP230]], i32 0 -; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[TMP232]], [[TMP231]] -; CHECK-NEXT: [[TMP233:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[ADD46]], i32 1 -; CHECK-NEXT: [[TMP234:%.*]] = lshr <2 x i32> [[TMP233]], -; CHECK-NEXT: [[TMP235:%.*]] = and <2 x i32> [[TMP234]], -; CHECK-NEXT: [[TMP236:%.*]] = mul <2 x i32> [[TMP235]], -; CHECK-NEXT: [[TMP237:%.*]] = extractelement <2 x i32> [[TMP227]], i32 1 -; CHECK-NEXT: [[TMP238:%.*]] = extractelement <2 x i32> [[TMP230]], i32 1 -; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[TMP238]], [[TMP237]] -; CHECK-NEXT: [[TMP239:%.*]] = sub <2 x i32> [[TMP227]], [[TMP230]] +; CHECK-NEXT: [[TMP184:%.*]] = lshr <2 x i32> [[TMP142]], +; CHECK-NEXT: [[TMP185:%.*]] = and <2 x i32> [[TMP184]], +; CHECK-NEXT: [[TMP186:%.*]] = mul <2 x i32> [[TMP185]], +; CHECK-NEXT: [[TMP187:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59_1]], i32 0 +; CHECK-NEXT: [[TMP188:%.*]] = shufflevector <2 x i32> [[TMP187]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP189:%.*]] = extractelement <2 x i32> [[TMP181]], i32 0 +; CHECK-NEXT: [[TMP190:%.*]] = extractelement <2 x i32> [[TMP181]], i32 1 +; CHECK-NEXT: [[ADD78_1:%.*]] = add i32 [[TMP189]], [[TMP190]] +; CHECK-NEXT: [[TMP191:%.*]] = shufflevector <2 x i32> [[TMP33]], <2 x i32> [[TMP176]], <2 x i32> +; CHECK-NEXT: [[TMP192:%.*]] = lshr <2 x i32> [[TMP191]], +; CHECK-NEXT: [[TMP193:%.*]] = and <2 x i32> [[TMP192]], +; CHECK-NEXT: [[TMP194:%.*]] = mul <2 x i32> [[TMP193]], +; CHECK-NEXT: [[TMP195:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_1]], i32 0 +; CHECK-NEXT: [[TMP196:%.*]] = shufflevector <2 x i32> [[TMP195]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP197:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_2]], i32 0 +; CHECK-NEXT: [[TMP198:%.*]] = shufflevector <2 x i32> [[TMP197]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP199:%.*]] = insertelement <2 x i32> poison, i32 [[ADD44]], i32 0 +; CHECK-NEXT: [[TMP200:%.*]] = shufflevector <2 x i32> [[TMP199]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP201:%.*]] = insertelement <2 x i32> , i32 [[ADD46]], i32 1 +; CHECK-NEXT: [[TMP202:%.*]] = lshr <2 x i32> [[TMP200]], [[TMP201]] +; CHECK-NEXT: [[TMP203:%.*]] = sub <2 x i32> [[TMP200]], [[TMP201]] +; CHECK-NEXT: [[TMP204:%.*]] = shufflevector <2 x i32> [[TMP202]], <2 x i32> [[TMP203]], <2 x i32> +; CHECK-NEXT: [[TMP205:%.*]] = extractelement <2 x i32> [[TMP204]], i32 1 +; CHECK-NEXT: [[ADD78_2:%.*]] = add i32 [[SUB51_1]], [[TMP205]] +; CHECK-NEXT: [[TMP206:%.*]] = insertelement <2 x i32> , i32 [[SUB51_1]], i32 1 +; CHECK-NEXT: [[TMP207:%.*]] = and <2 x i32> [[TMP204]], [[TMP206]] +; CHECK-NEXT: [[TMP208:%.*]] = sub <2 x i32> [[TMP204]], [[TMP206]] +; CHECK-NEXT: [[TMP209:%.*]] = shufflevector <2 x i32> [[TMP207]], <2 x i32> [[TMP208]], <2 x i32> +; CHECK-NEXT: [[TMP210:%.*]] = insertelement <2 x i32> poison, i32 [[ADD78_2]], i32 0 +; CHECK-NEXT: [[TMP211:%.*]] = shufflevector <2 x i32> [[TMP210]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP212:%.*]] = add <2 x i32> [[TMP211]], [[TMP198]] +; CHECK-NEXT: [[TMP213:%.*]] = sub <2 x i32> [[TMP211]], [[TMP198]] +; CHECK-NEXT: [[TMP214:%.*]] = shufflevector <2 x i32> [[TMP212]], <2 x i32> [[TMP213]], <2 x i32> +; CHECK-NEXT: [[TMP215:%.*]] = insertelement <2 x i32> [[TMP133]], i32 [[CONV_1]], i32 0 +; CHECK-NEXT: [[TMP216:%.*]] = lshr <2 x i32> [[TMP215]], +; CHECK-NEXT: [[TMP217:%.*]] = and <2 x i32> [[TMP216]], +; CHECK-NEXT: [[TMP218:%.*]] = mul <2 x i32> [[TMP217]], +; CHECK-NEXT: [[TMP219:%.*]] = shufflevector <2 x i32> [[TMP87]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP220:%.*]] = shufflevector <2 x i32> [[TMP219]], <2 x i32> [[TMP181]], <2 x i32> +; CHECK-NEXT: [[TMP221:%.*]] = shufflevector <2 x i32> [[TMP87]], <2 x i32> [[TMP181]], <2 x i32> +; CHECK-NEXT: [[TMP222:%.*]] = sub <2 x i32> [[TMP220]], [[TMP221]] +; CHECK-NEXT: [[TMP223:%.*]] = shufflevector <2 x i32> [[TMP47]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP224:%.*]] = insertelement <2 x i32> [[TMP223]], i32 [[ADD46]], i32 1 +; CHECK-NEXT: [[TMP225:%.*]] = insertelement <2 x i32> [[TMP47]], i32 [[ADD44]], i32 1 +; CHECK-NEXT: [[TMP226:%.*]] = add <2 x i32> [[TMP224]], [[TMP225]] +; CHECK-NEXT: [[TMP227:%.*]] = shufflevector <2 x i32> [[TMP79]], <2 x i32> [[TMP175]], <2 x i32> +; CHECK-NEXT: [[TMP228:%.*]] = shufflevector <2 x i32> [[TMP79]], <2 x i32> [[TMP175]], <2 x i32> +; CHECK-NEXT: [[TMP229:%.*]] = add <2 x i32> [[TMP227]], [[TMP228]] +; CHECK-NEXT: [[TMP230:%.*]] = extractelement <2 x i32> [[TMP226]], i32 0 +; CHECK-NEXT: [[TMP231:%.*]] = extractelement <2 x i32> [[TMP229]], i32 0 +; CHECK-NEXT: [[ADD94_1:%.*]] = add i32 [[TMP231]], [[TMP230]] +; CHECK-NEXT: [[TMP232:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[ADD46]], i32 1 +; CHECK-NEXT: [[TMP233:%.*]] = lshr <2 x i32> [[TMP232]], +; CHECK-NEXT: [[TMP234:%.*]] = and <2 x i32> [[TMP233]], +; CHECK-NEXT: [[TMP235:%.*]] = mul <2 x i32> [[TMP234]], +; CHECK-NEXT: [[TMP236:%.*]] = extractelement <2 x i32> [[TMP226]], i32 1 +; CHECK-NEXT: [[TMP237:%.*]] = extractelement <2 x i32> [[TMP229]], i32 1 +; CHECK-NEXT: [[ADD78:%.*]] = add i32 [[TMP237]], [[TMP236]] +; CHECK-NEXT: [[TMP238:%.*]] = sub <2 x i32> [[TMP226]], [[TMP229]] ; CHECK-NEXT: [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]] ; CHECK-NEXT: [[SUB104:%.*]] = sub i32 [[ADD78]], [[ADD94]] -; CHECK-NEXT: [[TMP240:%.*]] = extractelement <2 x i32> [[TMP239]], i32 1 -; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[TMP240]] +; CHECK-NEXT: [[TMP239:%.*]] = extractelement <2 x i32> [[TMP238]], i32 1 +; CHECK-NEXT: [[ADD105:%.*]] = add i32 [[SUB102]], [[TMP239]] ; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP93]] +; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[TMP92]] ; CHECK-NEXT: [[ADD_I52:%.*]] = add i32 [[MUL_I51]], [[ADD105]] -; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP94]] +; CHECK-NEXT: [[XOR_I53:%.*]] = xor i32 [[ADD_I52]], [[TMP93]] ; CHECK-NEXT: [[ADD_I57:%.*]] = add i32 [[MUL_I56]], [[SUB104]] -; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP179]] +; CHECK-NEXT: [[XOR_I58:%.*]] = xor i32 [[ADD_I57]], [[TMP178]] ; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I53]], [[XOR_I]] ; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I58]] -; CHECK-NEXT: [[TMP241:%.*]] = shufflevector <2 x i32> [[TMP223]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP242:%.*]] = insertelement <2 x i32> [[TMP241]], i32 [[SUB102]], i32 1 -; CHECK-NEXT: [[TMP243:%.*]] = add <2 x i32> [[TMP239]], [[TMP242]] -; CHECK-NEXT: [[TMP244:%.*]] = sub <2 x i32> [[TMP239]], [[TMP242]] -; CHECK-NEXT: [[TMP245:%.*]] = shufflevector <2 x i32> [[TMP243]], <2 x i32> [[TMP244]], <2 x i32> -; CHECK-NEXT: [[TMP246:%.*]] = add <2 x i32> [[TMP236]], [[TMP245]] -; CHECK-NEXT: [[TMP247:%.*]] = xor <2 x i32> [[TMP246]], [[TMP233]] -; CHECK-NEXT: [[TMP248:%.*]] = extractelement <2 x i32> [[TMP247]], i32 1 -; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[TMP248]] -; CHECK-NEXT: [[TMP249:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_1]], i32 0 -; CHECK-NEXT: [[TMP250:%.*]] = shufflevector <2 x i32> [[TMP249]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP251:%.*]] = add <2 x i32> [[TMP197]], [[TMP250]] -; CHECK-NEXT: [[TMP252:%.*]] = sub <2 x i32> [[TMP197]], [[TMP250]] -; CHECK-NEXT: [[TMP253:%.*]] = shufflevector <2 x i32> [[TMP251]], <2 x i32> [[TMP252]], <2 x i32> -; CHECK-NEXT: [[TMP254:%.*]] = add <2 x i32> [[TMP195]], [[TMP253]] -; CHECK-NEXT: [[TMP255:%.*]] = xor <2 x i32> [[TMP254]], [[TMP192]] -; CHECK-NEXT: [[TMP256:%.*]] = extractelement <2 x i32> [[TMP247]], i32 0 -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[TMP256]], [[ADD113]] -; CHECK-NEXT: [[TMP257:%.*]] = extractelement <2 x i32> [[TMP255]], i32 0 -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP257]] -; CHECK-NEXT: [[TMP258:%.*]] = extractelement <2 x i32> [[TMP255]], i32 1 -; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[TMP258]] -; CHECK-NEXT: [[TMP259:%.*]] = shufflevector <2 x i32> [[TMP210]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP260:%.*]] = shufflevector <2 x i32> [[TMP259]], <2 x i32> [[TMP239]], <2 x i32> -; CHECK-NEXT: [[TMP261:%.*]] = add <2 x i32> [[TMP223]], [[TMP260]] -; CHECK-NEXT: [[TMP262:%.*]] = sub <2 x i32> [[TMP223]], [[TMP260]] -; CHECK-NEXT: [[TMP263:%.*]] = shufflevector <2 x i32> [[TMP261]], <2 x i32> [[TMP262]], <2 x i32> -; CHECK-NEXT: [[TMP264:%.*]] = add <2 x i32> [[TMP219]], [[TMP263]] -; CHECK-NEXT: [[TMP265:%.*]] = xor <2 x i32> [[TMP264]], [[TMP216]] -; CHECK-NEXT: [[TMP266:%.*]] = extractelement <2 x i32> [[TMP265]], i32 1 -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP266]] -; CHECK-NEXT: [[TMP267:%.*]] = shufflevector <2 x i32> , <2 x i32> [[TMP223]], <2 x i32> -; CHECK-NEXT: [[TMP268:%.*]] = mul <2 x i32> [[TMP210]], [[TMP267]] -; CHECK-NEXT: [[TMP269:%.*]] = sub <2 x i32> [[TMP210]], [[TMP267]] -; CHECK-NEXT: [[TMP270:%.*]] = shufflevector <2 x i32> [[TMP268]], <2 x i32> [[TMP269]], <2 x i32> -; CHECK-NEXT: [[TMP271:%.*]] = add <2 x i32> [[TMP187]], [[TMP215]] -; CHECK-NEXT: [[TMP272:%.*]] = xor <2 x i32> [[TMP271]], [[TMP143]] -; CHECK-NEXT: [[TMP273:%.*]] = extractelement <2 x i32> [[TMP270]], i32 0 -; CHECK-NEXT: [[TMP274:%.*]] = extractelement <2 x i32> [[TMP270]], i32 1 -; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[TMP273]], [[TMP274]] +; CHECK-NEXT: [[TMP240:%.*]] = shufflevector <2 x i32> [[TMP222]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP241:%.*]] = insertelement <2 x i32> [[TMP240]], i32 [[SUB102]], i32 1 +; CHECK-NEXT: [[TMP242:%.*]] = add <2 x i32> [[TMP238]], [[TMP241]] +; CHECK-NEXT: [[TMP243:%.*]] = sub <2 x i32> [[TMP238]], [[TMP241]] +; CHECK-NEXT: [[TMP244:%.*]] = shufflevector <2 x i32> [[TMP242]], <2 x i32> [[TMP243]], <2 x i32> +; CHECK-NEXT: [[TMP245:%.*]] = add <2 x i32> [[TMP235]], [[TMP244]] +; CHECK-NEXT: [[TMP246:%.*]] = xor <2 x i32> [[TMP245]], [[TMP232]] +; CHECK-NEXT: [[TMP247:%.*]] = extractelement <2 x i32> [[TMP246]], i32 1 +; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[TMP247]] +; CHECK-NEXT: [[TMP248:%.*]] = insertelement <2 x i32> poison, i32 [[ADD94_1]], i32 0 +; CHECK-NEXT: [[TMP249:%.*]] = shufflevector <2 x i32> [[TMP248]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP250:%.*]] = add <2 x i32> [[TMP196]], [[TMP249]] +; CHECK-NEXT: [[TMP251:%.*]] = sub <2 x i32> [[TMP196]], [[TMP249]] +; CHECK-NEXT: [[TMP252:%.*]] = shufflevector <2 x i32> [[TMP250]], <2 x i32> [[TMP251]], <2 x i32> +; CHECK-NEXT: [[TMP253:%.*]] = add <2 x i32> [[TMP194]], [[TMP252]] +; CHECK-NEXT: [[TMP254:%.*]] = xor <2 x i32> [[TMP253]], [[TMP191]] +; CHECK-NEXT: [[TMP255:%.*]] = extractelement <2 x i32> [[TMP246]], i32 0 +; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[TMP255]], [[ADD113]] +; CHECK-NEXT: [[TMP256:%.*]] = extractelement <2 x i32> [[TMP254]], i32 0 +; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[TMP256]] +; CHECK-NEXT: [[TMP257:%.*]] = extractelement <2 x i32> [[TMP254]], i32 1 +; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[TMP257]] +; CHECK-NEXT: [[TMP258:%.*]] = shufflevector <2 x i32> [[TMP209]], <2 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP259:%.*]] = shufflevector <2 x i32> [[TMP258]], <2 x i32> [[TMP238]], <2 x i32> +; CHECK-NEXT: [[TMP260:%.*]] = add <2 x i32> [[TMP222]], [[TMP259]] +; CHECK-NEXT: [[TMP261:%.*]] = sub <2 x i32> [[TMP222]], [[TMP259]] +; CHECK-NEXT: [[TMP262:%.*]] = shufflevector <2 x i32> [[TMP260]], <2 x i32> [[TMP261]], <2 x i32> +; CHECK-NEXT: [[TMP263:%.*]] = add <2 x i32> [[TMP218]], [[TMP262]] +; CHECK-NEXT: [[TMP264:%.*]] = xor <2 x i32> [[TMP263]], [[TMP215]] +; CHECK-NEXT: [[TMP265:%.*]] = extractelement <2 x i32> [[TMP264]], i32 1 +; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[TMP265]] +; CHECK-NEXT: [[TMP266:%.*]] = shufflevector <2 x i32> , <2 x i32> [[TMP222]], <2 x i32> +; CHECK-NEXT: [[TMP267:%.*]] = mul <2 x i32> [[TMP209]], [[TMP266]] +; CHECK-NEXT: [[TMP268:%.*]] = sub <2 x i32> [[TMP209]], [[TMP266]] +; CHECK-NEXT: [[TMP269:%.*]] = shufflevector <2 x i32> [[TMP267]], <2 x i32> [[TMP268]], <2 x i32> +; CHECK-NEXT: [[TMP270:%.*]] = add <2 x i32> [[TMP186]], [[TMP214]] +; CHECK-NEXT: [[TMP271:%.*]] = xor <2 x i32> [[TMP270]], [[TMP142]] +; CHECK-NEXT: [[TMP272:%.*]] = extractelement <2 x i32> [[TMP269]], i32 0 +; CHECK-NEXT: [[TMP273:%.*]] = extractelement <2 x i32> [[TMP269]], i32 1 +; CHECK-NEXT: [[ADD_I62_2:%.*]] = add i32 [[TMP272]], [[TMP273]] ; CHECK-NEXT: [[XOR_I63_2:%.*]] = xor i32 [[ADD_I62_2]], [[ADD44]] -; CHECK-NEXT: [[TMP275:%.*]] = extractelement <2 x i32> [[TMP265]], i32 0 -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[TMP275]], [[ADD113_1]] -; CHECK-NEXT: [[TMP276:%.*]] = extractelement <2 x i32> [[TMP272]], i32 0 -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP276]] -; CHECK-NEXT: [[TMP277:%.*]] = extractelement <2 x i32> [[TMP272]], i32 1 -; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP277]] +; CHECK-NEXT: [[TMP274:%.*]] = extractelement <2 x i32> [[TMP264]], i32 0 +; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[TMP274]], [[ADD113_1]] +; CHECK-NEXT: [[TMP275:%.*]] = extractelement <2 x i32> [[TMP271]], i32 0 +; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[TMP275]] +; CHECK-NEXT: [[TMP276:%.*]] = extractelement <2 x i32> [[TMP271]], i32 1 +; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[TMP276]] ; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I63_2]] -; CHECK-NEXT: [[TMP278:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59]], i32 0 -; CHECK-NEXT: [[TMP279:%.*]] = shufflevector <2 x i32> [[TMP278]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP280:%.*]] = add <2 x i32> [[TMP279]], [[TMP189]] -; CHECK-NEXT: [[TMP281:%.*]] = sub <2 x i32> [[TMP279]], [[TMP189]] -; CHECK-NEXT: [[TMP282:%.*]] = shufflevector <2 x i32> [[TMP280]], <2 x i32> [[TMP281]], <2 x i32> -; CHECK-NEXT: [[TMP283:%.*]] = add <2 x i32> [[TMP105]], [[TMP282]] -; CHECK-NEXT: [[TMP284:%.*]] = sub <2 x i32> [[TMP282]], [[TMP105]] -; CHECK-NEXT: [[TMP285:%.*]] = add <2 x i32> [[TMP139]], [[TMP283]] -; CHECK-NEXT: [[TMP286:%.*]] = xor <2 x i32> [[TMP285]], [[TMP108]] -; CHECK-NEXT: [[TMP287:%.*]] = lshr <2 x i32> [[TMP98]], -; CHECK-NEXT: [[TMP288:%.*]] = and <2 x i32> [[TMP287]], -; CHECK-NEXT: [[TMP289:%.*]] = mul <2 x i32> [[TMP288]], -; CHECK-NEXT: [[TMP290:%.*]] = add <2 x i32> [[TMP289]], [[TMP284]] -; CHECK-NEXT: [[TMP291:%.*]] = xor <2 x i32> [[TMP290]], [[TMP98]] -; CHECK-NEXT: [[TMP292:%.*]] = extractelement <2 x i32> [[TMP286]], i32 1 -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[TMP292]], [[ADD113_2]] -; CHECK-NEXT: [[TMP293:%.*]] = extractelement <2 x i32> [[TMP286]], i32 0 -; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP293]] -; CHECK-NEXT: [[TMP294:%.*]] = extractelement <2 x i32> [[TMP291]], i32 0 -; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP294]] -; CHECK-NEXT: [[TMP295:%.*]] = extractelement <2 x i32> [[TMP291]], i32 1 -; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[TMP295]] +; CHECK-NEXT: [[TMP277:%.*]] = insertelement <2 x i32> poison, i32 [[SUB59]], i32 0 +; CHECK-NEXT: [[TMP278:%.*]] = shufflevector <2 x i32> [[TMP277]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP279:%.*]] = add <2 x i32> [[TMP278]], [[TMP188]] +; CHECK-NEXT: [[TMP280:%.*]] = sub <2 x i32> [[TMP278]], [[TMP188]] +; CHECK-NEXT: [[TMP281:%.*]] = shufflevector <2 x i32> [[TMP279]], <2 x i32> [[TMP280]], <2 x i32> +; CHECK-NEXT: [[TMP282:%.*]] = add <2 x i32> [[TMP104]], [[TMP281]] +; CHECK-NEXT: [[TMP283:%.*]] = sub <2 x i32> [[TMP281]], [[TMP104]] +; CHECK-NEXT: [[TMP284:%.*]] = add <2 x i32> [[TMP138]], [[TMP282]] +; CHECK-NEXT: [[TMP285:%.*]] = xor <2 x i32> [[TMP284]], [[TMP107]] +; CHECK-NEXT: [[TMP286:%.*]] = lshr <2 x i32> [[TMP97]], +; CHECK-NEXT: [[TMP287:%.*]] = and <2 x i32> [[TMP286]], +; CHECK-NEXT: [[TMP288:%.*]] = mul <2 x i32> [[TMP287]], +; CHECK-NEXT: [[TMP289:%.*]] = add <2 x i32> [[TMP288]], [[TMP283]] +; CHECK-NEXT: [[TMP290:%.*]] = xor <2 x i32> [[TMP289]], [[TMP97]] +; CHECK-NEXT: [[TMP291:%.*]] = extractelement <2 x i32> [[TMP285]], i32 1 +; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[TMP291]], [[ADD113_2]] +; CHECK-NEXT: [[TMP292:%.*]] = extractelement <2 x i32> [[TMP285]], i32 0 +; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[TMP292]] +; CHECK-NEXT: [[TMP293:%.*]] = extractelement <2 x i32> [[TMP290]], i32 0 +; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[TMP293]] +; CHECK-NEXT: [[TMP294:%.*]] = extractelement <2 x i32> [[TMP290]], i32 1 +; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[TMP294]] ; CHECK-NEXT: ret i32 [[ADD113_3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll index a4cc311d12a217..4b0b41970bbb4d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll @@ -5,61 +5,12 @@ define void @test(ptr %p, ptr noalias %s) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 -; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 -; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4 -; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26 -; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1 -; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8 -; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4 -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22 -; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4 -; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]] -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2 -; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12 -; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4 -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18 -; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 -; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]] -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3 -; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4 -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16 -; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4 -; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14 -; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4 -; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]] -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4 -; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20 -; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4 -; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10 -; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4 -; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]] -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5 -; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24 -; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4 -; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6 -; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4 -; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]] -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6 -; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4 -; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28 -; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4 -; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2 -; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4 -; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]] -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7 -; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]] +; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -262,67 +213,40 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2 -; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]] ; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6 -; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6 ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]] ; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]] -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1 -; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10 -; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4 ; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5 ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]] ; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4 -; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]] -; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2 -; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14 -; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4 ; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4 ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]] ; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4 -; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]] -; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3 -; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4 -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18 ; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3 -; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4 ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]] ; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4 -; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]] -; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4 -; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22 -; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4 ; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]] ; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4 -; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]] -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5 -; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26 -; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4 ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]] ; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4 -; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]] -; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6 -; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4 -; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 -; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0 ; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4 -; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]] -; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7 -; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]] +; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -397,27 +321,12 @@ define void @test3(ptr %p, ptr noalias %s) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 -; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4 -; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8 -; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12 -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16 -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24 -; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28 ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7 -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> , <8 x float> poison) -; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]] -; CHECK-NEXT: store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] +; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll index 5aba9ea115a4b9..ec152c707eec6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-with-external-use-ptr.ll @@ -8,7 +8,7 @@ define i16 @test() { ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PPREV_058_I:%.*]] = getelementptr [[S:%.*]], ptr null, i64 -1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> , ptr [[PPREV_058_I]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> , ptr [[PPREV_058_I]], i32 1 ; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: ; CHECK-NEXT: [[TMP1:%.*]] = phi i16 [ 0, [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ] @@ -17,7 +17,7 @@ define i16 @test() { ; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP3]], i32 2, <2 x i1> , <2 x i16> poison) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1 -; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[CMP_I178:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] ; CHECK-NEXT: br label [[WHILE_BODY_I]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll index 8f2c72bb4c6856..8ab57cc73e646f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads.ll @@ -5,14 +5,11 @@ define i32 @sum_of_abs(ptr noalias %a, ptr noalias %b) { ; CHECK-LABEL: define i32 @sum_of_abs ; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> , <8 x i8> poison) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP3]], i1 false) -; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]]) -; CHECK-NEXT: ret i32 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr align 1 [[A]], i64 64, <8 x i1> , i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] ; entry: %0 = load i8, ptr %a, align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll index 96d4c307f1c67f..9e43cefef2801d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gep-nodes-with-non-gep-inst.ll @@ -30,7 +30,7 @@ define void @test() { ; CHECK-SLP-THRESHOLD: bb: ; CHECK-SLP-THRESHOLD-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[COND_IN_V]], i32 0 ; CHECK-SLP-THRESHOLD-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> +; CHECK-SLP-THRESHOLD-NEXT: [[TMP2:%.*]] = getelementptr i64, <4 x ptr> [[TMP1]], <4 x i64> ; CHECK-SLP-THRESHOLD-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[TMP2]], i32 8, <4 x i1> , <4 x i64> poison) ; CHECK-SLP-THRESHOLD-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], zeroinitializer ; CHECK-SLP-THRESHOLD-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll index 1add732d32e85c..3bc6e64606e399 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_gather-load-redux-cost.ll @@ -7,7 +7,7 @@ define i32 @test(ptr noalias %p, ptr noalias %addr) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ADDR:%.*]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <8 x ptr> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x i32> poison) ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> poison, ptr [[P:%.*]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP4]], <8 x ptr> poison, <8 x i32> zeroinitializer From 45260bf23b802047ab4fd888b8bf2b32e4c5eb69 Mon Sep 17 00:00:00 2001 From: Petr Date: Mon, 12 Feb 2024 13:50:00 +0100 Subject: [PATCH 05/59] Fix use after free error in NVVMReflect (#81471) I have a Triton kernel, which triggered a heap-use-after-free error in LLVM. The problem was that the same instruction may be added to the `ToSimplify` array multiple times. If this duplicate instruction is trivially dead, it gets deleted on the first pass. Then, on the second pass, the freed instruction is passed. To fix this, I'm adding the instructions to the `ToRemove` array and filter it out for duplicates to avoid possible double frees. --- llvm/lib/Target/NVPTX/NVVMReflect.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp index 64fedf32e9a269..29c95e4226bf40 100644 --- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -39,6 +39,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include #include #include #define NVVM_REFLECT_FUNCTION "__nvvm_reflect" @@ -185,9 +186,6 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) { ToRemove.push_back(Call); } - for (Instruction *I : ToRemove) - I->eraseFromParent(); - // The code guarded by __nvvm_reflect may be invalid for the target machine. // Traverse the use-def chain, continually simplifying constant expressions // until we find a terminator that we can then remove. @@ -200,13 +198,23 @@ static bool runNVVMReflect(Function &F, unsigned SmVersion) { ToSimplify.push_back(I); I->replaceAllUsesWith(C); - if (isInstructionTriviallyDead(I)) - I->eraseFromParent(); + if (isInstructionTriviallyDead(I)) { + ToRemove.push_back(I); + } } else if (I->isTerminator()) { ConstantFoldTerminator(I->getParent()); } } + // Removing via isInstructionTriviallyDead may add duplicates to the ToRemove + // array. Filter out the duplicates before starting to erase from parent. + std::sort(ToRemove.begin(), ToRemove.end()); + auto NewLastIter = std::unique(ToRemove.begin(), ToRemove.end()); + ToRemove.erase(NewLastIter, ToRemove.end()); + + for (Instruction *I : ToRemove) + I->eraseFromParent(); + return ToRemove.size() > 0; } From 2ac8e6b7f5b8f495f496a55512e5ea452beb0bca Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 12 Feb 2024 07:07:48 -0600 Subject: [PATCH 06/59] [NVPTX] Implement `__builtin_readcyclecounter` on NVPTX (#81344) Summary: This patch simply states that `__builtin_readcyclecounter` is legal on NVPTX and makes it return the value from the `clock64` sreg. The timer intrinsics are marked as having side effects, which is desireable for timing primitives and required to pattern match the instrinic DAG. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 ++ llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 4 ++++ llvm/test/CodeGen/NVPTX/intrinsics.ll | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 5c24f00dbca0e2..80a67ca8e368c6 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -489,6 +489,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 2330d7213c26dc..133e2827008a87 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6372,12 +6372,16 @@ def INT_PTX_SREG_LANEMASK_GE : def INT_PTX_SREG_LANEMASK_GT : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; +let hasSideEffects = 1 in { def INT_PTX_SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; def INT_PTX_SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; def INT_PTX_SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>; +} + +def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>; def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll index 2994f600d45c71..d84ee6754281e5 100644 --- a/llvm/test/CodeGen/NVPTX/intrinsics.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll @@ -151,6 +151,17 @@ define i64 @test_globaltimer() { ret i64 %ret } +; CHECK-LABEL: test_cyclecounter +define i64 @test_cyclecounter() { +; CHECK: mov.u64 %r{{.*}}, %clock64; + %a = tail call i64 @llvm.readcyclecounter() +; CHECK: mov.u64 %r{{.*}}, %clock64; + %b = tail call i64 @llvm.readcyclecounter() + %ret = add i64 %a, %b +; CHECK: ret + ret i64 %ret +} + declare float @llvm.fabs.f32(float) declare double @llvm.fabs.f64(double) declare float @llvm.nvvm.sqrt.f(float) @@ -166,3 +177,4 @@ declare i32 @llvm.nvvm.read.ptx.sreg.clock() declare i64 @llvm.nvvm.read.ptx.sreg.clock64() declare void @llvm.nvvm.exit() declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer() +declare i64 @llvm.readcyclecounter() From d1c481d27db07acb6fa68a244bb110bd7ad57a05 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Fri, 29 Dec 2023 00:21:12 +0100 Subject: [PATCH 07/59] [CGP] Precommit tests for PR76613 (NFC) --- llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 193 ++++++++++++++++++++++ 1 file changed, 193 insertions(+) diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll index 75bbae1050d61c..c48087da500e0e 100644 --- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll +++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -184,3 +184,196 @@ return: %retval = phi ptr [ %ptr, %if.then ], [ %obj, %entry ] ret ptr %retval } + +define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind { +; CHECK-LABEL: memset_tailc: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB4_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq _memset +; CHECK-NEXT: LBB4_2: ## %return +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %cmp = icmp eq ptr %ret_val, null + br i1 %cmp, label %return, label %if.then + +if.then: + tail call void @llvm.memset.p0.i64(ptr nonnull align 1 %ret_val, i8 0, i64 %sz, i1 false) + br label %return + +return: + ret ptr %ret_val +} + +define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind { +; CHECK-LABEL: memcpy_tailc: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB5_1 +; CHECK-NEXT: ## %bb.2: ## %if.then +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: callq _memcpy +; CHECK-NEXT: jmp LBB5_3 +; CHECK-NEXT: LBB5_1: +; CHECK-NEXT: movq %rdx, %rbx +; CHECK-NEXT: LBB5_3: ## %return +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %cmp = icmp eq i64 %sz, 0 + br i1 %cmp, label %return, label %if.then + +if.then: + tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 %ret_val, ptr align 1 %src, i64 %sz, i1 false) + br label %return + +return: + %phi = phi ptr [ %ret_val, %if.then ], [ %src, %entry ] + ret ptr %phi +} + +define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind { +; CHECK-LABEL: strcpy_legal_and_baz_illegal: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdx, %r14 +; CHECK-NEXT: movq %rsi, %r15 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq _malloc +; CHECK-NEXT: testq %r15, %r15 +; CHECK-NEXT: je LBB6_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: movq %rax, %rbx +; CHECK-NEXT: callq _strcpy +; CHECK-NEXT: jmp LBB6_3 +; CHECK-NEXT: LBB6_2: ## %if.else +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: callq _baz +; CHECK-NEXT: LBB6_3: ## %return +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: retq +entry: + %strcpy_ret_val = tail call noalias ptr @malloc(i64 %sz) + %cmp = icmp eq i64 %sz, 0 + br i1 %cmp, label %if.else, label %if.then + +if.then: + %rv_unused = tail call ptr @strcpy(ptr dereferenceable(1) %strcpy_ret_val, ptr dereferenceable(1) %2) + br label %return + +if.else: + %rv_unused_2 = tail call ptr @baz(ptr %arg, ptr %2) + br label %return + +return: + %phi = phi ptr [ %strcpy_ret_val, %if.then ], [ %arg, %if.else ] + ret ptr %phi +} + +define ptr @baz_illegal_tailc(ptr %ret_val, ptr %arg) nounwind { +; CHECK-LABEL: baz_illegal_tailc: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB7_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq _baz +; CHECK-NEXT: LBB7_2: ## %return +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +entry: + %cmp = icmp eq ptr %ret_val, null + br i1 %cmp, label %return, label %if.then + +if.then: + %rv = tail call ptr @baz(ptr %ret_val, ptr %arg) + br label %return + +return: + ret ptr %ret_val +} + +define ptr @memset_illegal_tailc(ptr %arg, i64 %sz, ptr %ret_val_1, ptr %ret_val_2) nounwind { +; CHECK-LABEL: memset_illegal_tailc: +; CHECK: ## %bb.0: ## %entry +; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB8_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rcx, %rbx +; CHECK-NEXT: movq %rsi, %rdx +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq _memset +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: LBB8_2: ## %return +; CHECK-NEXT: retq +entry: + %cmp = icmp eq i64 %sz, 0 + br i1 %cmp, label %return, label %if.then + +if.then: + tail call void @llvm.memset.p0.i64(ptr align 1 %arg, i8 0, i64 %sz, i1 false) + br label %return + +return: + %phi = phi ptr [ %ret_val_2, %if.then ], [ %ret_val_1, %entry ] + ret ptr %phi +} + +define ptr @strcpy_illegal_tailc(ptr %dest, i64 %sz, ptr readonly returned %src) nounwind { +; CHECK-LABEL: strcpy_illegal_tailc: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdx, %rbx +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB9_2 +; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: movq %rbx, %rsi +; CHECK-NEXT: callq _strcpy +; CHECK-NEXT: LBB9_2: ## %return +; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq + %cmp = icmp eq i64 %sz, 0 + br i1 %cmp, label %return, label %if.then + +if.then: + %6 = tail call ptr @strcpy(ptr dereferenceable(1) %dest, ptr dereferenceable(1) %src) + br label %return + +return: + ret ptr %src +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) +declare noalias ptr @malloc(i64) +declare ptr @strcpy(ptr noalias returned writeonly, ptr noalias nocapture readonly) +declare ptr @baz(ptr, ptr) From 8373ceef8f2ee377d6daf884e2f3ea11408a7fe2 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Fri, 29 Dec 2023 00:25:23 +0100 Subject: [PATCH 08/59] [CGP] Extend `dupRetToEnableTailCallOpts` to known intrinsics Hint further tail call optimization opportunities when the examined returned value is the return value of a known intrinsic or library function, and it appears as first function argument. Fixes: https://github.com/llvm/llvm-project/issues/75455. --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 70 +++++++++++++++++++++-- llvm/test/CodeGen/X86/tailcall-cgp-dup.ll | 52 +++++++---------- 2 files changed, 85 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 09c4922d8822cc..32a25b49b4e4b0 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2522,8 +2522,40 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { return false; } +static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, + const CallInst *CI) { + assert(CI && CI->use_empty()); + + if (const auto *II = dyn_cast(CI)) + switch (II->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + return true; + default: + return false; + } + + LibFunc LF; + Function *Callee = CI->getCalledFunction(); + if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF)) + switch (LF) { + case LibFunc_strcpy: + case LibFunc_strncpy: + case LibFunc_strcat: + case LibFunc_strncat: + return true; + default: + return false; + } + + return false; +} + /// Look for opportunities to duplicate return instructions to the predecessor -/// to enable tail call optimizations. The case it is currently looking for is: +/// to enable tail call optimizations. The case it is currently looking for is +/// the following one. Known intrinsics or library function that may be tail +/// called are taken into account as well. /// @code /// bb0: /// %tmp0 = tail call i32 @f0() @@ -2580,8 +2612,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, } PN = dyn_cast(V); - if (!PN) - return false; } if (PN && PN->getParent() != BB) @@ -2620,8 +2650,30 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, // Make sure the phi value is indeed produced by the tail call. if (CI && CI->hasOneUse() && CI->getParent() == PredBB && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) + attributesPermitTailCall(F, CI, RetI, *TLI)) { TailCallBBs.push_back(PredBB); + } else { + // Consider the cases in which the phi value is indirectly produced by + // the tail call, for example when encountering memset(), memmove(), + // strcpy(), whose return value may have been optimized out. In such + // cases, the value needs to be the first function argument. + // + // bb0: + // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1) + // br label %return + // return: + // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ] + if (PredBB && PredBB->getSingleSuccessor() == BB) + CI = dyn_cast_or_null( + PredBB->getTerminator()->getPrevNonDebugInstruction(true)); + + if (CI && CI->use_empty() && + isIntrinsicOrLFToBeTailCalled(TLInfo, CI) && + IncomingVal == CI->getArgOperand(0) && + TLI->mayBeEmittedAsTailCall(CI) && + attributesPermitTailCall(F, CI, RetI, *TLI)) + TailCallBBs.push_back(PredBB); + } } } else { SmallPtrSet VisitedBBs; @@ -2631,8 +2683,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) { CallInst *CI = dyn_cast(I); if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) && - attributesPermitTailCall(F, CI, RetI, *TLI)) - TailCallBBs.push_back(Pred); + attributesPermitTailCall(F, CI, RetI, *TLI)) { + // Either we return void or the return value must be the first + // argument of a known intrinsic or library function. + if (!V || (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) && + V == CI->getArgOperand(0))) { + TailCallBBs.push_back(Pred); + } + } } } } diff --git a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll index c48087da500e0e..401ed9f7bc5a9e 100644 --- a/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll +++ b/llvm/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -188,18 +188,14 @@ return: define ptr @memset_tailc(ptr %ret_val, i64 %sz) nounwind { ; CHECK-LABEL: memset_tailc: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: je LBB4_2 -; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: je LBB4_1 +; CHECK-NEXT: ## %bb.2: ## %if.then ; CHECK-NEXT: movq %rsi, %rdx -; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: callq _memset -; CHECK-NEXT: LBB4_2: ## %return -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: jmp _memset ## TAILCALL +; CHECK-NEXT: LBB4_1: ## %return +; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq ptr %ret_val, null @@ -216,21 +212,15 @@ return: define ptr @memcpy_tailc(ptr %ret_val, i64 %sz, ptr %src) nounwind { ; CHECK-LABEL: memcpy_tailc: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testq %rsi, %rsi ; CHECK-NEXT: je LBB5_1 ; CHECK-NEXT: ## %bb.2: ## %if.then ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movq %rdx, %rsi ; CHECK-NEXT: movq %rax, %rdx -; CHECK-NEXT: callq _memcpy -; CHECK-NEXT: jmp LBB5_3 -; CHECK-NEXT: LBB5_1: -; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: LBB5_3: ## %return -; CHECK-NEXT: movq %rbx, %rax -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: jmp _memcpy ## TAILCALL +; CHECK-NEXT: LBB5_1: ## %return +; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: retq entry: %cmp = icmp eq i64 %sz, 0 @@ -251,25 +241,25 @@ define ptr @strcpy_legal_and_baz_illegal(ptr %arg, i64 %sz, ptr %2) nounwind { ; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rdx, %r14 +; CHECK-NEXT: movq %rdx, %rbx ; CHECK-NEXT: movq %rsi, %r15 -; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: callq _malloc ; CHECK-NEXT: testq %r15, %r15 -; CHECK-NEXT: je LBB6_2 -; CHECK-NEXT: ## %bb.1: ## %if.then +; CHECK-NEXT: je LBB6_1 +; CHECK-NEXT: ## %bb.2: ## %if.then ; CHECK-NEXT: movq %rax, %rdi -; CHECK-NEXT: movq %r14, %rsi -; CHECK-NEXT: movq %rax, %rbx -; CHECK-NEXT: callq _strcpy -; CHECK-NEXT: jmp LBB6_3 -; CHECK-NEXT: LBB6_2: ## %if.else -; CHECK-NEXT: movq %rbx, %rdi -; CHECK-NEXT: movq %r14, %rsi +; CHECK-NEXT: movq %rbx, %rsi +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: jmp _strcpy ## TAILCALL +; CHECK-NEXT: LBB6_1: ## %if.else +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq _baz -; CHECK-NEXT: LBB6_3: ## %return -; CHECK-NEXT: movq %rbx, %rax +; CHECK-NEXT: movq %r14, %rax ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r14 ; CHECK-NEXT: popq %r15 From afa413a132c0959295df36c28814ee83948e4931 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Mon, 12 Feb 2024 13:50:37 +0000 Subject: [PATCH 09/59] [RemoveDIs][DebugInfo] Correctly visit DPValues in StackInfoBuilder::visit (#81247) In `StackInfoBuilder::visit(Instruction &Inst)`, operations are performed on memory-related instructions, including debug intrinsics that refer to "interesting" allocas. There is a block that also visits DPValues attached to the instruction, but this block is near the end of the function; this has two problems: 1. The DPValues attached to an instruction precede that instruction, so they should always be processed before the instruction itself. 2. More importantly, some of the paths for visiting other instructions contain early returns, which will result in the DPValues not being visited at all. This patch simply moves the DPValue-visiting block to the top of the function, which should resolve both of these problems. --- .../Transforms/Utils/MemoryTaggingSupport.cpp | 28 +++++++++---------- .../dbg-declare-tag-offset.ll | 7 ++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp index 648a5276a3d60a..43366950d8df1e 100644 --- a/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp +++ b/llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp @@ -110,6 +110,20 @@ Instruction *getUntagLocationIfFunctionExit(Instruction &Inst) { } void StackInfoBuilder::visit(Instruction &Inst) { + // Check for non-intrinsic debug-info records. + for (auto &DPV : Inst.getDbgValueRange()) { + for (Value *V : DPV.location_ops()) { + if (auto *AI = dyn_cast_or_null(V)) { + if (!isInterestingAlloca(*AI)) + continue; + AllocaInfo &AInfo = Info.AllocasToInstrument[AI]; + auto &DPVVec = AInfo.DbgVariableRecords; + if (DPVVec.empty() || DPVVec.back() != &DPV) + DPVVec.push_back(&DPV); + } + } + } + if (CallInst *CI = dyn_cast(&Inst)) { if (CI->canReturnTwice()) { Info.CallsReturnTwice = true; @@ -150,20 +164,6 @@ void StackInfoBuilder::visit(Instruction &Inst) { } } - // Check for non-intrinsic debug-info records. - for (auto &DPV : Inst.getDbgValueRange()) { - for (Value *V : DPV.location_ops()) { - if (auto *AI = dyn_cast_or_null(V)) { - if (!isInterestingAlloca(*AI)) - continue; - AllocaInfo &AInfo = Info.AllocasToInstrument[AI]; - auto &DPVVec = AInfo.DbgVariableRecords; - if (DPVVec.empty() || DPVVec.back() != &DPV) - DPVVec.push_back(&DPV); - } - } - } - Instruction *ExitUntag = getUntagLocationIfFunctionExit(Inst); if (ExitUntag) Info.RetVec.push_back(ExitUntag); diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll index d5e362bc800ad8..50bad19250267c 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll @@ -1,5 +1,10 @@ ; RUN: opt -passes=hwasan -S -o - %s | FileCheck %s +;; Also test with RemoveDIs to verify that debug intrinsics immediately +;; preceding an alloca (or other instruction of interest to stack tagging) will +;; be correctly processed. +; RUN: opt --try-experimental-debuginfo-iterators -passes=hwasan -S -o - %s | FileCheck %s + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android" @@ -12,11 +17,11 @@ entry: %nodebug2 = alloca ptr %nodebug3 = alloca ptr %a = alloca ptr - %b = alloca ptr ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 + %b = alloca ptr ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.declare(metadata ptr %b, metadata !13, metadata !DIExpression()), !dbg !14 ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) From f5fd0deb2371d0bae3bef2563f50e005a140fc6d Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 12 Feb 2024 08:14:54 -0600 Subject: [PATCH 10/59] [LinkerWrapper][NFC] Rename 'all' to 'generic' for architecture agnostic IR Summary: A previous patch introduced `all` as a special architecture. I have decided I do not like this name and have changed it to `generic`. --- clang/docs/ClangLinkerWrapper.rst | 8 ++++++++ clang/test/Driver/linker-wrapper.c | 2 +- llvm/lib/Object/OffloadBinary.cpp | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/clang/docs/ClangLinkerWrapper.rst b/clang/docs/ClangLinkerWrapper.rst index 6d7770b50e7260..3bef5584757351 100644 --- a/clang/docs/ClangLinkerWrapper.rst +++ b/clang/docs/ClangLinkerWrapper.rst @@ -79,6 +79,14 @@ linking is desired, simply do not run the binaries through the ``clang-linker-wrapper``. This will simply append the embedded device code so that it can be linked later. +Matching +======== + +The linker wrapper will link extracted device code that is compatible with each +other. Generally, this requires that the target triple and architecture match. +An exception is made when the architecture is listed as ``generic``, which will +cause it be linked with any other device code with the same target triple. + Example ======= diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 647629a5969bdc..7fd46778ac9102 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -173,7 +173,7 @@ __attribute__((visibility("protected"), used)) int x; // AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -O2 -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: clang-offload-packager -o %t-lib.out \ -// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=all +// RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t-lib.out // RUN: llvm-ar rcs %t.a %t.o // RUN: clang-offload-packager -o %t1.out \ diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp index 58b9b39e0d2721..4ab6536dc90b52 100644 --- a/llvm/lib/Object/OffloadBinary.cpp +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -356,7 +356,7 @@ bool object::areTargetsCompatible(const OffloadFile::TargetID &LHS, return false; // If the architecture is "all" we assume it is always compatible. - if (LHS.second.equals("all") || RHS.second.equals("all")) + if (LHS.second.equals("generic") || RHS.second.equals("generic")) return true; // Only The AMDGPU target requires additional checks. From 50ed98f1cf2516c59de0351fa683639c937782ea Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 12 Feb 2024 15:05:48 +0100 Subject: [PATCH 11/59] Revert "Reland "[clang-format] Update FormatToken::isSimpleTypeSpecifier() (#80241)"" The change caused an asan crash when running the `QualifierFixerTest.IsQualifierType` unittest, see details: https://github.com/llvm/llvm-project/pull/80241#issuecomment-1938749844 This reverts commit 7f40c5cc4e15a0a67b031f13370afc342a5dc14b. --- clang/lib/Format/FormatToken.cpp | 40 +++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 33bcde3c25ece9..b791c5a26bbe3a 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -14,7 +14,9 @@ #include "FormatToken.h" #include "ContinuationIndenter.h" -#include "TokenAnalyzer.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include namespace clang { namespace format { @@ -32,9 +34,41 @@ const char *getTokenTypeName(TokenType Type) { return nullptr; } +// FIXME: This is copy&pasted from Sema. Put it in a common place and remove +// duplication. bool FormatToken::isSimpleTypeSpecifier() const { - assert(LangOpts.CPlusPlus); - return Tok.isSimpleTypeSpecifier(LangOpts); + switch (Tok.getKind()) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw___bf16: + case tok::kw__Float16: + case tok::kw___float128: + case tok::kw___ibm128: + case tok::kw_wchar_t: + case tok::kw_bool: +#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: +#include "clang/Basic/TransformTypeTraits.def" + case tok::annot_typename: + case tok::kw_char8_t: + case tok::kw_char16_t: + case tok::kw_char32_t: + case tok::kw_typeof: + case tok::kw_decltype: + case tok::kw__Atomic: + return true; + default: + return false; + } } bool FormatToken::isTypeOrIdentifier() const { From 6a471611a4b98396abb0680229aa0694ffe88a43 Mon Sep 17 00:00:00 2001 From: Hirofumi Nakamura Date: Mon, 12 Feb 2024 23:27:09 +0900 Subject: [PATCH 12/59] [clang-format] Support of TableGen value annotations. (#80299) This implements the annotation of the values in TableGen. The main changes are, - parseTableGenValue(), the simplified parser method for the syntax of values. - modified consumeToken() to parseTableGenValue in 'if', 'assert' and after '='. - modified parseParens() to call parseTableGenValue inside. - modified parseSquare() to to call parseTableGenValue inside, with skipping separator tokens. - modified parseAngle() to call parseTableGenValue inside, with skipping separator tokens. --- clang/lib/Format/FormatToken.h | 10 + clang/lib/Format/FormatTokenLexer.cpp | 2 +- clang/lib/Format/TokenAnnotator.cpp | 308 +++++++++++++++++- clang/lib/Format/UnwrappedLineParser.cpp | 13 +- clang/unittests/Format/TokenAnnotatorTest.cpp | 45 +++ 5 files changed, 369 insertions(+), 9 deletions(-) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index bace91b5f99b4d..0c1dce7a294082 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -150,7 +150,17 @@ namespace format { TYPE(StructuredBindingLSquare) \ TYPE(TableGenBangOperator) \ TYPE(TableGenCondOperator) \ + TYPE(TableGenCondOperatorColon) \ + TYPE(TableGenCondOperatorComma) \ + TYPE(TableGenDAGArgCloser) \ + TYPE(TableGenDAGArgListColon) \ + TYPE(TableGenDAGArgListComma) \ + TYPE(TableGenDAGArgOpener) \ + TYPE(TableGenListCloser) \ + TYPE(TableGenListOpener) \ TYPE(TableGenMultiLineString) \ + TYPE(TableGenTrailingPasteOperator) \ + TYPE(TableGenValueSuffix) \ TYPE(TemplateCloser) \ TYPE(TemplateOpener) \ TYPE(TemplateString) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index a57659fd422720..492e7e96dd22e6 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -812,7 +812,7 @@ void FormatTokenLexer::handleTableGenMultilineString() { auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset); if (CloseOffset == StringRef::npos) return; - auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset + 2); + auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2); MultiLineString->TokenText = Text; resetLexer(SourceMgr.getFileOffset( Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size()))); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index b1034002f351bf..d353388a862b56 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -256,6 +256,18 @@ class AnnotatingParser { } } } + if (Style.isTableGen()) { + if (CurrentToken->isOneOf(tok::comma, tok::equal)) { + // They appear as separators. Unless they are not in class definition. + next(); + continue; + } + // In angle, there must be Value like tokens. Types are also able to be + // parsed in the same way with Values. + if (!parseTableGenValue()) + return false; + continue; + } if (!consumeToken()) return false; } @@ -388,6 +400,28 @@ class AnnotatingParser { Contexts.back().IsExpression = !IsForOrCatch; } + if (Style.isTableGen()) { + if (FormatToken *Prev = OpeningParen.Previous) { + if (Prev->is(TT_TableGenCondOperator)) { + Contexts.back().IsTableGenCondOpe = true; + Contexts.back().IsExpression = true; + } else if (Contexts.size() > 1 && + Contexts[Contexts.size() - 2].IsTableGenBangOpe) { + // Hack to handle bang operators. The parent context's flag + // was set by parseTableGenSimpleValue(). + // We have to specify the context outside because the prev of "(" may + // be ">", not the bang operator in this case. + Contexts.back().IsTableGenBangOpe = true; + Contexts.back().IsExpression = true; + } else { + // Otherwise, this paren seems DAGArg. + if (!parseTableGenDAGArg()) + return false; + return parseTableGenDAGArgAndList(&OpeningParen); + } + } + } + // Infer the role of the l_paren based on the previous token if we haven't // detected one yet. if (PrevNonComment && OpeningParen.is(TT_Unknown)) { @@ -549,6 +583,22 @@ class AnnotatingParser { if (CurrentToken->is(tok::comma)) Contexts.back().CanBeExpression = true; + if (Style.isTableGen()) { + if (CurrentToken->is(tok::comma)) { + if (Contexts.back().IsTableGenCondOpe) + CurrentToken->setType(TT_TableGenCondOperatorComma); + next(); + } else if (CurrentToken->is(tok::colon)) { + if (Contexts.back().IsTableGenCondOpe) + CurrentToken->setType(TT_TableGenCondOperatorColon); + next(); + } + // In TableGen there must be Values in parens. + if (!parseTableGenValue()) + return false; + continue; + } + FormatToken *Tok = CurrentToken; if (!consumeToken()) return false; @@ -803,6 +853,8 @@ class AnnotatingParser { if (Left->BlockParameterCount > 1) Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; } + if (Style.isTableGen() && Left->is(TT_TableGenListOpener)) + CurrentToken->setType(TT_TableGenListCloser); next(); return true; } @@ -833,6 +885,19 @@ class AnnotatingParser { Left->setType(TT_ArrayInitializerLSquare); } FormatToken *Tok = CurrentToken; + if (Style.isTableGen()) { + if (CurrentToken->isOneOf(tok::comma, tok::minus, tok::ellipsis)) { + // '-' and '...' appears as a separator in slice. + next(); + } else { + // In TableGen there must be a list of Values in square brackets. + // It must be ValueList or SliceElements. + if (!parseTableGenValue()) + return false; + } + updateParameterCount(Left, Tok); + continue; + } if (!consumeToken()) return false; updateParameterCount(Left, Tok); @@ -840,6 +905,194 @@ class AnnotatingParser { return false; } + void skipToNextNonComment() { + next(); + while (CurrentToken && CurrentToken->is(tok::comment)) + next(); + } + + // Simplified parser for TableGen Value. Returns true on success. + // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed + // by '#', paste operator. + // There also exists the case the Value is parsed as NameValue. + // In this case, the Value ends if '{' is found. + bool parseTableGenValue(bool ParseNameMode = false) { + if (!CurrentToken) + return false; + while (CurrentToken->is(tok::comment)) + next(); + if (!parseTableGenSimpleValue()) + return false; + if (!CurrentToken) + return true; + // Value "#" [Value] + if (CurrentToken->is(tok::hash)) { + if (CurrentToken->Next && + CurrentToken->Next->isOneOf(tok::colon, tok::semi, tok::l_brace)) { + // Trailing paste operator. + // These are only the allowed cases in TGParser::ParseValue(). + CurrentToken->setType(TT_TableGenTrailingPasteOperator); + next(); + return true; + } + FormatToken *HashTok = CurrentToken; + skipToNextNonComment(); + HashTok->setType(TT_Unknown); + if (!parseTableGenValue(ParseNameMode)) + return false; + } + // In name mode, '{' is regarded as the end of the value. + // See TGParser::ParseValue in TGParser.cpp + if (ParseNameMode && CurrentToken->is(tok::l_brace)) + return true; + // These tokens indicates this is a value with suffixes. + if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) { + CurrentToken->setType(TT_TableGenValueSuffix); + FormatToken *Suffix = CurrentToken; + skipToNextNonComment(); + if (Suffix->is(tok::l_square)) + return parseSquare(); + if (Suffix->is(tok::l_brace)) { + Scopes.push_back(getScopeType(*Suffix)); + return parseBrace(); + } + } + return true; + } + + // TokVarName ::= "$" ualpha (ualpha | "0"..."9")* + // Appears as a part of DagArg. + // This does not change the current token on fail. + bool tryToParseTableGenTokVar() { + if (!CurrentToken) + return false; + if (CurrentToken->is(tok::identifier) && + CurrentToken->TokenText.front() == '$') { + skipToNextNonComment(); + return true; + } + return false; + } + + // DagArg ::= Value [":" TokVarName] | TokVarName + // Appears as a part of SimpleValue6. + bool parseTableGenDAGArg() { + if (tryToParseTableGenTokVar()) + return true; + if (parseTableGenValue()) { + if (CurrentToken && CurrentToken->is(tok::colon)) { + CurrentToken->setType(TT_TableGenDAGArgListColon); + skipToNextNonComment(); + return tryToParseTableGenTokVar(); + } + return true; + } + return false; + } + + // SimpleValue6 ::= "(" DagArg [DagArgList] ")" + // This parses SimpleValue 6's inside part of "(" ")" + bool parseTableGenDAGArgAndList(FormatToken *Opener) { + FormatToken *FirstTok = CurrentToken; + if (!parseTableGenDAGArg()) + return false; + // Parse the [DagArgList] part + bool FirstDAGArgListElm = true; + while (CurrentToken) { + if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) { + CurrentToken->setType(TT_TableGenDAGArgListComma); + skipToNextNonComment(); + } + if (CurrentToken && CurrentToken->is(tok::r_paren)) { + CurrentToken->setType(TT_TableGenDAGArgCloser); + Opener->MatchingParen = CurrentToken; + CurrentToken->MatchingParen = Opener; + skipToNextNonComment(); + return true; + } + if (!parseTableGenDAGArg()) + return false; + FirstDAGArgListElm = false; + } + return false; + } + + bool parseTableGenSimpleValue() { + assert(Style.isTableGen()); + if (!CurrentToken) + return false; + FormatToken *Tok = CurrentToken; + skipToNextNonComment(); + // SimpleValue 1, 2, 3: Literals + if (Tok->isOneOf(tok::numeric_constant, tok::string_literal, + TT_TableGenMultiLineString, tok::kw_true, tok::kw_false, + tok::question, tok::kw_int)) { + return true; + } + // SimpleValue 4: ValueList, Type + if (Tok->is(tok::l_brace)) { + Scopes.push_back(getScopeType(*Tok)); + return parseBrace(); + } + // SimpleValue 5: List initializer + if (Tok->is(tok::l_square)) { + Tok->setType(TT_TableGenListOpener); + if (!parseSquare()) + return false; + if (Tok->is(tok::less)) { + CurrentToken->setType(TT_TemplateOpener); + return parseAngle(); + } + return true; + } + // SimpleValue 6: DAGArg [DAGArgList] + // SimpleValue6 ::= "(" DagArg [DagArgList] ")" + if (Tok->is(tok::l_paren)) { + Tok->setType(TT_TableGenDAGArgOpener); + return parseTableGenDAGArgAndList(Tok); + } + // SimpleValue 9: Bang operator + if (Tok->is(TT_TableGenBangOperator)) { + if (CurrentToken && CurrentToken->is(tok::less)) { + CurrentToken->setType(TT_TemplateOpener); + skipToNextNonComment(); + if (!parseAngle()) + return false; + } + if (!CurrentToken || CurrentToken->isNot(tok::l_paren)) + return false; + skipToNextNonComment(); + // FIXME: Hack using inheritance to child context + Contexts.back().IsTableGenBangOpe = true; + bool Result = parseParens(); + Contexts.back().IsTableGenBangOpe = false; + return Result; + } + // SimpleValue 9: Cond operator + if (Tok->is(TT_TableGenCondOperator)) { + Tok = CurrentToken; + skipToNextNonComment(); + if (!Tok || Tok->isNot(tok::l_paren)) + return false; + bool Result = parseParens(); + return Result; + } + // We have to check identifier at the last because the kind of bang/cond + // operators are also identifier. + // SimpleValue 7: Identifiers + if (Tok->is(tok::identifier)) { + // SimpleValue 8: Anonymous record + if (CurrentToken && CurrentToken->is(tok::less)) { + CurrentToken->setType(TT_TemplateOpener); + skipToNextNonComment(); + return parseAngle(); + } + return true; + } + + return false; + } + bool couldBeInStructArrayInitializer() const { if (Contexts.size() < 2) return false; @@ -880,6 +1133,8 @@ class AnnotatingParser { OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) { Contexts.back().VerilogMayBeConcatenation = true; } + if (Style.isTableGen()) + Contexts.back().ColonIsDictLiteral = false; unsigned CommaCount = 0; while (CurrentToken) { @@ -906,7 +1161,7 @@ class AnnotatingParser { FormatToken *Previous = CurrentToken->getPreviousNonComment(); if (Previous->is(TT_JsTypeOptionalQuestion)) Previous = Previous->getPreviousNonComment(); - if ((CurrentToken->is(tok::colon) && + if ((CurrentToken->is(tok::colon) && !Style.isTableGen() && (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || Style.isProto()) { OpeningBrace.setType(TT_DictLiteral); @@ -915,10 +1170,12 @@ class AnnotatingParser { Previous->setType(TT_SelectorName); } } - if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown)) + if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown) && + !Style.isTableGen()) { OpeningBrace.setType(TT_DictLiteral); - else if (Style.isJavaScript()) + } else if (Style.isJavaScript()) { OpeningBrace.overwriteFixedType(TT_DictLiteral); + } } if (CurrentToken->is(tok::comma)) { if (Style.isJavaScript()) @@ -989,6 +1246,9 @@ class AnnotatingParser { // operators. if (Tok->is(TT_VerilogTableItem)) return true; + // Multi-line string itself is a single annotated token. + if (Tok->is(TT_TableGenMultiLineString)) + return true; switch (Tok->Tok.getKind()) { case tok::plus: case tok::minus: @@ -1119,6 +1379,10 @@ class AnnotatingParser { Tok->setType(TT_ObjCMethodExpr); } else if (Contexts.back().ContextKind == tok::l_paren && !Line.InPragmaDirective) { + if (Style.isTableGen() && Contexts.back().IsTableGenDAGArg) { + Tok->setType(TT_TableGenDAGArgListColon); + break; + } Tok->setType(TT_InlineASMColon); } break; @@ -1130,6 +1394,14 @@ class AnnotatingParser { Tok->setType(TT_JsTypeOperator); break; case tok::kw_if: + if (Style.isTableGen()) { + // In TableGen it has the form 'if' 'then'. + if (!parseTableGenValue()) + return false; + if (CurrentToken && CurrentToken->is(Keywords.kw_then)) + next(); // skip then + break; + } if (CurrentToken && CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) { next(); @@ -1235,6 +1507,8 @@ class AnnotatingParser { } break; case tok::l_square: + if (Style.isTableGen()) + Tok->setType(TT_TableGenListOpener); if (!parseSquare()) return false; break; @@ -1264,6 +1538,8 @@ class AnnotatingParser { if (Previous && Previous->getType() != TT_DictLiteral) Previous->setType(TT_SelectorName); } + if (Style.isTableGen()) + Tok->setType(TT_TemplateOpener); } else { Tok->setType(TT_BinaryOperator); NonTemplateLess.insert(Tok); @@ -1423,11 +1699,28 @@ class AnnotatingParser { if (!Tok->getPreviousNonComment()) Line.IsContinuation = true; } + if (Style.isTableGen()) { + if (Tok->is(Keywords.kw_assert)) { + if (!parseTableGenValue()) + return false; + } else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) && + (!Tok->Next || + !Tok->Next->isOneOf(tok::colon, tok::l_brace))) { + // The case NameValue appears. + if (!parseTableGenValue(true)) + return false; + } + } break; case tok::arrow: if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept)) Tok->setType(TT_TrailingReturnArrow); break; + case tok::equal: + // In TableGen, there must be a value after "="; + if (Style.isTableGen() && !parseTableGenValue()) + return false; + break; default: break; } @@ -1757,6 +2050,9 @@ class AnnotatingParser { // Whether the braces may mean concatenation instead of structure or array // literal. bool VerilogMayBeConcatenation = false; + bool IsTableGenDAGArg = false; + bool IsTableGenBangOpe = false; + bool IsTableGenCondOpe = false; enum { Unknown, // Like the part after `:` in a constructor. @@ -2061,6 +2357,9 @@ class AnnotatingParser { // In JavaScript, `interface X { foo?(): bar; }` is an optional method // on the interface, not a ternary expression. Current.setType(TT_JsTypeOptionalQuestion); + } else if (Style.isTableGen()) { + // In TableGen, '?' is just an identifier like token. + Current.setType(TT_Unknown); } else { Current.setType(TT_ConditionalExpr); } @@ -2239,6 +2538,9 @@ class AnnotatingParser { // keywords such as let and def* defines names. if (Keywords.isTableGenDefinition(*PreviousNotConst)) return true; + // Otherwise C++ style declarations is available only inside the brace. + if (Contexts.back().ContextKind != tok::l_brace) + return false; } bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index d4f9b3f9df524e..d84914c6a4cf1c 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -495,12 +495,15 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { do { NextTok = Tokens->getNextToken(); } while (NextTok->is(tok::comment)); - while (NextTok->is(tok::hash) && !Line->InMacroBody) { - NextTok = Tokens->getNextToken(); - do { + if (!Style.isTableGen()) { + // InTableGen, '#' is like binary operator. Not a preprocessor directive. + while (NextTok->is(tok::hash) && !Line->InMacroBody) { NextTok = Tokens->getNextToken(); - } while (NextTok->is(tok::comment) || - (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof))); + do { + NextTok = Tokens->getNextToken(); + } while (NextTok->is(tok::comment) || + (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof))); + } } switch (Tok->Tok.getKind()) { diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 52a00c8a1a35da..3b36e407228195 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2287,6 +2287,51 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator); Tokens = Annotate("!cond"); EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator); + + auto AnnotateValue = [this, &Style](llvm::StringRef Code) { + // Values are annotated only in specific context. + auto Result = annotate(("def X { let V = " + Code + "; }").str(), Style); + return decltype(Result){Result.begin() + 6, Result.end() - 3}; + }; + // Both of bang/cond operators. + Tokens = AnnotateValue("!cond(!eq(x, 0): 1, true: x)"); + ASSERT_EQ(Tokens.size(), 15u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator); + EXPECT_TOKEN(Tokens[2], tok::identifier, TT_TableGenBangOperator); + EXPECT_TOKEN(Tokens[8], tok::colon, TT_TableGenCondOperatorColon); + EXPECT_TOKEN(Tokens[10], tok::comma, TT_TableGenCondOperatorComma); + EXPECT_TOKEN(Tokens[12], tok::colon, TT_TableGenCondOperatorColon); + // DAGArg values with operator identifier + Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)"); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener); + EXPECT_TOKEN(Tokens[3], tok::colon, TT_TableGenDAGArgListColon); + EXPECT_TOKEN(Tokens[4], tok::identifier, TT_Unknown); // $src1 + EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListComma); + EXPECT_TOKEN(Tokens[7], tok::colon, TT_TableGenDAGArgListColon); + EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser); + // List literal + Tokens = AnnotateValue("[1, 2, 3]"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::l_square, TT_TableGenListOpener); + EXPECT_TOKEN(Tokens[6], tok::r_square, TT_TableGenListCloser); + // Suffixes of values + Tokens = AnnotateValue("valid.field"); + ASSERT_EQ(Tokens.size(), 3u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::period, TT_TableGenValueSuffix); + // Code + Tokens = AnnotateValue("[{ code is multiline string }]"); + ASSERT_EQ(Tokens.size(), 1u) << Tokens; + EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString); + + // The definition + Tokens = annotate("def Def : Parent {}", Style); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; // This contains eof. + // We use inheritance colon and function brace. They are enough. + EXPECT_TOKEN(Tokens[2], tok::colon, TT_InheritanceColon); + EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser); + EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_FunctionLBrace); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { From 2b5e4eeb26ce57a8d7c2835cffc80388f43f044a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Mon, 12 Feb 2024 15:30:58 +0100 Subject: [PATCH 13/59] [clang][analyzer] Remove 'alpha.core.CallAndMessageUnInitRefArg' from documentation (NFC). (#81138) This checker does not exist (any more?) but appeared in the documentation. No other references to CallAndMessageUnInitRefArg are found in the full clang code. --- clang/docs/analyzer/checkers.rst | 22 ---------------------- clang/www/analyzer/alpha_checks.html | 23 ----------------------- 2 files changed, 45 deletions(-) diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index bb637cf1b8007b..510629d8a2d480 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -1890,28 +1890,6 @@ the locking/unlocking of ``mtx_t`` mutexes. mtx_lock(&mtx1); // warn: This lock has already been acquired } -.. _alpha-core-CallAndMessageUnInitRefArg: - -alpha.core.CallAndMessageUnInitRefArg (C,C++, ObjC) -""""""""""""""""""""""""""""""""""""""""""""""""""" -Check for logical errors for function calls and Objective-C -message expressions (e.g., uninitialized arguments, null function pointers, and pointer to undefined variables). - -.. code-block:: c - - void test(void) { - int t; - int &p = t; - int &s = p; - int &q = s; - foo(q); // warn - } - - void test(void) { - int x; - foo(&x); // warn - } - .. _alpha-core-CastSize: alpha.core.CastSize (C) diff --git a/clang/www/analyzer/alpha_checks.html b/clang/www/analyzer/alpha_checks.html index 11ef7d405dd4c8..7bbe4a20288f23 100644 --- a/clang/www/analyzer/alpha_checks.html +++ b/clang/www/analyzer/alpha_checks.html @@ -87,29 +87,6 @@

Core Alpha Checkers

- - - -