X86ISelLowering.cpp [plain text]
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
#include <cctype>
// Everything below refers to LLVM entities without the llvm:: qualifier.
using namespace llvm;
// Name under which this file's debug output and statistics are grouped.
#define DEBUG_TYPE "x86-isel"
// Counter reported as "Number of tail calls".
STATISTIC(NumTailCalls, "Number of tail calls");
// -x86-experimental-vector-widening-legalization (bool, default false,
// cl::Hidden): legalize vector types through widening rather than promotion.
static cl::opt<bool> ExperimentalVectorWideningLegalization(
"x86-experimental-vector-widening-legalization", cl::init(false),
cl::desc("Enable an experimental vector type legalization through widening "
"rather than promotion."),
cl::Hidden);
// -x86-experimental-vector-shuffle-lowering (bool, default true, cl::Hidden):
// selects the experimental vector shuffle lowering code path.
static cl::opt<bool> ExperimentalVectorShuffleLowering(
"x86-experimental-vector-shuffle-lowering", cl::init(true),
cl::desc("Enable an experimental vector shuffle lowering code path."),
cl::Hidden);
// -x86-experimental-vector-shuffle-legality (bool, default false, cl::Hidden):
// per its description, only meaningful together with the experimental shuffle
// lowering flag above.
static cl::opt<bool> ExperimentalVectorShuffleLegality(
"x86-experimental-vector-shuffle-legality", cl::init(false),
cl::desc("Enable experimental shuffle legality based on the experimental "
"shuffle lowering. Should only be used with the experimental "
"shuffle lowering."),
cl::Hidden);
// -x86-recip-refinement-steps (int, default 1, cl::NotHidden): number of
// Newton-Raphson iterations applied to the hardware reciprocal estimate.
static cl::opt<int> ReciprocalEstimateRefinementSteps(
"x86-recip-refinement-steps", cl::init(1),
cl::desc("Specify the number of Newton-Raphson iterations applied to the "
"result of the hardware reciprocal estimate instruction."),
cl::NotHidden);
// Forward declaration; the definition is not in this part of the file.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
/// Pull the \p vectorWidth-bit chunk that contains element \p IdxVal out of
/// \p Vec.
///
/// \p vectorWidth must be 128 or 256 (asserted). The result keeps the element
/// type of \p Vec and has proportionally fewer elements. An UNDEF input gives
/// UNDEF, a BUILD_VECTOR input gives a smaller BUILD_VECTOR over the selected
/// operands, and any other input is wrapped in an EXTRACT_SUBVECTOR node.
static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                                SelectionDAG &DAG, SDLoc dl,
                                unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  const EVT SrcVT = Vec.getValueType();
  const EVT EltVT = SrcVT.getVectorElementType();

  // The result type holds 1/NumChunks of the source elements.
  const unsigned NumChunks = SrcVT.getSizeInBits() / vectorWidth;
  const unsigned NumResultElts = SrcVT.getVectorNumElements() / NumChunks;
  const EVT ChunkVT =
      EVT::getVectorVT(*DAG.getContext(), EltVT, NumResultElts);

  const unsigned SrcOpcode = Vec.getOpcode();

  // Any part of an undefined vector is itself undefined.
  if (SrcOpcode == ISD::UNDEF)
    return DAG.getUNDEF(ChunkVT);

  const unsigned EltBits = EltVT.getSizeInBits();
  const unsigned EltsPerChunk = vectorWidth / EltBits;

  // Round the element index down to the first element of its chunk.
  const unsigned ChunkNo = (IdxVal * EltBits) / vectorWidth;
  const unsigned FirstElt = ChunkNo * EltsPerChunk;

  if (SrcOpcode != ISD::BUILD_VECTOR) {
    SDValue FirstEltIdx = DAG.getIntPtrConstant(FirstElt);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ChunkVT, Vec, FirstEltIdx);
  }

  // For a build_vector, just build a narrower one from the chosen operands.
  return DAG.getNode(ISD::BUILD_VECTOR, dl, ChunkVT,
                     makeArrayRef(Vec->op_begin() + FirstElt, EltsPerChunk));
}
/// Extract the 128-bit chunk containing element \p IdxVal from \p Vec, which
/// is asserted to be a 256-bit or 512-bit vector. Forwards to
/// ExtractSubVector with a width of 128.
static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  const EVT SrcVT = Vec.getValueType();
  (void)SrcVT; // Only inspected by the assertion below.
  assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) &&
         "Unexpected vector size!");

  const unsigned ChunkBits = 128;
  return ExtractSubVector(Vec, IdxVal, DAG, dl, ChunkBits);
}
/// Extract the 256-bit chunk containing element \p IdxVal from \p Vec, which
/// is asserted to be a 512-bit vector. Forwards to ExtractSubVector with a
/// width of 256.
static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
                                   SelectionDAG &DAG, SDLoc dl) {
  const EVT SrcVT = Vec.getValueType();
  (void)SrcVT; // Only inspected by the assertion below.
  assert(SrcVT.is512BitVector() && "Unexpected vector size!");

  const unsigned ChunkBits = 256;
  return ExtractSubVector(Vec, IdxVal, DAG, dl, ChunkBits);
}
/// Insert \p Vec into \p Result at the \p vectorWidth-bit chunk that contains
/// element \p IdxVal.
///
/// \p vectorWidth must be 128 or 256 (asserted). If \p Vec is UNDEF, \p Result
/// is returned unchanged; otherwise an INSERT_SUBVECTOR node of \p Result's
/// type is created.
static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                               unsigned IdxVal, SelectionDAG &DAG,
                               SDLoc dl, unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  // Nothing to insert: the destination stays as it is.
  if (Vec.getOpcode() == ISD::UNDEF)
    return Result;

  const unsigned EltBits =
      Vec.getValueType().getVectorElementType().getSizeInBits();
  const unsigned EltsPerChunk = vectorWidth / EltBits;

  // Round the element index down to the first element of its chunk.
  const unsigned ChunkNo = (IdxVal * EltBits) / vectorWidth;
  SDValue InsertPos = DAG.getIntPtrConstant(ChunkNo * EltsPerChunk);

  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Result.getValueType(), Result,
                     Vec, InsertPos);
}
/// Insert the 128-bit vector \p Vec (asserted) into \p Result at the chunk
/// containing element \p IdxVal. Forwards to InsertSubVector with a width of
/// 128.
static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  const EVT SubVT = Vec.getValueType();
  (void)SubVT; // Only inspected by the assertion below.
  assert(SubVT.is128BitVector() && "Unexpected vector size!");

  const unsigned ChunkBits = 128;
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, ChunkBits);
}
/// Insert the 256-bit vector \p Vec (asserted) into \p Result at the chunk
/// containing element \p IdxVal. Forwards to InsertSubVector with a width of
/// 256.
static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                                  SelectionDAG &DAG, SDLoc dl) {
  const EVT SubVT = Vec.getValueType();
  (void)SubVT; // Only inspected by the assertion below.
  assert(SubVT.is256BitVector() && "Unexpected vector size!");

  const unsigned ChunkBits = 256;
  return InsertSubVector(Result, Vec, IdxVal, DAG, dl, ChunkBits);
}
/// Build a value of type \p VT from two 128-bit vectors: \p V1 is inserted at
/// element 0 of an UNDEF \p VT, then \p V2 at element \p NumElems / 2.
static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue WithFirstPart =
      Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);

  const unsigned SecondPartStart = NumElems / 2;
  return Insert128BitVector(WithFirstPart, V2, SecondPartStart, DAG, dl);
}
/// Build a value of type \p VT from two 256-bit vectors: \p V1 is inserted at
/// element 0 of an UNDEF \p VT, then \p V2 at element \p NumElems / 2.
static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
                                   unsigned NumElems, SelectionDAG &DAG,
                                   SDLoc dl) {
  SDValue WithFirstPart =
      Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);

  const unsigned SecondPartStart = NumElems / 2;
  return Insert256BitVector(WithFirstPart, V2, SecondPartStart, DAG, dl);
}
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
TD = getDataLayout();
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
if (Subtarget->isAtom())
setSchedulingPreference(Sched::ILP);
else if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
else
setSchedulingPreference(Sched::RegPressure);
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
if (TM.getOptLevel() >= CodeGenOpt::Default) {
if (Subtarget->hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
addBypassSlowDiv(64, 16);
}
if (Subtarget->isTargetKnownWindowsMSVC()) {
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}
if (Subtarget->isTargetDarwin()) {
setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);
} else if (Subtarget->isTargetWindowsGNU()) {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(false);
} else {
setUseUnderscoreSetJmp(true);
setUseUnderscoreLongJmp(true);
}
addRegisterClass(MVT::i8, &X86::GR8RegClass);
addRegisterClass(MVT::i16, &X86::GR16RegClass);
addRegisterClass(MVT::i32, &X86::GR32RegClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, &X86::GR64RegClass);
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!TM.Options.UseSoftFloat) {
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
if (!TM.Options.UseSoftFloat) {
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
}
} else {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
if (X86ScalarSSEf32) {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
} else {
setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
}
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
} else if (!TM.Options.UseSoftFloat) {
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}
if (isTargetFTOL()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::ADDC, VT, Custom);
setOperationAction(ISD::ADDE, VT, Custom);
setOperationAction(ISD::SUBC, VT, Custom);
setOperationAction(ISD::SUBE, VT, Custom);
}
setOperationAction(ISD::BR_JT , MVT::Other, Expand);
setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
}
}
if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
if (!Subtarget->hasMOVBE())
setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
setOperationAction(ISD::SELECT , MVT::i1 , Promote);
setOperationAction(ISD::SELECT , MVT::i8 , Custom);
setOperationAction(ISD::SELECT , MVT::i16 , Custom);
setOperationAction(ISD::SELECT , MVT::i32 , Custom);
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::ConstantPool , MVT::i32 , Custom);
setOperationAction(ISD::JumpTable , MVT::i32 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32 , Custom);
if (Subtarget->is64Bit())
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::ExternalSymbol , MVT::i32 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ConstantPool , MVT::i64 , Custom);
setOperationAction(ISD::JumpTable , MVT::i64 , Custom);
setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i64 , Custom);
setOperationAction(ISD::BlockAddress , MVT::i64 , Custom);
}
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SHL_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
if (Subtarget->hasCmpxchg16b()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
!Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
if (Subtarget->is64Bit()) {
setExceptionPointerRegister(X86::RAX);
setExceptionSelectorRegister(X86::RDX);
} else {
setExceptionPointerRegister(X86::EAX);
setExceptionSelectorRegister(X86::EDX);
}
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
setOperationAction(ISD::VAARG , MVT::Other, Expand);
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
}
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
addRegisterClass(MVT::f32, &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::FR64RegClass);
setOperationAction(ISD::FABS , MVT::f64, Custom);
setOperationAction(ISD::FABS , MVT::f32, Custom);
setOperationAction(ISD::FNEG , MVT::f64, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
addLegalFPImmediate(APFloat(+0.0)); addLegalFPImmediate(APFloat(+0.0f)); } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
addRegisterClass(MVT::f32, &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
setOperationAction(ISD::FABS , MVT::f32, Custom);
setOperationAction(ISD::FNEG , MVT::f32, Custom);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
addLegalFPImmediate(APFloat(+0.0f)); addLegalFPImmediate(APFloat(+0.0)); addLegalFPImmediate(APFloat(+1.0)); addLegalFPImmediate(APFloat(-0.0)); addLegalFPImmediate(APFloat(-1.0));
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
}
} else if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
addRegisterClass(MVT::f32, &X86::RFP32RegClass);
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
setOperationAction(ISD::UNDEF, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
addLegalFPImmediate(APFloat(+0.0)); addLegalFPImmediate(APFloat(+1.0)); addLegalFPImmediate(APFloat(-0.0)); addLegalFPImmediate(APFloat(-1.0)); addLegalFPImmediate(APFloat(+0.0f)); addLegalFPImmediate(APFloat(+1.0f)); addLegalFPImmediate(APFloat(-0.0f)); addLegalFPImmediate(APFloat(-1.0f)); }
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
addLegalFPImmediate(TmpFlt); TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt);
bool ignored;
APFloat TmpFlt2(+1.0);
TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); TmpFlt2.changeSign();
addLegalFPImmediate(TmpFlt2); }
if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
}
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
setOperationAction(ISD::FRINT, MVT::f80, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::FLOG10, MVT::f80, Expand);
setOperationAction(ISD::FEXP, MVT::f80, Expand);
setOperationAction(ISD::FEXP2, MVT::f80, Expand);
setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
for (MVT VT : MVT::vector_valuetypes()) {
setOperationAction(ISD::ADD , VT, Expand);
setOperationAction(ISD::SUB , VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
setOperationAction(ISD::MUL , VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::LOAD, VT, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
setOperationAction(ISD::SRL, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::SETCC, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
setOperationAction(ISD::TRUNCATE, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(InnerVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
if (VT.getVectorElementType() == MVT::i1)
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
}
setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
setOperationAction(ISD::AND, MVT::v8i8, Expand);
setOperationAction(ISD::AND, MVT::v4i16, Expand);
setOperationAction(ISD::AND, MVT::v2i32, Expand);
setOperationAction(ISD::AND, MVT::v1i64, Expand);
setOperationAction(ISD::OR, MVT::v8i8, Expand);
setOperationAction(ISD::OR, MVT::v4i16, Expand);
setOperationAction(ISD::OR, MVT::v2i32, Expand);
setOperationAction(ISD::OR, MVT::v1i64, Expand);
setOperationAction(ISD::XOR, MVT::v8i8, Expand);
setOperationAction(ISD::XOR, MVT::v4i16, Expand);
setOperationAction(ISD::XOR, MVT::v2i32, Expand);
setOperationAction(ISD::XOR, MVT::v1i64, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
setOperationAction(ISD::ADD, MVT::v16i8, Legal);
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::SUB, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FADD, MVT::v2f64, Legal);
setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
if (!Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
}
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
if (!VT.is128BitVector())
continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (!VT.is128BitVector())
continue;
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v2i64);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, MVT::v2i64);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
if (!Subtarget->is64Bit())
setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
}
if (Subtarget->hasSSE2()) {
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
setOperationAction(ISD::SRL, MVT::v16i8, Custom);
setOperationAction(ISD::SHL, MVT::v8i16, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
setOperationAction(ISD::SRA, MVT::v16i8, Custom);
setOperationAction(ISD::SRL, MVT::v2i64, Custom);
setOperationAction(ISD::SRL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
setOperationAction(ISD::FADD, MVT::v8f32, Legal);
setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
setOperationAction(ISD::FCEIL, MVT::v8f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::v8f32, Legal);
setOperationAction(ISD::FRINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
setOperationAction(ISD::FADD, MVT::v4f64, Legal);
setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
setOperationAction(ISD::FRINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
setOperationAction(ISD::SHL, MVT::v16i16, Custom);
setOperationAction(ISD::SHL, MVT::v32i8, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v2f64, Legal);
setOperationAction(ISD::FMA, MVT::f32, Legal);
setOperationAction(ISD::FMA, MVT::f64, Legal);
}
if (Subtarget->hasInt256()) {
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
setOperationAction(ISD::ADD, MVT::v32i8, Legal);
setOperationAction(ISD::SUB, MVT::v4i64, Legal);
setOperationAction(ISD::SUB, MVT::v8i32, Legal);
setOperationAction(ISD::SUB, MVT::v16i16, Legal);
setOperationAction(ISD::SUB, MVT::v32i8, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
if (!Subtarget->hasPOPCNT())
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
setOperationAction(ISD::ADD, MVT::v32i8, Custom);
setOperationAction(ISD::SUB, MVT::v4i64, Custom);
setOperationAction(ISD::SUB, MVT::v8i32, Custom);
setOperationAction(ISD::SUB, MVT::v16i16, Custom);
setOperationAction(ISD::SUB, MVT::v32i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
}
setOperationAction(ISD::SRL, MVT::v4i64, Custom);
setOperationAction(ISD::SRL, MVT::v8i32, Custom);
setOperationAction(ISD::SHL, MVT::v4i64, Custom);
setOperationAction(ISD::SHL, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
for (MVT VT : MVT::vector_valuetypes()) {
if (VT.getScalarSizeInBits() >= 32) {
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
if (VT.is128BitVector()) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
if (!VT.is256BitVector())
continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (!VT.is256BitVector())
continue;
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v4i64);
setOperationAction(ISD::OR, VT, Promote);
AddPromotedToType (ISD::OR, VT, MVT::v4i64);
setOperationAction(ISD::XOR, VT, Promote);
AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
setOperationAction(ISD::LOAD, VT, Promote);
AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) {
addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
addRegisterClass(MVT::i1, &X86::VK1RegClass);
addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
setOperationAction(ISD::LOAD, MVT::v16f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8f64, Legal);
setOperationAction(ISD::LOAD, MVT::v8i64, Legal);
setOperationAction(ISD::LOAD, MVT::v16i32, Legal);
setOperationAction(ISD::LOAD, MVT::v16i1, Legal);
setOperationAction(ISD::FADD, MVT::v16f32, Legal);
setOperationAction(ISD::FSUB, MVT::v16f32, Legal);
setOperationAction(ISD::FMUL, MVT::v16f32, Legal);
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
setOperationAction(ISD::SUB, MVT::v8i64, Legal);
setOperationAction(ISD::SUB, MVT::v16i32, Legal);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
setOperationAction(ISD::SRL, MVT::v8i64, Custom);
setOperationAction(ISD::SRL, MVT::v16i32, Custom);
setOperationAction(ISD::SHL, MVT::v8i64, Custom);
setOperationAction(ISD::SHL, MVT::v16i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i64, Custom);
setOperationAction(ISD::SRA, MVT::v16i32, Custom);
setOperationAction(ISD::AND, MVT::v8i64, Legal);
setOperationAction(ISD::OR, MVT::v8i64, Legal);
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
setOperationAction(ISD::AND, MVT::v16i32, Legal);
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
if (Subtarget->hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
}
for (MVT VT : MVT::vector_valuetypes()) {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (VT.is128BitVector() || VT.is256BitVector()) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
if (VT.getVectorElementType() == MVT::i1)
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
if (!VT.is512BitVector())
continue;
if ( EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
}
}
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (!VT.is512BitVector())
continue;
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) {
addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
setOperationAction(ISD::LOAD, MVT::v32i16, Legal);
setOperationAction(ISD::LOAD, MVT::v64i8, Legal);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
setOperationAction(ISD::ADD, MVT::v32i16, Legal);
setOperationAction(ISD::ADD, MVT::v64i8, Legal);
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (!VT.is512BitVector())
continue;
if (EltSize < 32) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
}
}
}
if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) {
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
setOperationAction(ISD::XOR, MVT::v8i32, Legal);
setOperationAction(ISD::AND, MVT::v4i32, Legal);
setOperationAction(ISD::OR, MVT::v4i32, Legal);
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
}
for (MVT VT : MVT::vector_valuetypes())
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
if (!Subtarget->is64Bit())
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
setOperationAction(ISD::USUBO, VT, Custom);
setOperationAction(ISD::SMULO, VT, Custom);
setOperationAction(ISD::UMULO, VT, Custom);
}
if (!Subtarget->is64Bit()) {
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
}
}
if (Subtarget->isTargetWin64()) {
setOperationAction(ISD::SDIV, MVT::i128, Custom);
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
}
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
computeRegisterProperties(Subtarget->getRegisterInfo());
MaxStoresPerMemset = 16; MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
MaxStoresPerMemcpy = 8; MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
MaxStoresPerMemmove = 8; MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
setPrefLoopAlignment(4);
PredictableSelectIsExpensive = !Subtarget->isAtom();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4);
verifyIntrinsicTables();
}
/// Report whether the LOAD_STACK_GUARD node should be used.
/// This is the case only when the subtarget is both Mach-O and 64-bit.
bool X86TargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget->isTargetMachO())
    return false;
  return Subtarget->is64Bit();
}
/// Choose how an illegal vector type should be legalized.
///
/// When the experimental widening-legalization option is enabled, every
/// vector whose element count is not 1 and whose element type is not i1 is
/// widened. All other cases defer to the TargetLoweringBase default.
TargetLoweringBase::LegalizeTypeAction
X86TargetLowering::getPreferredVectorAction(EVT VT) const {
  // Evaluated left to right with short-circuiting, so VT is only inspected
  // when the option is actually turned on.
  const bool ShouldWiden =
      ExperimentalVectorWideningLegalization &&
      VT.getVectorNumElements() != 1 &&
      VT.getVectorElementType().getSimpleVT() != MVT::i1;

  return ShouldWiden ? TypeWidenVector
                     : TargetLoweringBase::getPreferredVectorAction(VT);
}
/// Return the value type produced by a SETCC whose operands have type \p VT.
///
/// Scalars yield i1 when AVX-512 is available and i8 otherwise. Vectors
/// yield a vXi1 mask type when the matching AVX-512 feature (AVX512, BWI,
/// VLX) covers the operand's width, element type and element count; any
/// other vector falls back to VT.changeVectorElementTypeToInteger().
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector()) {
    if (Subtarget->hasAVX512())
      return MVT::i1;
    return MVT::i8;
  }

  const EVT ScalarVT = VT.getVectorElementType();
  const unsigned Lanes = VT.getVectorNumElements();

  // Element-type classes that the individual feature checks below care about.
  const bool IsDwordOrQwordElt = ScalarVT == MVT::i32 || ScalarVT == MVT::i64 ||
                                 ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
  const bool IsByteOrWordElt = ScalarVT == MVT::i8 || ScalarVT == MVT::i16;

  if (VT.is512BitVector()) {
    if (Subtarget->hasAVX512() && IsDwordOrQwordElt) {
      if (Lanes == 8)
        return MVT::v8i1;
      if (Lanes == 16)
        return MVT::v16i1;
    }
    if (Subtarget->hasBWI() && IsByteOrWordElt) {
      if (Lanes == 32)
        return MVT::v32i1;
      if (Lanes == 64)
        return MVT::v64i1;
    }
  }

  if (VT.is256BitVector() || VT.is128BitVector()) {
    if (Subtarget->hasVLX() && IsDwordOrQwordElt) {
      if (Lanes == 2)
        return MVT::v2i1;
      if (Lanes == 4)
        return MVT::v4i1;
      if (Lanes == 8)
        return MVT::v8i1;
    }
    if (Subtarget->hasBWI() && Subtarget->hasVLX() && IsByteOrWordElt) {
      if (Lanes == 8)
        return MVT::v8i1;
      if (Lanes == 16)
        return MVT::v16i1;
      if (Lanes == 32)
        return MVT::v32i1;
    }
  }

  // No mask type applies (or the lane count matched none of the cases
  // above): use an integer vector with the same shape as the operands.
  return VT.changeVectorElementTypeToInteger();
}
/// Helper for getByValTypeAlignment: raise \p MaxAlign to 16 if \p Ty is, or
/// (through arrays and structs) contains, a vector whose bit width is 128.
///
/// \p MaxAlign is only ever increased; once it reaches 16 no further types
/// are examined.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;

  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
    if (VecTy->getBitWidth() == 128)
      MaxAlign = 16;
    return;
  }

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    // Every array element has the same type, so one query is enough.
    unsigned ElemAlign = 0;
    getMaxByValAlign(ArrTy->getElementType(), ElemAlign);
    if (MaxAlign < ElemAlign)
      MaxAlign = ElemAlign;
    return;
  }

  StructType *StructTy = dyn_cast<StructType>(Ty);
  if (!StructTy)
    return;

  // Visit the fields in order, stopping as soon as the cap of 16 is reached.
  const unsigned NumFields = StructTy->getNumElements();
  for (unsigned Idx = 0; Idx != NumFields && MaxAlign != 16; ++Idx) {
    unsigned FieldAlign = 0;
    getMaxByValAlign(StructTy->getElementType(Idx), FieldAlign);
    if (MaxAlign < FieldAlign)
      MaxAlign = FieldAlign;
  }
}
/// Return the alignment to use for a byval argument of type \p Ty.
///
/// On 64-bit subtargets this is the larger of 8 and the ABI alignment of
/// the type. Otherwise it is 4, possibly raised by getMaxByValAlign when
/// SSE1 is available.
unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
  if (!Subtarget->is64Bit()) {
    unsigned Result = 4;
    if (Subtarget->hasSSE1())
      getMaxByValAlign(Ty, Result);
    return Result;
  }

  // 64-bit: max(8, ABI alignment).
  const unsigned ABIAlign = TD->getABITypeAlignment(Ty);
  return ABIAlign > 8 ? ABIAlign : 8u;
}
/// Pick the value type to use when expanding a memset/memcpy/memmove of
/// \p Size bytes into loads and stores.
///
/// Vector and FP types are considered only when the operation is not a
/// non-zero memset and the function does not carry the NoImplicitFloat
/// attribute. Within that:
///  - For Size >= 16 with either fast unaligned accesses or both alignments
///    being 0 or >= 16: v8i32 / v8f32 (Size >= 32 with Int256 / Fp256),
///    then v4i32 (SSE2), then v4f32 (SSE1).
///  - Otherwise, for Size >= 8 on a non-64-bit SSE2 subtarget with
///    \p MemcpyStrSrc false: f64.
/// If none of those apply the result is i64 (64-bit subtarget, Size >= 8)
/// or i32.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                       unsigned DstAlign, unsigned SrcAlign,
                                       bool IsMemset, bool ZeroMemset,
                                       bool MemcpyStrSrc,
                                       MachineFunction &MF) const {
  const Function *Fn = MF.getFunction();

  const bool IsNonZeroMemset = IsMemset && !ZeroMemset;
  const bool VectorOrFPAllowed =
      !IsNonZeroMemset && !Fn->hasFnAttribute(Attribute::NoImplicitFloat);

  if (VectorOrFPAllowed) {
    const bool DstAlignOK = DstAlign == 0 || DstAlign >= 16;
    const bool SrcAlignOK = SrcAlign == 0 || SrcAlign >= 16;
    const bool CanUse16ByteOps =
        Size >= 16 && (Subtarget->isUnalignedMemAccessFast() ||
                       (DstAlignOK && SrcAlignOK));

    if (CanUse16ByteOps) {
      const bool AtLeast32Bytes = Size >= 32;
      if (AtLeast32Bytes && Subtarget->hasInt256())
        return MVT::v8i32;
      if (AtLeast32Bytes && Subtarget->hasFp256())
        return MVT::v8f32;
      if (Subtarget->hasSSE2())
        return MVT::v4i32;
      if (Subtarget->hasSSE1())
        return MVT::v4f32;
      // No usable vector type: fall through to the integer choice below.
    } else if (!MemcpyStrSrc && Size >= 8 &&
               !Subtarget->is64Bit() &&
               Subtarget->hasSSE2()) {
      return MVT::f64;
    }
  }

  const bool UseI64 = Subtarget->is64Bit() && Size >= 8;
  return UseI64 ? MVT::i64 : MVT::i32;
}
/// Report whether \p VT may be used as a memory-operation type.
/// f32 and f64 are gated on X86ScalarSSEf32 / X86ScalarSSEf64 respectively;
/// every other type is accepted.
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
  switch (VT.SimpleTy) {
  case MVT::f32:
    return X86ScalarSSEf32;
  case MVT::f64:
    return X86ScalarSSEf64;
  default:
    return true;
  }
}
/// Misaligned accesses are always reported as allowed, regardless of type,
/// address space or alignment. If \p Fast is non-null it receives whether
/// the subtarget considers unaligned memory accesses fast.
bool
X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                  unsigned,
                                                  unsigned,
                                                  bool *Fast) const {
  if (Fast != nullptr) {
    const bool UnalignedIsFast = Subtarget->isUnalignedMemAccessFast();
    *Fast = UnalignedIsFast;
  }
  return true;
}
/// Return the jump table entry encoding to use.
/// With the PIC relocation model and GOT-style PIC the custom 32-bit
/// encoding is selected; otherwise the TargetLowering default applies.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  const bool IsPIC =
      getTargetMachine().getRelocationModel() == Reloc::PIC_;
  if (IsPIC && Subtarget->isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;

  return TargetLowering::getJumpTableEncoding();
}
/// Build the expression for one custom-encoded jump table entry: a
/// VK_GOTOFF reference to the symbol of \p MBB. Asserts that the target is
/// in PIC relocation mode with GOT-style PIC.
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid,MCContext &Ctx) const{
  assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT());

  const MCSymbol *BlockSym = MBB->getSymbol();
  return MCSymbolRefExpr::Create(BlockSym, MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
/// Return the value that PIC jump table entries are relative to.
/// On 64-bit subtargets this is the jump table itself; otherwise it is a
/// GlobalBaseReg node of pointer type.
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (Subtarget->is64Bit())
    return Table;

  // The node is created with a default-constructed (empty) SDLoc.
  return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
}
/// MCExpr counterpart of getPICJumpTableRelocBase.
/// For RIP-relative PIC the TargetLowering default is used; for every other
/// PIC style the result is a reference to the function's PIC base symbol.
const MCExpr *X86TargetLowering::
getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
                             MCContext &Ctx) const {
  if (!Subtarget->isPICStyleRIPRel())
    return MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);

  return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
}
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
return TargetLowering::findRepresentativeClass(TRI, VT);
case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
RRC = Subtarget->is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
break;
case MVT::x86mmx:
RRC = &X86::VR64RegClass;
break;
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
case MVT::v4f64:
RRC = &X86::VR128RegClass;
break;
}
return std::make_pair(RRC, Cost);
}
/// Report where the stack protector cookie lives, if the target keeps it at
/// a fixed location.
///
/// Returns false (leaving both outputs untouched) on non-Linux targets.
/// On Linux it returns true and sets:
///  - 64-bit: Offset 0x28, AddressSpace 256 under the Kernel code model and
///    257 otherwise;
///  - 32-bit: Offset 0x14, AddressSpace 256.
/// NOTE(review): 256/257 are presumably the X86 backend's GS/FS segment
/// address spaces -- confirm against the LLVM code generator documentation.
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                               unsigned &Offset) const {
  if (!Subtarget->isTargetLinux())
    return false;

  if (!Subtarget->is64Bit()) {
    Offset = 0x14;
    AddressSpace = 256;
    return true;
  }

  Offset = 0x28;
  const bool IsKernelModel =
      getTargetMachine().getCodeModel() == CodeModel::Kernel;
  AddressSpace = IsKernelModel ? 256 : 257;
  return true;
}
/// A cast between two distinct address spaces is a no-op exactly when both
/// address space numbers are below 256. Callers must pass different address
/// spaces (asserted).
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
                                            unsigned DestAS) const {
  assert(SrcAS != DestAS && "Expected different address spaces!");
  const bool TouchesHighAS = SrcAS >= 256 || DestAS >= 256;
  return !TouchesHighAS;
}
#include "X86GenCallingConv.inc"
/// Check whether the outgoing return values in \p Outs can all be assigned
/// locations by the RetCC_X86 return calling convention.
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> ReturnLocs;
  CCState RetState(CallConv, isVarArg, MF, ReturnLocs, Context);
  const bool Fits = RetState.CheckReturn(Outs, RetCC_X86);
  return Fits;
}
/// Return the zero-terminated list of scratch registers. The calling
/// convention argument is ignored; the list always contains only R11.
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ZeroTerminatedScratch[] = {
    X86::R11,
    0 // terminator
  };
  return ZeroTerminatedScratch;
}
/// Lower a function return into an X86ISD::RET_FLAG node.
///
/// Return value locations are computed with RetCC_X86. Each value is promoted
/// as its location requires and copied into its return register; the copies
/// are linked through \p Chain and a glue value. The operand list of the
/// resulting node is: chain, bytes to pop on return, one entry per returned
/// value (a register operand, or the value itself for FP0/FP1), an optional
/// sret result register, and the final glue if any copy was emitted.
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
SDValue Flag;
SmallVector<SDValue, 6> RetOps;
// Operand 0 is the incoming chain (overwritten with the final chain at the
// end); operand 1 is the number of bytes to pop on return, as an i16 target
// constant.
RetOps.push_back(Chain); RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(),
MVT::i16));
// Copy each return value into its assigned location.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
EVT ValVT = ValToCopy.getValueType();
// Promote or bitcast the value to the location type when requested.
if (VA.getLocInfo() == CCValAssign::SExt)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt)
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
"Unexpected FP-extend for return value.");
// On 64-bit targets without SSE1, f32/f64 values and anything assigned to
// XMM0/XMM1 cannot be returned: this is a fatal error.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise, f64 cannot be returned on 64-bit targets without SSE2.
if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Values assigned to FP0/FP1 are appended directly as operands of the return
// node; no CopyToReg is emitted for them. A scalar FP value that lives in an
// SSE register is first extended to f80.
if (VA.getLocReg() == X86::FP0 ||
VA.getLocReg() == X86::FP1) {
if (isScalarFPTypeInSSEReg(VA.getValVT()))
ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
RetOps.push_back(ValToCopy);
continue;
}
// On 64-bit targets, an x86mmx value assigned to XMM0/XMM1 is bitcast to i64
// and placed in a v2i64 vector; without SSE2 that vector is further bitcast
// to v4f32.
if (Subtarget->is64Bit()) {
if (ValVT == MVT::x86mmx) {
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ValToCopy);
ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
ValToCopy);
if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
}
// Emit the copy, glued to the previous one, and record the register as an
// operand of the return node.
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// If an sret return register was recorded for this function, copy its value
// into RAX (EAX on 32-bit or 64-bit ILP32 targets) and return that register
// as well. The CopyFromReg is chained to RetOps[0], which still holds the
// chain as it was on entry, not to the chain updated by the copies above.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
assert((Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) &&
"No need for an sret register");
SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, getPointerTy());
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
}
// Install the final chain and append the glue if any copy produced one.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
}
/// Determine whether the single result of \p N is consumed only by return
/// nodes, looking through one CopyToReg (without glue) or one FP_EXTEND.
/// On success \p Chain is updated (to the CopyToReg's input chain when the
/// user is a CopyToReg, otherwise left as passed in) and true is returned.
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  // N must define exactly one value and that value must have exactly one use.
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
    return false;

  SDNode *User = *N->use_begin();
  SDValue NewChain = Chain;
  switch (User->getOpcode()) {
  case ISD::CopyToReg: {
    // A CopyToReg whose last operand is glue is rejected.
    const unsigned LastIdx = User->getNumOperands() - 1;
    if (User->getOperand(LastIdx).getValueType() == MVT::Glue)
      return false;
    NewChain = User->getOperand(0);
    break;
  }
  case ISD::FP_EXTEND:
    break;
  default:
    return false;
  }

  // Every user of the intermediate node must be a RET_FLAG with at most four
  // operands; with exactly four, the last one has to be glue.
  bool SawReturn = false;
  for (SDNode::use_iterator It = User->use_begin(), End = User->use_end();
       It != End; ++It) {
    if (It->getOpcode() != X86ISD::RET_FLAG)
      return false;
    const unsigned NumOps = It->getNumOperands();
    if (NumOps > 4)
      return false;
    if (NumOps == 4 &&
        It->getOperand(NumOps - 1).getValueType() != MVT::Glue)
      return false;
    SawReturn = true;
  }

  if (SawReturn)
    Chain = NewChain;
  return SawReturn;
}
/// Compute the type an extended argument or return value should have.
/// The minimum is the register type of i8 when a 64-bit subtarget
/// zero-extends an i1, and the register type of i32 in every other case.
/// \p VT is returned unchanged unless it is narrower than that minimum.
EVT
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                                            ISD::NodeType ExtendKind) const {
  const bool ZExtBoolOn64 = Subtarget->is64Bit() && VT == MVT::i1 &&
                            ExtendKind == ISD::ZERO_EXTEND;
  MVT BaseVT = MVT::i32;
  if (ZExtBoolOn64)
    BaseVT = MVT::i8;

  EVT FloorVT = getRegisterType(Context, BaseVT);
  if (VT.bitsLT(FloorVT))
    return FloorVT;
  return VT;
}
/// Copy the results of a call out of their assigned physical registers and
/// append them to \p InVals. Result locations are computed with RetCC_X86;
/// every copy is threaded through \p Chain and the glue value \p InFlag.
/// Returns the chain after the last copy.
SDValue
X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
// Without SSE1, an f32/f64 result is a fatal error on 64-bit targets or when
// the result is marked inreg.
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// A result in FP0/FP1 whose value type lives in SSE registers is copied out
// as f80 and rounded back to the value type below.
if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT()))
CopyVT = MVT::f80;
// The CopyFromReg node's results are used as: 0 = value, 1 = chain,
// 2 = glue. Chain is rebound to result 1 of that node.
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
CopyVT, InFlag).getValue(1);
SDValue Val = Chain.getValue(0);
// NOTE(review): the constant 1 passed to FP_ROUND presumably marks the
// rounding as value-preserving -- confirm against the ISD::FP_ROUND docs.
if (CopyVT != VA.getValVT())
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
DAG.getIntPtrConstant(1));
InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
return Chain;
}
/// Classification of how a struct is returned through an sret argument, as
/// produced by callIsStructReturn and argsAreStructReturn.
enum StructReturnType {
// There are no arguments, or the first argument is not marked sret.
NotStructReturn,
// The first argument is marked sret and inreg.
RegStructReturn,
// The first argument is marked sret but not inreg.
StackStructReturn
};
/// Classify a call by the flags of its first outgoing argument: not a struct
/// return when there are no arguments or the first one lacks sret; otherwise
/// a register struct return if that argument is inreg, else a stack one.
static StructReturnType
callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  if (Outs.empty() || !Outs[0].Flags.isSRet())
    return NotStructReturn;
  return Outs[0].Flags.isInReg() ? RegStructReturn : StackStructReturn;
}
/// Classify a function by the flags of its first incoming argument: not a
/// struct return when there are no arguments or the first one lacks sret;
/// otherwise a register struct return if that argument is inreg, else a
/// stack one.
static StructReturnType
argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
  if (Ins.empty() || !Ins[0].Flags.isSRet())
    return NotStructReturn;
  return Ins[0].Flags.isInReg() ? RegStructReturn : StackStructReturn;
}
/// Emit a memcpy node that copies a byval argument from \p Src to \p Dst.
/// The size and alignment come from the byval information in \p Flags; the
/// size is materialised as an i32 constant. Returns the memcpy's result.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          SDLoc dl) {
  SDValue ByValSize = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  const unsigned ByValAlign = Flags.getByValAlign();
  return DAG.getMemcpy(Chain, dl, Dst, Src, ByValSize, ByValAlign,
                       false, true,
                       MachinePointerInfo(), MachinePointerInfo());
}
/// True for the fastcc, GHC and HiPE calling conventions.
static bool IsTailCallConvention(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::Fast:
  case CallingConv::GHC:
  case CallingConv::HiPE:
    return true;
  default:
    return false;
  }
}
/// True for the C calling convention and its explicit x86-64 variants
/// (X86_64_Win64 and X86_64_SysV).
static bool IsCCallConvention(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::X86_64_Win64:
  case CallingConv::X86_64_SysV:
    return true;
  default:
    return false;
  }
}
/// Quick filter for calls that could be lowered as tail calls: the call must
/// be marked as a tail call, tail calls must not be disabled in the target
/// options, and the callee's calling convention must be either one of the
/// tail-call conventions or one of the C conventions.
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
    return false;

  CallSite Site(CI);
  const CallingConv::ID CC = Site.getCallingConv();
  return IsTailCallConvention(CC) || IsCCallConvention(CC);
}
/// True when guaranteed tail call optimization is enabled and \p CC is one of
/// the tail-call calling conventions.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
                                   bool GuaranteedTailCallOpt) {
  if (!GuaranteedTailCallOpt)
    return false;
  return IsTailCallConvention(CC);
}
/// Lower the incoming argument \p i, which was assigned a stack location.
///
/// A fixed frame object is created at the argument's stack offset. For a
/// byval argument the frame index itself is returned (zero-sized byval
/// objects are given one byte). Otherwise a load from the slot is returned;
/// for indirectly passed values the loaded type is the location type, i.e.
/// the address rather than the value. The slot is immutable only when the
/// argument is not byval and the function is not made tail-call safe.
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
                                    CallingConv::ID CallConv,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    MachineFrameInfo *MFI,
                                    unsigned i) const {
  ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
  const bool ForceMutable = FuncIsMadeTailCallSafe(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  const bool PassedByVal = ArgFlags.isByVal();
  const bool SlotIsImmutable = !(ForceMutable || PassedByVal);

  if (PassedByVal) {
    unsigned ObjectSize = ArgFlags.getByValSize();
    if (ObjectSize == 0)
      ObjectSize = 1;
    const int ByValFI = MFI->CreateFixedObject(ObjectSize,
                                               VA.getLocMemOffset(),
                                               SlotIsImmutable);
    return DAG.getFrameIndex(ByValFI, getPointerTy());
  }

  EVT SlotVT;
  if (VA.getLocInfo() == CCValAssign::Indirect)
    SlotVT = VA.getLocVT();
  else
    SlotVT = VA.getValVT();

  const int SlotFI = MFI->CreateFixedObject(SlotVT.getSizeInBits()/8,
                                            VA.getLocMemOffset(),
                                            SlotIsImmutable);
  SDValue SlotAddr = DAG.getFrameIndex(SlotFI, getPointerTy());
  return DAG.getLoad(SlotVT, dl, Chain, SlotAddr,
                     MachinePointerInfo::getFixedStack(SlotFI),
                     false, false, false, 0);
}
/// Return the general purpose argument registers, in order, for a 64-bit
/// calling convention: RCX, RDX, R8, R9 for Win64 conventions and
/// RDI, RSI, RDX, RCX, R8, R9 otherwise. Requires a 64-bit subtarget.
static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
                                                const X86Subtarget *Subtarget) {
  assert(Subtarget->is64Bit());

  if (!Subtarget->isCallingConvWin64(CallConv)) {
    static const MCPhysReg NonWin64GPRs[] = {
      X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
    };
    return makeArrayRef(std::begin(NonWin64GPRs), std::end(NonWin64GPRs));
  }

  static const MCPhysReg Win64GPRs[] = {
    X86::RCX, X86::RDX, X86::R8, X86::R9
  };
  return makeArrayRef(std::begin(Win64GPRs), std::end(Win64GPRs));
}
/// Return the XMM argument registers for a 64-bit calling convention.
/// The list is empty for Win64 conventions, and also when soft-float is
/// enabled, the function has the NoImplicitFloat attribute, or SSE1 is
/// unavailable. Otherwise it is XMM0..XMM7. Requires a 64-bit subtarget.
static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
                                                CallingConv::ID CallConv,
                                                const X86Subtarget *Subtarget) {
  assert(Subtarget->is64Bit());
  if (Subtarget->isCallingConvWin64(CallConv))
    return None;

  const Function *F = MF.getFunction();
  const bool NoImplicitFP = F->hasFnAttribute(Attribute::NoImplicitFloat);
  assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFP) &&
         "SSE register cannot be used when SSE is disabled!");

  const bool XMMsUnavailable = MF.getTarget().Options.UseSoftFloat ||
                               NoImplicitFP || !Subtarget->hasSSE1();
  if (XMMsUnavailable)
    return None;

  static const MCPhysReg SysVXMMs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };
  return makeArrayRef(std::begin(SysVXMMs), std::end(SysVXMMs));
}
/// Lower the incoming (formal) arguments of the current function.
///
/// Argument locations are computed with CC_X86. Register arguments are copied
/// out of their live-in registers, stack arguments are lowered through
/// LowerMemArgument, and the resulting values are appended to \p InVals in
/// location order. The function also records per-function state in
/// X86MachineFunctionInfo: the sret return register, vararg frame indices and
/// register save area, registers forwarded for musttail calls, the number of
/// bytes to pop on return, and the argument stack size. Returns the updated
/// chain.
SDValue
X86TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Function* Fn = MF.getFunction();
// An externally visible "main" on Cygwin/MinGW targets is forced to use a
// frame pointer.
if (Fn->hasExternalLinkage() &&
Subtarget->isTargetCygMing() &&
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// For Win64 conventions, reserve 32 bytes (8-byte aligned) of stack before
// assigning argument locations.
// NOTE(review): presumably the register home/shadow area -- confirm.
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
unsigned LastVal = ~0U;
SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Each value is expected to have exactly one location.
assert(VA.getValNo() != LastVal &&
"Don't support value assigned to multiple locs yet");
(void)LastVal;
LastVal = VA.getValNo();
if (VA.isRegLoc()) {
// Pick the register class that matches the location type.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
else if (RegVT == MVT::f32)
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
RC = &X86::VR256RegClass;
else if (RegVT.is128BitVector())
RC = &X86::VR128RegClass;
else if (RegVT == MVT::x86mmx)
RC = &X86::VR64RegClass;
else if (RegVT == MVT::i1)
RC = &X86::VK1RegClass;
else if (RegVT == MVT::v8i1)
RC = &X86::VK8RegClass;
else if (RegVT == MVT::v16i1)
RC = &X86::VK16RegClass;
else if (RegVT == MVT::v32i1)
RC = &X86::VK32RegClass;
else if (RegVT == MVT::v64i1)
RC = &X86::VK64RegClass;
else
llvm_unreachable("Unknown argument type!");
// Mark the physical register live-in and copy the value out of it.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// For extended locations, record how the upper bits were filled with an
// AssertSext/AssertZext node; for bitcast locations, cast back to the value
// type.
if (VA.getLocInfo() == CCValAssign::SExt)
ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::ZExt)
ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
DAG.getValueType(VA.getValVT()));
else if (VA.getLocInfo() == CCValAssign::BCvt)
ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
// Narrow an extended location back to the value type: MOVDQ2Q for vector
// locations, TRUNCATE otherwise.
if (VA.isExtInLoc()) {
if (RegVT.isVector())
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
}
// An indirectly passed value arrives as an address: load the value from it.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
MachinePointerInfo(), false, false, false, 0);
InVals.push_back(ArgValue);
}
// On 64-bit and Windows MSVC targets, save the first sret argument into a
// virtual register (created on first use) and record that register in
// FuncInfo.
if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
if (Ins[i].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy();
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
}
}
}
// Size of the stack argument area; adjusted by GetAlignedArgumentStackSize
// when the function is made tail-call safe.
unsigned StackSize = CCInfo.getNextStackOffset();
if (FuncIsMadeTailCallSafe(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function uses va_start, create the frame index for the first
// variadic stack argument, located right after the fixed stack arguments.
// Skipped for 32-bit fastcall/thiscall.
if (MFI->hasVAStart() &&
(Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall))) {
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(1, StackSize, true));
}
assert(!(MF.getTarget().Options.UseSoftFloat &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
"SSE register cannot be used when SSE is disabled!");
// 64-bit variadic functions that use va_start: spill the argument registers
// not consumed by fixed arguments so they can be found in memory later.
if (Is64Bit && isVarArg && MFI->hasVAStart()) {
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
// Index of the first GPR / XMM argument register not yet allocated.
unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
// Collect live-in copies of the remaining GPRs, of AL, and of the remaining
// XMM registers.
SmallVector<SDValue, 6> LiveGPRs;
SmallVector<SDValue, 8> LiveXMMRegs;
SDValue ALVal;
for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
LiveGPRs.push_back(
DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
}
if (!ArgXMMs.empty()) {
unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
LiveXMMRegs.push_back(
DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
}
}
if (IsWin64) {
// Win64: the register save area is a fixed object at
// NumIntRegs * 8 + (local area offset + 8). When fewer than four GPRs were
// consumed, the vararg frame index is redirected to this area.
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// Otherwise: record the GP and FP offsets into the save area and create a
// 16-byte aligned stack object holding all GPR (8 bytes each) and XMM
// (16 bytes each) argument registers.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
}
// Store each remaining GPR into the save area at consecutive 8-byte offsets,
// starting at the recorded GP offset.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
FuncInfo->getRegSaveFrameIndex(), Offset),
false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
// If some XMM argument registers remain, emit one VASTART_SAVE_XMM_REGS node
// whose operands are: chain, AL, save-area frame index, FP offset, and the
// live XMM values.
if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getRegSaveFrameIndex()));
SaveXMMOps.push_back(DAG.getIntPtrConstant(
FuncInfo->getVarArgsFPOffset()));
SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
LiveXMMRegs.end());
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
MVT::Other, SaveXMMOps));
}
// Join all register-save operations into the chain.
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
// Variadic functions containing a musttail call: compute the set of argument
// registers that must be forwarded and copy each into a fresh virtual
// register.
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
// Choose the vector type to forward from the available features:
// v16f32 with AVX-512 (on 32-bit only for vectorcall / Intel OCL BI),
// v8f32 with AVX, v4f32 with SSE2, otherwise none.
MVT VecVT = MVT::Other;
if (Subtarget->hasAVX512() &&
(Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
CallConv == CallingConv::Intel_OCL_BI)))
VecVT = MVT::v16f32;
else if (Subtarget->hasAVX())
VecVT = MVT::v8f32;
else if (Subtarget->hasSSE2())
VecVT = MVT::v4f32;
// Forward pointer-width integers and, if available, the chosen vector type.
SmallVector<MVT, 2> RegParmTypes;
MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
RegParmTypes.push_back(IntVT);
if (VecVT != MVT::Other)
RegParmTypes.push_back(VecVT);
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
// On 64-bit targets AL is forwarded too unless it is already allocated.
if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
}
// Copy every forwarded register into a new virtual register and remember
// that register in the forward entry. The copies are serialised on Chain.
for (ForwardedRegister &F : Forwards) {
SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
}
}
// Bytes popped on return: the whole argument area for callee-pop
// conventions; otherwise 0, except 4 for a 32-bit, non-tail-call-convention,
// non-MSVCRT function whose struct return pointer is passed on the stack.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); } else {
FuncInfo->setBytesToPopOnReturn(0); if (!Is64Bit && !IsTailCallConvention(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
argsAreStructReturn(Ins) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
// On 32-bit targets the register save frame index is overwritten with
// 0xAAAAAAA, and so is the vararg frame index for fastcall/thiscall.
// NOTE(review): 0xAAAAAAA presumably serves as an "unused" marker -- confirm.
if (!Is64Bit) {
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
if (CallConv == CallingConv::X86_FastCall ||
CallConv == CallingConv::X86_ThisCall)
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
FuncInfo->setArgumentStackSize(StackSize);
return Chain;
}
/// Place the outgoing argument \p Arg into its stack slot for a call.
/// The destination is \p StackPtr plus the location's memory offset. A byval
/// argument is copied there with CreateCopyOfByValArgument; any other
/// argument is written with a plain store. Returns the copy or store node.
SDValue
X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    SDLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  const unsigned SlotOffset = VA.getLocMemOffset();
  SDValue OffsetNode = DAG.getIntPtrConstant(SlotOffset);
  SDValue SlotAddr =
      DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, OffsetNode);

  if (!Flags.isByVal())
    return DAG.getStore(Chain, dl, Arg, SlotAddr,
                        MachinePointerInfo::getStack(SlotOffset),
                        false, false, 0);

  return CreateCopyOfByValArgument(Arg, SlotAddr, Chain, Flags, DAG, dl);
}
/// Load the current return address for a tail call. \p OutRetAddr receives
/// the loaded value (a pointer-typed load from the return address frame
/// index); the function returns result 1 of that load node, which is used as
/// the new chain. \p IsTailCall, \p Is64Bit and \p FPDiff are not used here.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                           SDValue &OutRetAddr, SDValue Chain,
                                           bool IsTailCall, bool Is64Bit,
                                           int FPDiff, SDLoc dl) const {
  EVT PtrVT = getPointerTy();
  SDValue RetAddrSlot = getReturnAddressFrameIndex(DAG);
  OutRetAddr = DAG.getLoad(PtrVT, dl, Chain, RetAddrSlot, MachinePointerInfo(),
                           false, false, false, 0);
  return OutRetAddr.getValue(1);
}
/// Store the previously loaded return address \p RetAddrFrIdx into its new
/// stack slot for a tail call. Nothing is emitted when \p FPDiff is zero.
/// Otherwise a fixed object of \p SlotSize bytes is created at offset
/// FPDiff - SlotSize and the return address is stored there. Returns the
/// resulting chain.
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
                                        SDValue Chain, SDValue RetAddrFrIdx,
                                        EVT PtrVT, unsigned SlotSize,
                                        int FPDiff, SDLoc dl) {
  if (FPDiff == 0)
    return Chain;

  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  const int MovedRetAddrFI =
      FrameInfo->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
                                   false);
  SDValue MovedRetAddrSlot = DAG.getFrameIndex(MovedRetAddrFI, PtrVT);
  return DAG.getStore(Chain, dl, RetAddrFrIdx, MovedRetAddrSlot,
                      MachinePointerInfo::getFixedStack(MovedRetAddrFI),
                      false, false, 0);
}
/// Lower an outgoing call described by \p CLI into DAG nodes.
///
/// Decides whether the call is emitted as a tail call (guaranteed, sibcall or
/// musttail), assigns argument locations with CC_X86, materialises register
/// and stack arguments, rewrites the callee into a target address node where
/// possible, and emits either an X86ISD::TC_RETURN or an X86ISD::CALL
/// bracketed by CALLSEQ_START / CALLSEQ_END. For non-tail calls the results
/// are copied out through LowerCallResult into \p InVals. CLI.IsTailCall is
/// updated in place to reflect the final decision.
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
CallingConv::ID CallConv = CLI.CallConv;
bool &isTailCall = CLI.IsTailCall;
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
StructReturnType SR = callIsStructReturn(Outs);
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
// A musttail call is always emitted as a tail call, even if tail calls were
// disabled above. Any other tail call candidate must pass the eligibility
// check; an eligible call without GuaranteedTailCallOpt is a sibcall.
bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
if (IsMustTail) {
isTailCall = true;
} else if (isTailCall) {
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
Outs, OutVals, Ins, DAG);
if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
++NumTailCalls;
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Assign a location to every outgoing argument. Win64 conventions first
// reserve 32 bytes (8-byte aligned) of stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
if (IsWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Number of stack bytes for the arguments: zero for sibcalls, and adjusted
// by GetAlignedArgumentStackSize for guaranteed tail-call conventions.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibcall)
NumBytes = 0;
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
// FPDiff is the number of bytes the caller pops on return minus the bytes
// this call needs. The function-wide return address delta is lowered to
// FPDiff when FPDiff is smaller.
int FPDiff = 0;
if (isTailCall && !IsSibcall && !IsMustTail) {
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
FPDiff = NumBytesCallerPushed - NumBytes;
if (FPDiff < X86Info->getTCReturnAddrDelta())
X86Info->setTCReturnAddrDelta(FPDiff);
}
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;
// With a trailing inalloca argument nothing is pushed here. That argument
// must be in memory at offset 0; anything else is a fatal error.
if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
NumBytesToPush = 0;
if (!ArgLocs.back().isMemLoc())
report_fatal_error("cannot use inalloca attribute on a register "
"parameter");
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
}
if (!IsSibcall)
Chain = DAG.getCALLSEQ_START(
Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl);
// For tail calls with a non-zero FPDiff, load the return address now; it is
// stored to its new slot further down.
SDValue RetAddrFrIdx;
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
// Walk the argument locations: promote each value as required, queue
// register arguments in RegsToPass and emit stores for stack arguments.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// inalloca arguments are skipped entirely.
if (Flags.isInAlloca())
continue;
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[i];
bool isByVal = Flags.isByVal();
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::AExt:
// Any-extend into a 128-bit vector location: bitcast to i64, place it in a
// v2i64 and combine it with an undef vector through getMOVL.
if (RegVT.is128BitVector()) {
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
} else
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, RegVT, Arg);
break;
case CCValAssign::Indirect: {
// Indirect: spill the value to a stack temporary and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
Arg = SpillSlot;
break;
}
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
// Variadic Win64 calls: a value passed in XMM0..XMM3 is additionally passed
// in the matching GPR (RCX, RDX, R8, R9).
if (isVarArg && IsWin64) {
unsigned ShadowReg = 0;
switch (VA.getLocReg()) {
case X86::XMM0: ShadowReg = X86::RCX; break;
case X86::XMM1: ShadowReg = X86::RDX; break;
case X86::XMM2: ShadowReg = X86::R8; break;
case X86::XMM3: ShadowReg = X86::R9; break;
}
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
// Stack argument of a non-sibcall. For tail calls only byval arguments are
// stored here; the others are handled in the tail call block below.
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// GOT-style PIC: non-tail calls pass the global base register in EBX. For
// tail calls the callee address is lowered up front instead (global
// addresses only when neither hidden nor protected, and external symbols).
if (Subtarget->isPICStyleGOT()) {
if (!isTailCall) {
RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
} else {
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
if (G && !G->getGlobal()->hasHiddenVisibility() &&
!G->getGlobal()->hasProtectedVisibility())
Callee = LowerGlobalAddress(Callee, DAG);
else if (isa<ExternalSymbolSDNode>(Callee))
Callee = LowerExternalSymbol(Callee, DAG);
}
}
// 64-bit non-Win64 variadic calls (other than musttail) pass in AL the index
// of the first unallocated register among XMM0..XMM7.
if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
DAG.getConstant(NumXMMRegs, MVT::i8)));
}
// Variadic musttail calls pass on every register recorded as forwarded in
// the function info.
if (isVarArg && IsMustTail) {
const auto &Forwards = X86Info->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
}
}
// Non-sibcall tail calls: write the stack arguments into fixed objects at
// their location offset plus FPDiff. All of these stores are chained to the
// stack-argument token factor obtained from the DAG.
if (!IsSibcall && isTailCall) {
SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isInAlloca())
continue;
// The object size is the location type's size rounded up to whole bytes.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
// byval: copy from the slot written earlier (stack pointer + offset) into
// the final fixed object.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl,
RegInfo->getStackRegister(),
getPointerTy());
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
Flags, DAG, dl));
} else {
MemOpChains2.push_back(
DAG.getStore(ArgChain, dl, Arg, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
if (!MemOpChains2.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address loaded earlier into its new slot (no-op when
// FPDiff is zero).
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
getPointerTy(), RegInfo->getSlotSize(),
FPDiff, dl);
}
// Emit the register argument copies, glued together in order.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
// Rewrite the callee into a target-specific address node where possible.
// Under the large code model the callee is left untouched.
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
} else if (Callee->getOpcode() == ISD::GlobalAddress) {
GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
const GlobalValue *GV = G->getGlobal();
// dllimport globals are left as they are.
if (!GV->hasDLLImportStorageClass()) {
unsigned char OpFlags = 0;
bool ExtraLoad = false;
unsigned WrapperKind = ISD::DELETED_NODE;
// Select the operand flag:
//  - MO_PLT: ELF PIC, default visibility, non-local linkage.
//  - MO_DARWIN_STUB: stub PIC style, declaration or weak-for-linker symbol,
//    and the target is not Mac OS X or is Mac OS X older than 10.5.
//  - MO_GOTPCREL: RIP-relative PIC and a function marked NonLazyBind; the
//    address is then wrapped and loaded through the GOT.
if (Subtarget->isTargetELF() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
OpFlags = X86II::MO_DARWIN_STUB;
} else if (Subtarget->isPICStyleRIPRel() && isa<Function>(GV) &&
cast<Function>(GV)->hasFnAttribute(Attribute::NonLazyBind)) {
OpFlags = X86II::MO_GOTPCREL;
WrapperKind = X86ISD::WrapperRIP;
ExtraLoad = true;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
// WrapperKind only signals whether a wrapper is needed; the node created is
// always X86ISD::WrapperRIP, the only value WrapperKind is ever set to.
if (WrapperKind != ISD::DELETED_NODE)
Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
if (ExtraLoad)
Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(),
false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
// External symbols: MO_PLT for ELF PIC, MO_DARWIN_STUB for the stub PIC
// style when the target is not Mac OS X or is Mac OS X older than 10.5.
unsigned char OpFlags = 0;
if (Subtarget->isTargetELF() &&
DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
OpFlags = X86II::MO_DARWIN_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
} else if (Subtarget->isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// 64-bit ILP32: widen a 32-bit callee address to 64 bits.
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
// The call node produces a chain and a glue value.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
// Non-sibcall tail calls close the call sequence before the TC_RETURN node.
if (!IsSibcall && isTailCall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
// Operands: chain, callee, FPDiff (tail calls only), the argument registers,
// the call-preserved register mask, and the glue if present.
Ops.push_back(Chain);
Ops.push_back(Callee);
if (isTailCall)
Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
// Tail calls end here: mark the frame as having a tail call and return the
// TC_RETURN node. No results are copied out.
if (isTailCall) {
MF.getFrameInfo()->setHasTailCall();
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Bytes the callee pops: all argument bytes for callee-pop conventions; 4
// for a 32-bit, non-tail-call-convention, non-MSVCRT call whose struct
// return pointer is passed on the stack; otherwise 0.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
NumBytesForCalleeToPop = 4;
else
NumBytesForCalleeToPop = 0;
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getIntPtrConstant(NumBytesToPop, true),
DAG.getIntPtrConstant(NumBytesForCalleeToPop,
true),
InFlag, dl);
InFlag = Chain.getValue(1);
}
// Copy the call results out of their registers.
return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
Ins, dl, DAG, InVals);
}
/// Round \p StackSize up to the next value of the form
/// k * StackAlignment + (StackAlignment - SlotSize), where the stack
/// alignment comes from the frame lowering and the slot size from the
/// register info. A size already of that form is returned unchanged.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                               SelectionDAG& DAG) const {
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  const unsigned Alignment = TFI.getStackAlignment();
  const unsigned SlotBytes = RegInfo->getSlotSize();
  const uint64_t LowBitsMask = Alignment - 1;
  // Residue the result must have modulo the stack alignment.
  const unsigned WantedResidue = Alignment - SlotBytes;

  int64_t Size = StackSize;
  const uint64_t Residue = Size & LowBitsMask;
  if (Residue <= WantedResidue)
    // Still room inside the current alignment block: pad up to the residue.
    Size += (WantedResidue - Residue);
  else
    // Already past it: move to the next alignment block, then add the residue.
    Size = ((~LowBitsMask) & Size) + Alignment + WantedResidue;
  return Size;
}
/// Return true if \p Arg already resides in a fixed stack object whose offset
/// equals \p Offset and whose size equals the argument's size in bytes (the
/// byval size for byval arguments).
///
/// Three shapes of \p Arg are recognised:
///  - a CopyFromReg of a virtual register whose defining instruction is a
///    load from a stack slot (non-byval) or an LEA of a frame index (byval);
///  - a load whose base pointer is a frame index (non-byval only);
///  - a frame index node (byval only).
/// Everything else yields false.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
// INT_MAX means "no frame index found yet"; see the assert below.
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
// Only virtual registers with a known defining instruction are examined.
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
// FI is passed by reference here and is expected to be set on success.
if (!TII->isLoadFromStackSlot(Def, FI))
return false;
} else {
// byval: the register must be defined by an LEA whose operand 1 is a frame
// index.
unsigned Opcode = Def->getOpcode();
if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
Opcode == X86::LEA64_32r) &&
Def->getOperand(1).isFI()) {
FI = Def->getOperand(1).getIndex();
Bytes = Flags.getByValSize();
} else
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
// A load feeding a byval argument is rejected.
if (Flags.isByVal())
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
FI = FINode->getIndex();
Bytes = Flags.getByValSize();
} else
return false;
assert(FI != INT_MAX);
// Only fixed objects qualify, and both offset and size must match exactly.
if (!MFI->isFixedObjectIndex(FI))
return false;
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
/// Decide whether a call may be emitted as a tail call. Returns false as soon
/// as any of the checks below fails, true if all of them pass.
/// \param Callee             Callee address node.
/// \param CalleeCC           Calling convention of the callee.
/// \param isVarArg           Whether the call is variadic.
/// \param isCalleeStructRet  Callee uses struct-return.
/// \param isCallerStructRet  Caller uses struct-return.
/// \param RetTy              Return type of the call.
/// \param Outs               Outgoing argument descriptors.
/// \param OutVals            Outgoing argument values (parallel to ArgLocs).
/// \param Ins                Incoming (result) descriptors.
/// \param DAG                Selection DAG providing function and target info.
bool
X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
Type *RetTy,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG &DAG) const {
// Only tail-call conventions and C-like conventions are considered at all.
if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
return false;
const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// Caller returns x86_fp80 but the call does not: reject.
if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
return false;
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
// With GuaranteedTailCallOpt the answer depends only on the convention being
// a tail-call convention that matches the caller's.
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
}
// From here on: the non-guaranteed path.
// Reject if the function needs stack realignment.
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
if (RegInfo->needsStackRealignment(MF))
return false;
// Reject if either side uses struct-return.
if (isCalleeStructRet || isCallerStructRet)
return false;
// Reject mismatched conventions when the caller is stdcall/thiscall.
if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
CallerCC == CallingConv::X86_ThisCall))
return false;
// Variadic calls with arguments: reject on Win64, and require every argument
// to be assigned to a register.
if (isVarArg && !Outs.empty()) {
if (IsCalleeWin64 || IsCallerWin64)
return false;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
}
// If any result is unused, reject when a result is assigned to FP0 or FP1.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
Unused = true;
break;
}
}
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
}
// Conventions differ: the result locations computed under the callee's and
// the caller's convention must agree element by element.
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
*DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
*DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
return false;
for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
return false;
if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
return false;
if (RVLocs1[i].isRegLoc()) {
if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
return false;
} else {
if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
return false;
}
}
}
// Argument checks are only needed when there are outgoing arguments.
if (!Outs.empty()) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// Win64 callee: reserve 32 bytes (8-byte aligned) before assigning operands.
if (IsCalleeWin64)
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Some argument needs stack space.
if (CCInfo.getNextStackOffset()) {
MachineFunction &MF = DAG.getMachineFunction();
// Reject if the caller pops bytes on return.
if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
return false;
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
// Every stack-assigned argument must already sit at its matching fixed
// stack offset; indirect arguments are rejected outright.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}
// 32-bit only, when the callee is neither a global address nor an external
// symbol, or when compiling PIC: limit how many arguments may occupy
// EAX/EDX/ECX.
if (!Subtarget->is64Bit() &&
((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
// The limit is 2 under PIC and 3 otherwise.
unsigned MaxInRegs =
(DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (!VA.isRegLoc())
continue;
unsigned Reg = VA.getLocReg();
switch (Reg) {
default: break;
case X86::EAX: case X86::EDX: case X86::ECX:
// Reject as soon as MaxInRegs of these registers carry arguments.
if (++NumInRegs == MaxInRegs)
return false;
break;
}
}
}
}
return true;
}
/// Build a FastISel instance by delegating to X86::createFastISel.
/// \param funcInfo  Function lowering state forwarded unchanged.
/// \param libInfo   Library info forwarded unchanged.
/// \returns whatever X86::createFastISel returns.
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  FastISel *Selector = X86::createFastISel(funcInfo, libInfo);
  return Selector;
}
/// True when \p Op has exactly one use and its node is a normal load.
static bool MayFoldLoad(SDValue Op) {
  if (!Op.hasOneUse())
    return false;
  return ISD::isNormalLoad(Op.getNode());
}
/// True when \p Op has exactly one use and that single user is a normal store.
static bool MayFoldIntoStore(SDValue Op) {
  if (!Op.hasOneUse())
    return false;
  // Only dereference the use list once we know there is a single use.
  return ISD::isNormalStore(*Op.getNode()->use_begin());
}
/// Report whether \p Opcode is one of the X86 shuffle opcodes listed below.
static bool isTargetShuffle(unsigned Opcode) {
  switch (Opcode) {
  // Immediate-controlled and byte shuffles.
  case X86ISD::BLENDI:
  case X86ISD::PSHUFB:
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::SHUFP:
  case X86ISD::PALIGNR:
  // Move-style shuffles.
  case X86ISD::MOVLHPS:
  case X86ISD::MOVLHPD:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVLPS:
  case X86ISD::MOVLPD:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
  case X86ISD::MOVDDUP:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  // Unpacks and permutes.
  case X86ISD::UNPCKL:
  case X86ISD::UNPCKH:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERM2X128:
  case X86ISD::VPERMI:
    return true;
  default:
    return false;
  }
}
/// Build a single-operand target shuffle node.
/// Accepts only MOVSHDUP, MOVSLDUP and MOVDDUP; any other opcode is treated
/// as unreachable.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
                                    SDValue V1, SelectionDAG &DAG) {
  const bool Supported = Opc == X86ISD::MOVSHDUP ||
                         Opc == X86ISD::MOVSLDUP ||
                         Opc == X86ISD::MOVDDUP;
  if (!Supported)
    llvm_unreachable("Unknown x86 shuffle node");
  return DAG.getNode(Opc, dl, VT, V1);
}
/// Build a one-operand target shuffle node that takes an 8-bit immediate.
/// Accepts PSHUFD, PSHUFHW, PSHUFLW, VPERMILPI and VPERMI; any other opcode
/// is treated as unreachable.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
                                    SDValue V1, unsigned TargetMask,
                                    SelectionDAG &DAG) {
  switch (Opc) {
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERMI: {
    // The immediate is materialised as an i8 constant operand.
    SDValue Imm = DAG.getConstant(TargetMask, MVT::i8);
    return DAG.getNode(Opc, dl, VT, V1, Imm);
  }
  default:
    llvm_unreachable("Unknown x86 shuffle node");
  }
}
/// Build a two-operand target shuffle node that takes an 8-bit immediate.
/// Accepts PALIGNR, VALIGN, SHUFP and VPERM2X128; any other opcode is treated
/// as unreachable.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
                                    SDValue V1, SDValue V2, unsigned TargetMask,
                                    SelectionDAG &DAG) {
  switch (Opc) {
  case X86ISD::PALIGNR:
  case X86ISD::VALIGN:
  case X86ISD::SHUFP:
  case X86ISD::VPERM2X128: {
    // The immediate is materialised as an i8 constant operand.
    SDValue Imm = DAG.getConstant(TargetMask, MVT::i8);
    return DAG.getNode(Opc, dl, VT, V1, V2, Imm);
  }
  default:
    llvm_unreachable("Unknown x86 shuffle node");
  }
}
/// Build a two-operand target shuffle node without an immediate.
/// Accepts the MOVLH*/MOVHLPS/MOVLP*/MOVSS/MOVSD/UNPCK* opcodes listed below;
/// any other opcode is treated as unreachable.
static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
                                    SDValue V1, SDValue V2, SelectionDAG &DAG) {
  switch (Opc) {
  case X86ISD::MOVLHPS:
  case X86ISD::MOVLHPD:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVLPS:
  case X86ISD::MOVLPD:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::UNPCKL:
  case X86ISD::UNPCKH:
    break;
  default:
    llvm_unreachable("Unknown x86 shuffle node");
  }
  return DAG.getNode(Opc, dl, VT, V1, V2);
}
/// Return a frame-index node for the return-address slot, creating and caching
/// the fixed object the first time it is requested (a cached index of 0 means
/// "not created yet").
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  int RAIndex = X86FI->getRAIndex();
  if (RAIndex != 0)
    return DAG.getFrameIndex(RAIndex, getPointerTy());

  // One slot wide, placed one slot below offset zero.
  const unsigned SlotSize = Subtarget->getRegisterInfo()->getSlotSize();
  RAIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                 -(int64_t)SlotSize,
                                                 false);
  X86FI->setRAIndex(RAIndex);
  return DAG.getFrameIndex(RAIndex, getPointerTy());
}
/// Decide whether \p Offset may be used as a displacement under code model
/// \p M.
/// \param Offset                   Candidate displacement.
/// \param M                        Code model in effect.
/// \param hasSymbolicDisplacement  Whether a symbol is part of the address.
/// \returns true if the offset is acceptable.
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                       bool hasSymbolicDisplacement) {
  // The offset must always fit in a signed 32-bit field.
  if (!isInt<32>(Offset))
    return false;
  // Without a symbol there is no further restriction.
  if (!hasSymbolicDisplacement)
    return true;

  switch (M) {
  case CodeModel::Small:
    // Small model: anything below 16MB (including negative offsets).
    return Offset < 16*1024*1024;
  case CodeModel::Kernel:
    // Kernel model: only non-negative offsets.
    return Offset >= 0;
  default:
    // Other code models are not accepted with a symbolic displacement.
    return false;
  }
}
/// Report whether the callee pops its own arguments for \p CallingConv.
/// \param CallingConv  Calling convention under consideration.
/// \param is64Bit      True for 64-bit targets.
/// \param IsVarArg     True for variadic functions.
/// \param TailCallOpt  True when guaranteed tail-call optimisation is enabled.
bool X86::isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool TailCallOpt) {
  switch (CallingConv) {
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_ThisCall:
    // These conventions are callee-pop only on 32-bit targets.
    return !is64Bit;
  case CallingConv::Fast:
  case CallingConv::GHC:
  case CallingConv::HiPE:
    // Callee-pop only for non-variadic functions with TailCallOpt set.
    return !IsVarArg && TailCallOpt;
  default:
    return false;
  }
}
/// Classify an integer X86 condition code: true for E, NE, B, A, BE and AE,
/// false for G, GE, L and LE. Any other code is treated as unreachable.
static bool isX86CCUnsigned(unsigned X86CC) {
  switch (X86CC) {
  case X86::COND_E:
  case X86::COND_NE:
  case X86::COND_B:
  case X86::COND_A:
  case X86::COND_BE:
  case X86::COND_AE:
    return true;
  case X86::COND_G:
  case X86::COND_GE:
  case X86::COND_L:
  case X86::COND_LE:
    return false;
  default:
    llvm_unreachable("Invalid integer condition!");
  }
  llvm_unreachable("covered switch fell through?!");
}
/// Map an ISD condition code to an X86 condition code.
/// \param SetCCOpcode  ISD comparison to translate.
/// \param isFP         True for floating-point comparisons.
/// \param LHS, RHS     Comparison operands; may be swapped or, for RHS,
///                     replaced by a zero constant as a side effect.
/// \param DAG          Used to create the replacement constant.
/// \returns the X86 condition, or X86::COND_INVALID for SETOEQ/SETUNE on the
///          floating-point path.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
if (!isFP) {
// Integer path: first try special cases against a constant RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
// X > -1: compare against 0 instead and test "not sign".
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_NS;
}
if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
// X < 0: test the sign flag; RHS is already 0.
return X86::COND_S;
}
if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
// X < 1: rewritten as X <= 0.
RHS = DAG.getConstant(0, RHS.getValueType());
return X86::COND_LE;
}
}
// General integer mapping.
switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;
case ISD::SETNE: return X86::COND_NE;
case ISD::SETULT: return X86::COND_B;
case ISD::SETUGT: return X86::COND_A;
case ISD::SETULE: return X86::COND_BE;
case ISD::SETUGE: return X86::COND_AE;
}
}
// Floating-point path.
// If only LHS is a non-extending load, swap the operands (and the condition
// accordingly) so the load ends up on the right.
if (ISD::isNON_EXTLoad(LHS.getNode()) &&
!ISD::isNON_EXTLoad(RHS.getNode())) {
SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
std::swap(LHS, RHS);
}
// These four conditions are handled by swapping the operands only; the
// condition code itself is left unchanged and the table below accounts for it.
switch (SetCCOpcode) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETUGT:
case ISD::SETUGE:
std::swap(LHS, RHS);
break;
}
// Final mapping; entries for the conditions swapped above are listed next to
// their mirrored counterparts.
switch (SetCCOpcode) {
default: llvm_unreachable("Condcode should be pre-legalized away");
case ISD::SETUEQ:
case ISD::SETEQ: return X86::COND_E;
case ISD::SETOLT: case ISD::SETOGT:
case ISD::SETGT: return X86::COND_A;
case ISD::SETOLE: case ISD::SETOGE:
case ISD::SETGE: return X86::COND_AE;
case ISD::SETUGT: case ISD::SETULT:
case ISD::SETLT: return X86::COND_B;
case ISD::SETUGE: case ISD::SETULE:
case ISD::SETLE: return X86::COND_BE;
case ISD::SETONE:
case ISD::SETNE: return X86::COND_NE;
case ISD::SETUO: return X86::COND_P;
case ISD::SETO: return X86::COND_NP;
case ISD::SETOEQ:
case ISD::SETUNE: return X86::COND_INVALID;
}
}
/// True when \p X86CC is one of B, BE, E, P, A, AE, NE or NP; false for every
/// other condition code.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  default:
    return false;
  }
}
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
return true;
}
return false;
}
/// Decide whether the width of \p Load may be reduced. The answer is true
/// except when the base pointer is a WrapperRIP around a global address
/// carrying the MO_GOTTPOFF target flag. \p ExtTy and \p NewVT are unused.
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (BasePtr.getOpcode() != X86ISD::WrapperRIP)
    return true;

  const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0));
  if (!GA)
    return true;
  return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
}
/// True when integer type \p Ty has a non-zero primitive size of at most 64
/// bits. \p Imm is not consulted.
bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());
  const unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return BitSize != 0 && BitSize <= 64;
}
/// True when EXTRACT_SUBVECTOR is legal or custom for \p ResVT and \p Index is
/// either 0 or equal to the element count of \p ResVT.
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
                                                unsigned Index) const {
  if (isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) {
    if (Index == 0)
      return true;
    return Index == ResVT.getVectorNumElements();
  }
  return false;
}
/// Speculating cttz is considered cheap exactly when the subtarget has BMI.
bool X86TargetLowering::isCheapToSpeculateCttz() const {
  const bool HasBMI = Subtarget->hasBMI();
  return HasBMI;
}
/// Speculating ctlz is considered cheap exactly when the subtarget has LZCNT.
bool X86TargetLowering::isCheapToSpeculateCtlz() const {
  const bool HasLZCNT = Subtarget->hasLZCNT();
  return HasLZCNT;
}
/// True when \p Val is negative (undef) or lies in the half-open range
/// [\p Low, \p Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
  if (Val < 0)
    return true;
  return Low <= Val && Val < Hi;
}
/// True when \p Val is negative (undef) or equal to \p CmpVal.
static bool isUndefOrEqual(int Val, int CmpVal) {
  if (Val < 0)
    return true;
  return Val == CmpVal;
}
/// True when every element of Mask[Pos, Pos+Size) is either undef or equal to
/// Low, Low+1, Low+2, ... in order.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
                                       unsigned Pos, unsigned Size, int Low) {
  for (unsigned Step = 0; Step != Size; ++Step) {
    const int Expected = Low + (int)Step;
    if (!isUndefOrEqual(Mask[Pos + Step], Expected))
      return false;
  }
  return true;
}
/// True when \p VT is v4f32/v4i32/v2f64/v2i64 and every mask element is undef
/// or refers to a single operand: the first one by default, the second one
/// when \p TestSecondOperand is set.
static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT,
                         bool TestSecondOperand = false) {
  const bool SupportedVT = VT == MVT::v4f32 || VT == MVT::v4i32 ||
                           VT == MVT::v2f64 || VT == MVT::v2i64;
  if (!SupportedVT)
    return false;

  const unsigned NumElems = VT.getVectorNumElements();
  // Index window of the operand being tested.
  const unsigned First = TestSecondOperand ? NumElems : 0;
  const unsigned Last = First + NumElems;
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    if (!isUndefOrInRange(Mask[Idx], (int)First, (int)Last))
      return false;
  }
  return true;
}
/// True when \p Mask, for v8i16 (or v16i16 if \p HasInt256), keeps the low
/// four elements of each group of eight in order (or undef) and draws the high
/// four elements from the same group's high four.
static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
  const bool SupportedVT =
      VT == MVT::v8i16 || (HasInt256 && VT == MVT::v16i16);
  if (!SupportedVT)
    return false;

  // One group of eight for v8i16, two for v16i16.
  const unsigned NumElts = (VT == MVT::v16i16) ? 16 : 8;
  for (unsigned Base = 0; Base != NumElts; Base += 8) {
    // Low four: identity or undef.
    if (!isSequentialOrUndefInRange(Mask, Base, 4, Base))
      return false;
    // High four: any element from the same group's high four, or undef.
    for (unsigned Idx = Base + 4; Idx != Base + 8; ++Idx)
      if (!isUndefOrInRange(Mask[Idx], Base + 4, Base + 8))
        return false;
  }
  return true;
}
/// True when \p Mask, for v8i16 (or v16i16 if \p HasInt256), keeps the high
/// four elements of each group of eight in order (or undef) and draws the low
/// four elements from the same group's low four.
static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
  const bool SupportedVT =
      VT == MVT::v8i16 || (HasInt256 && VT == MVT::v16i16);
  if (!SupportedVT)
    return false;

  // One group of eight for v8i16, two for v16i16.
  const unsigned NumElts = (VT == MVT::v16i16) ? 16 : 8;
  for (unsigned Base = 0; Base != NumElts; Base += 8) {
    // High four: identity or undef.
    if (!isSequentialOrUndefInRange(Mask, Base + 4, 4, Base + 4))
      return false;
    // Low four: any element from the same group's low four, or undef.
    for (unsigned Idx = Base; Idx != Base + 4; ++Idx)
      if (!isUndefOrInRange(Mask[Idx], Base, Base + 4))
        return false;
  }
  return true;
}
/// Check whether \p Mask selects, within each lane, a run of consecutive
/// elements taken from the two sources as if they were concatenated and
/// shifted.
/// \param Mask       Shuffle mask (negative entries are undef).
/// \param VT         Vector type being shuffled.
/// \param InterLane  When true the whole vector is treated as one lane;
///                   otherwise lanes are 128 bits wide.
static bool isAlignrMask(ArrayRef<int> Mask, MVT VT, bool InterLane) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = InterLane ? 1: VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Lanes with only two elements are not handled.
if (NumLaneElts == 2)
return false;
for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
// Find the first defined element of this lane.
unsigned i;
for (i = 0; i != NumLaneElts; ++i) {
if (Mask[i+l] >= 0)
break;
}
// Entire lane undef: nothing to check.
if (i == NumLaneElts)
continue;
int Start = Mask[i+l];
// The element must come from this lane of either source.
if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
!isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
return false;
// Lanes after the first must agree with lane 0 (offset by l) where lane 0
// is defined.
if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
return false;
// Renumber second-source indices so they directly follow this lane's
// first-source indices.
if (Start >= (int)NumElts)
Start -= NumElts - NumLaneElts;
// The source index must be strictly greater than the destination position.
if (Start <= (int)(i+l))
return false;
// Start now holds the index expected at lane position 0.
Start -= i;
// Every remaining element must continue the consecutive run (or be undef).
for (++i; i != NumLaneElts; ++i) {
int Idx = Mask[i+l];
if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
!isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
return false;
if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
return false;
if (Idx >= (int)NumElts)
Idx -= NumElts - NumLaneElts;
if (!isUndefOrEqual(Idx, Start+i))
return false;
}
}
return true;
}
/// True when \p Mask passes isAlignrMask per 128-bit lane and the subtarget
/// has the required feature: SSSE3 for 128-bit vectors, Int256 for 256-bit
/// vectors. 512-bit vectors are always rejected.
static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
                          const X86Subtarget *Subtarget) {
  if (VT.is512BitVector())
    return false;
  if (VT.is128BitVector() && !Subtarget->hasSSSE3())
    return false;
  if (VT.is256BitVector() && !Subtarget->hasInt256())
    return false;
  return isAlignrMask(Mask, VT, /*InterLane=*/false);
}
/// True when \p VT is a 512-bit vector, the subtarget has AVX512, and \p Mask
/// passes isAlignrMask with the whole vector treated as one lane.
static bool isVALIGNMask(ArrayRef<int> Mask, MVT VT,
                         const X86Subtarget *Subtarget) {
  const bool Usable = VT.is512BitVector() && Subtarget->hasAVX512();
  if (!Usable)
    return false;
  return isAlignrMask(Mask, VT, /*InterLane=*/true);
}
/// Rewrite the first \p NumElems entries of \p Mask in place as if the two
/// shuffle operands had been swapped: indices below NumElems are moved up by
/// NumElems, the others are moved down by NumElems. Undef (negative) entries
/// are left alone.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
                                     unsigned NumElems) {
  for (unsigned Pos = 0; Pos != NumElems; ++Pos) {
    int &Elt = Mask[Pos];
    if (Elt < 0)
      continue;
    Elt = (Elt < (int)NumElems) ? Elt + NumElems : Elt - NumElems;
  }
}
/// Check whether \p Mask has the SHUFP shape: within every 128-bit lane, one
/// half of the result elements comes from one operand's matching lane and the
/// other half from the other operand's matching lane.
/// \param Mask      Shuffle mask (negative entries are undef).
/// \param VT        Vector type; lanes must hold 2 or 4 elements.
/// \param Commuted  Swaps which operand feeds the low half of each lane.
static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned NumLanes = VT.getSizeInBits()/128;
  const unsigned NumLaneElems = NumElems/NumLanes;
  if (NumLaneElems != 2 && NumLaneElems != 4)
    return false;

  // For vectors of 256 bits or more with 32-bit elements, every lane must use
  // the same in-lane pattern.
  const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  const bool SameInEveryLane = (VT.getSizeInBits() >= 256) && (EltSize == 32);

  // In-lane pattern recorded from the first lane that defines each slot.
  SmallVector<int, 4> Recorded(NumLaneElems, -1);
  const unsigned HalfLaneElems = NumLaneElems/2;

  for (unsigned Lane = 0; Lane != NumElems; Lane += NumLaneElems) {
    for (unsigned Slot = 0; Slot != NumLaneElems; ++Slot) {
      const int Idx = Mask[Slot + Lane];
      // Decide which operand this slot must read from.
      const bool FromSecond = (Commuted == (Slot < HalfLaneElems));
      const unsigned RngStart = Lane + (FromSecond ? NumElems : 0);
      if (!isUndefOrInRange(Idx, RngStart, RngStart + NumLaneElems))
        return false;

      if (SameInEveryLane && Idx >= 0) {
        const int InLane = (signed)(Idx - Lane);
        if (Recorded[Slot] < 0)
          Recorded[Slot] = InLane;
        else if (InLane != Recorded[Slot])
          return false;
      }
    }
  }
  return true;
}
/// True when \p VT is a 128-bit, four-element vector and \p Mask matches
/// <6, 7, 2, 3>, with undef allowed in any position.
static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;
  if (VT.getVectorNumElements() != 4)
    return false;

  static const int Expected[4] = {6, 7, 2, 3};
  for (unsigned Pos = 0; Pos != 4; ++Pos)
    if (!isUndefOrEqual(Mask[Pos], Expected[Pos]))
      return false;
  return true;
}
/// True when \p VT is a 128-bit, four-element vector and \p Mask matches
/// <2, 3, 2, 3>, with undef allowed in any position.
static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;
  if (VT.getVectorNumElements() != 4)
    return false;

  static const int Expected[4] = {2, 3, 2, 3};
  for (unsigned Pos = 0; Pos != 4; ++Pos)
    if (!isUndefOrEqual(Mask[Pos], Expected[Pos]))
      return false;
  return true;
}
/// True when \p VT is a 128-bit vector of 2 or 4 elements, the low half of
/// \p Mask takes the second operand's low half in order, and the high half
/// keeps the first operand's high half in order (undef allowed anywhere).
static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;

  const unsigned Half = NumElems/2;
  for (unsigned Pos = 0; Pos != NumElems; ++Pos) {
    const unsigned Expected = (Pos < Half) ? Pos + NumElems : Pos;
    if (!isUndefOrEqual(Mask[Pos], Expected))
      return false;
  }
  return true;
}
/// True when \p VT is a 128-bit vector of 2 or 4 elements, the low half of
/// \p Mask keeps the first operand's low half in order, and the high half
/// takes the second operand's low half in order (undef allowed anywhere).
static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;
  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;

  const unsigned Half = NumElems/2;
  for (unsigned Pos = 0; Pos != 2 * Half; ++Pos) {
    const unsigned Expected = (Pos < Half) ? Pos : (Pos - Half) + NumElems;
    if (!isUndefOrEqual(Mask[Pos], Expected))
      return false;
  }
  return true;
}
/// True when \p VT is v4f32 or v4i32 and exactly three mask positions are
/// "in place" with respect to one of the two operands. A -1 entry counts as
/// in place for both operands.
static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
    return false;

  unsigned InPlaceV1 = 0;
  unsigned InPlaceV2 = 0;
  const int NumElts = (int)VT.getVectorNumElements();
  for (int Pos = 0; Pos != NumElts; ++Pos) {
    const int Elt = Mask[Pos];
    const bool IsUndef = (Elt == -1);
    // Position Pos of the first operand is index Pos; of the second, Pos + 4.
    if (IsUndef || Elt == Pos)
      ++InPlaceV1;
    if (IsUndef || Elt == Pos + 4)
      ++InPlaceV2;
  }
  return InPlaceV1 == 3 || InPlaceV2 == 3;
}
/// For v8i32/v8f32 shuffles whose mask is <0,8,2,10,4,12,6,14> or
/// <1,9,3,11,5,13,7,15> (undef allowed), rewrite the shuffle as a one-element
/// shift of one operand followed by a <0,9,2,11,4,13,6,15> blend. Returns an
/// empty SDValue when the type or mask does not match.
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
                               SelectionDAG &DAG) {
  MVT VT = SVOp->getSimpleValueType(0);
  if (VT != MVT::v8i32 && VT != MVT::v8f32)
    return SDValue();

  static const int EvenPattern[] = {0, 8, 2, 10, 4, 12, 6, 14};
  static const int OddPattern[] = {1, 9, 3, 11, 5, 13, 7, 15};
  ArrayRef<int> Mask = SVOp->getMask();
  bool IsEven = true;
  bool IsOdd = true;
  for (unsigned Pos = 0; Pos != 8; ++Pos) {
    IsEven = IsEven && isUndefOrEqual(Mask[Pos], EvenPattern[Pos]);
    IsOdd = IsOdd && isUndefOrEqual(Mask[Pos], OddPattern[Pos]);
  }
  if (!IsEven && !IsOdd)
    return SDValue();

  SDLoc dl(SVOp);
  SDValue UndefNode = DAG.getNode(ISD::UNDEF, dl, VT);
  SDValue First = SVOp->getOperand(0);
  SDValue Second = SVOp->getOperand(1);
  if (IsEven) {
    // Move the second operand's even elements into the odd positions.
    static const int ShiftRightMask[] = {-1, 0, -1, 2, -1, 4, -1, 6 };
    Second = DAG.getVectorShuffle(VT, dl, Second, UndefNode, ShiftRightMask);
  } else {
    // Move the first operand's odd elements into the even positions.
    static const int ShiftLeftMask[] = {1, -1, 3, -1, 5, -1, 7, -1 };
    First = DAG.getVectorShuffle(VT, dl, First, UndefNode, ShiftLeftMask);
  }

  // Even positions from the first operand, odd positions from the second.
  static const int BlendMask[] = {0, 9, 2, 11, 4, 13, 6, 15};
  return DAG.getVectorShuffle(VT, dl, First, Second, BlendMask);
}
/// Check whether \p Mask interleaves the low halves of each 128-bit lane of
/// the two operands (the UNPCKL shape).
/// \param Mask       Shuffle mask (negative entries are undef).
/// \param VT         Vector type, at least 128 bits wide.
/// \param HasInt256  Allows 16- and 32-element 256-bit vectors.
/// \param V2IsSplat  When set, every second-operand slot must be undef or the
///                   index NumElts (element 0 of the second operand).
static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
                         bool HasInt256, bool V2IsSplat = false) {
  assert(VT.getSizeInBits() >= 128 &&
         "Unsupported vector type for unpckl");
  unsigned NumElts = VT.getVectorNumElements();
  if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
    return false;
  // The diagnostic previously named "unpckh" although this is the unpckl
  // predicate.
  assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
         "Unsupported vector type for unpckl");
  // The check is performed independently for every 128-bit lane.
  unsigned NumLanes = VT.getSizeInBits()/128;
  unsigned NumLaneElts = NumElts/NumLanes;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // j walks the low half of the lane while i walks result pairs.
    for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
      int BitI = Mask[l+i];
      int BitI1 = Mask[l+i+1];
      // Even slot: element j of the first operand.
      if (!isUndefOrEqual(BitI, j))
        return false;
      if (V2IsSplat) {
        // Odd slot: element 0 of the (splat) second operand.
        if (!isUndefOrEqual(BitI1, NumElts))
          return false;
      } else {
        // Odd slot: element j of the second operand.
        if (!isUndefOrEqual(BitI1, j + NumElts))
          return false;
      }
    }
  }
  return true;
}
/// Check whether \p Mask interleaves the high halves of each 128-bit lane of
/// the two operands (the UNPCKH shape).
/// \param Mask       Shuffle mask (negative entries are undef).
/// \param VT         Vector type, at least 128 bits wide.
/// \param HasInt256  Allows 16- and 32-element 256-bit vectors.
/// \param V2IsSplat  When set, every second-operand slot must be undef or the
///                   index NumElts (element 0 of the second operand).
static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
                         bool HasInt256, bool V2IsSplat = false) {
  assert(VT.getSizeInBits() >= 128 &&
         "Unsupported vector type for unpckh");
  unsigned NumElts = VT.getVectorNumElements();
  if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
    return false;
  assert((!VT.is512BitVector() || VT.getScalarType().getSizeInBits() >= 32) &&
         "Unsupported vector type for unpckh");
  // The check is performed independently for every 128-bit lane.
  unsigned NumLanes = VT.getSizeInBits()/128;
  unsigned NumLaneElts = NumElts/NumLanes;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // j walks the high half of the lane while i walks result pairs.
    for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
      int BitI = Mask[l+i];
      int BitI1 = Mask[l+i+1];
      // Even slot: element j of the first operand.
      if (!isUndefOrEqual(BitI, j))
        return false;
      if (V2IsSplat) {
        // Odd slot: element 0 of the (splat) second operand. The test used to
        // be inverted (missing '!'), which rejected exactly the valid splat
        // masks and accepted everything else; it now mirrors isUNPCKLMask.
        if (!isUndefOrEqual(BitI1, NumElts))
          return false;
      } else {
        // Odd slot: element j of the second operand.
        if (!isUndefOrEqual(BitI1, j+NumElts))
          return false;
      }
    }
  }
  return true;
}
/// Check whether \p Mask interleaves the low half of each 128-bit lane of the
/// first operand with itself (UNPCKL with both inputs the same vector), e.g.
/// <0, 0, 1, 1> for four elements.
/// \param Mask       Shuffle mask (negative entries are undef).
/// \param VT         128- or 256-bit vector type; 512-bit is rejected.
/// \param HasInt256  Allows 16- and 32-element 256-bit vectors.
static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
  unsigned NumElts = VT.getVectorNumElements();
  bool Is256BitVec = VT.is256BitVector();
  if (VT.is512BitVector())
    return false;
  // The diagnostic previously named "unpckh" although this is the unpckl
  // predicate.
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for unpckl");
  if (Is256BitVec && NumElts != 4 && NumElts != 8 &&
      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
    return false;
  // Four-element 256-bit vectors are deliberately not matched here.
  if (NumElts == 4 && Is256BitVec)
    return false;
  // The check is performed independently for every 128-bit lane.
  unsigned NumLanes = VT.getSizeInBits()/128;
  unsigned NumLaneElts = NumElts/NumLanes;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // j walks the low half of the lane while i walks result pairs.
    for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
      int BitI = Mask[l+i];
      int BitI1 = Mask[l+i+1];
      // Both slots of the pair must name element j (or be undef).
      if (!isUndefOrEqual(BitI, j))
        return false;
      if (!isUndefOrEqual(BitI1, j))
        return false;
    }
  }
  return true;
}
/// Check whether \p Mask interleaves the high half of each 128-bit lane of the
/// first operand with itself, e.g. <2, 2, 3, 3> for four elements.
/// \param Mask       Shuffle mask (negative entries are undef).
/// \param VT         128- or 256-bit vector type; 512-bit is rejected.
/// \param HasInt256  Allows 16- and 32-element 256-bit vectors.
static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
  const unsigned NumElts = VT.getVectorNumElements();
  if (VT.is512BitVector())
    return false;
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for unpckh");

  if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
    return false;

  // Each 128-bit lane is examined on its own.
  const unsigned NumLanes = VT.getSizeInBits()/128;
  const unsigned NumLaneElts = NumElts/NumLanes;
  for (unsigned LaneBase = 0; LaneBase != NumElts; LaneBase += NumLaneElts) {
    // Src walks the high half of the lane; Pair walks the result pairs.
    unsigned Src = LaneBase + NumLaneElts/2;
    for (unsigned Pair = 0; Pair != NumLaneElts; Pair += 2, ++Src) {
      const int Even = Mask[LaneBase + Pair];
      const int Odd = Mask[LaneBase + Pair + 1];
      if (!(isUndefOrEqual(Even, Src) && isUndefOrEqual(Odd, Src)))
        return false;
    }
  }
  return true;
}
/// For 512-bit vectors, recognise masks that keep one half of the first
/// operand in place and fill the other half with the low half of the second
/// operand.
/// \param Mask  Shuffle mask (negative entries are undef).
/// \param VT    Vector type; must be 512 bits wide.
/// \param Imm   Set to 1 when the high half is replaced, 0 when the low half
///              is replaced. Untouched when the function returns false.
static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
  if (!VT.is512BitVector())
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  const unsigned HalfSize = NumElts/2;

  // Low half kept, high half taken from the second operand's low half.
  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0) &&
      isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
    *Imm = 1;
    return true;
  }
  // Low half taken from the second operand's low half, high half kept.
  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts) &&
      isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
    *Imm = 0;
    return true;
  }
  return false;
}
/// True when \p VT is a 128-bit vector with elements of at least 32 bits,
/// element 0 of \p Mask takes element 0 of the second operand, and every other
/// element stays in place (undef allowed anywhere).
static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
  if (VT.getVectorElementType().getSizeInBits() < 32)
    return false;
  if (!VT.is128BitVector())
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  // Position 0 is checked unconditionally, as the first element always exists.
  if (!isUndefOrEqual(Mask[0], NumElts))
    return false;
  unsigned Pos = 1;
  while (Pos != NumElts) {
    if (!isUndefOrEqual(Mask[Pos], Pos))
      return false;
    ++Pos;
  }
  return true;
}
/// True when \p VT is 256 bits wide, \p HasFp256 is set, and each half of the
/// result is a whole, in-order half of one of the two operands (four possible
/// source halves in total), with undef allowed.
static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
  if (!HasFp256 || !VT.is256BitVector())
    return false;

  const unsigned HalfSize = VT.getVectorNumElements()/2;
  // Does the result half starting at Pos equal any of the four source halves?
  auto MatchesSomeSourceHalf = [&](unsigned Pos) -> bool {
    for (unsigned Half = 0; Half != 4; ++Half)
      if (isSequentialOrUndefInRange(Mask, Pos, HalfSize, Half*HalfSize))
        return true;
    return false;
  };

  return MatchesSomeSourceHalf(0) && MatchesSomeSourceHalf(HalfSize);
}
/// Compute the immediate for a VPERM2X128-shaped shuffle: bits 0.. hold the
/// source-half number for the low result half, bits 4.. the one for the high
/// result half. Each number is derived from the first strictly positive mask
/// element of that half; an element equal to 0 is skipped, which is harmless
/// because 0 / HalfSize is the default value 0 anyway.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
  MVT VT = SVOp->getSimpleValueType(0);
  const unsigned HalfSize = VT.getVectorNumElements()/2;

  // Selector[0] is for the low result half, Selector[1] for the high one.
  unsigned Selector[2] = {0, 0};
  for (unsigned Part = 0; Part != 2; ++Part) {
    const unsigned Begin = Part * HalfSize;
    const unsigned End = Begin + HalfSize;
    for (unsigned Pos = Begin; Pos < End; ++Pos) {
      const int Elt = SVOp->getMaskElt(Pos);
      if (Elt > 0) {
        Selector[Part] = Elt/HalfSize;
        break;
      }
    }
  }
  return (Selector[0] | (Selector[1] << 4));
}
/// Try to encode \p Mask as a 2-bits-per-element immediate.
/// \param Mask  Shuffle mask (negative entries are undef and contribute 0).
/// \param VT    Vector type; element size must be at least 32 bits.
/// \param Imm8  Receives the encoded immediate. It is left untouched when the
///              element size is rejected, and may hold a partial value when
///              the function returns false later on.
/// \returns true when the mask is encodable.
static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
  const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  if (EltSize < 32)
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  Imm8 = 0;

  // 128-bit vectors and 256-bit vectors of 64-bit elements: each defined
  // element is packed directly, two bits per position.
  if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
    for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
      const int Elt = Mask[Pos];
      if (Elt >= 0)
        Imm8 |= Elt << (Pos*2);
    }
    return true;
  }

  // Otherwise work in groups of four: every group must stay within itself and
  // repeat the same in-group pattern.
  const unsigned LaneSize = 4;
  SmallVector<int, 4> Pattern(LaneSize, -1);
  for (unsigned Lane = 0; Lane != NumElts; Lane += LaneSize) {
    for (unsigned Slot = 0; Slot != LaneSize; ++Slot) {
      const int Elt = Mask[Slot + Lane];
      if (!isUndefOrInRange(Elt, Lane, Lane + LaneSize))
        return false;
      if (Elt < 0)
        continue;
      if (Pattern[Slot] < 0) {
        // First definition of this slot: record it and emit its two bits.
        Pattern[Slot] = Elt - Lane;
        Imm8 |= Pattern[Slot] << (Slot*2);
      } else if (Elt != (signed)(Pattern[Slot] + Lane)) {
        return false;
      }
    }
  }
  return true;
}
/// True when \p VT is at least 256 bits wide with elements of at least 32
/// bits, every mask element stays within its own 128-bit lane, and, for
/// 32-bit elements, all lanes repeat the same in-lane pattern.
static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
  const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
  if (VT.getSizeInBits() < 256 || EltSize < 32)
    return false;

  const bool SameInEveryLane = (EltSize == 32);
  const unsigned NumElts = VT.getVectorNumElements();
  const unsigned NumLanes = VT.getSizeInBits()/128;
  const unsigned LaneSize = NumElts/NumLanes;

  // In-lane pattern recorded from the first lane that defines each slot.
  SmallVector<int, 4> Pattern(LaneSize, -1);
  for (unsigned Lane = 0; Lane != NumElts; Lane += LaneSize) {
    for (unsigned Slot = 0; Slot != LaneSize; ++Slot) {
      const int Elt = Mask[Slot + Lane];
      if (!isUndefOrInRange(Elt, Lane, Lane + LaneSize))
        return false;
      if (!SameInEveryLane)
        continue;
      if (Pattern[Slot] < 0 && Elt >= 0) {
        Pattern[Slot] = Elt - Lane;
        continue;
      }
      if (!isUndefOrEqual(Elt, Pattern[Slot] + Lane))
        return false;
    }
  }
  return true;
}
/// True when \p VT is a 128-bit vector of 2, 4, 8 or 16 elements, element 0
/// of \p Mask is element 0 of the first operand, and every other element is
/// the same-position element of the second operand. Two relaxations apply to
/// those other elements:
///  - \p V2IsUndef: any second-operand index is accepted;
///  - \p V2IsSplat: index NumOps (second operand's element 0) is accepted.
static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
                               bool V2IsSplat = false, bool V2IsUndef = false) {
  if (!VT.is128BitVector())
    return false;

  const unsigned NumOps = VT.getVectorNumElements();
  const bool SupportedCount =
      NumOps == 2 || NumOps == 4 || NumOps == 8 || NumOps == 16;
  if (!SupportedCount)
    return false;

  if (!isUndefOrEqual(Mask[0], 0))
    return false;

  for (unsigned Pos = 1; Pos != NumOps; ++Pos) {
    const int Elt = Mask[Pos];
    if (isUndefOrEqual(Elt, Pos + NumOps))
      continue;
    if (V2IsUndef && isUndefOrInRange(Elt, NumOps, NumOps*2))
      continue;
    if (V2IsSplat && isUndefOrEqual(Elt, NumOps))
      continue;
    return false;
  }
  return true;
}
/// True when the subtarget has SSE3, the element count fits the vector width
/// (4/8/16 for 128/256/512 bits), and \p Mask duplicates each odd element into
/// the pair it belongs to, e.g. <1, 1, 3, 3> (undef allowed).
static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
                           const X86Subtarget *Subtarget) {
  if (!Subtarget->hasSSE3())
    return false;

  const unsigned NumElems = VT.getVectorNumElements();
  if ((VT.is128BitVector() && NumElems != 4) ||
      (VT.is256BitVector() && NumElems != 8) ||
      (VT.is512BitVector() && NumElems != 16))
    return false;

  for (unsigned Pair = 0; Pair != NumElems; Pair += 2) {
    // Both slots of the pair must name the pair's odd element.
    const unsigned Odd = Pair + 1;
    if (!(isUndefOrEqual(Mask[Pair], Odd) &&
          isUndefOrEqual(Mask[Pair + 1], Odd)))
      return false;
  }
  return true;
}
/// True when the subtarget has SSE3, the element count fits the vector width
/// (4/8/16 for 128/256/512 bits), and \p Mask duplicates each even element
/// into the pair it belongs to, e.g. <0, 0, 2, 2> (undef allowed).
static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
                           const X86Subtarget *Subtarget) {
  if (!Subtarget->hasSSE3())
    return false;

  const unsigned NumElems = VT.getVectorNumElements();
  if ((VT.is128BitVector() && NumElems != 4) ||
      (VT.is256BitVector() && NumElems != 8) ||
      (VT.is512BitVector() && NumElems != 16))
    return false;

  for (unsigned Pair = 0; Pair != NumElems; Pair += 2) {
    // Both slots of the pair must name the pair's even element.
    if (!(isUndefOrEqual(Mask[Pair], Pair) &&
          isUndefOrEqual(Mask[Pair + 1], Pair)))
      return false;
  }
  return true;
}
/// True when \p HasFp256 is set, \p VT is a 256-bit vector of four elements,
/// and \p Mask is <0, 0, 2, 2> with undef allowed in any position.
static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
  if (!HasFp256 || !VT.is256BitVector())
    return false;

  const unsigned NumElts = VT.getVectorNumElements();
  if (NumElts != 4)
    return false;

  const unsigned Half = NumElts/2;
  for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
    // Low half replicates element 0, high half replicates element Half.
    const unsigned Expected = (Pos < Half) ? 0 : Half;
    if (!isUndefOrEqual(Mask[Pos], Expected))
      return false;
  }
  return true;
}
/// True when \p VT is a 128-bit vector and \p Mask repeats the low half of the
/// first operand in both halves of the result, e.g. <0, 1, 0, 1> (undef
/// allowed).
static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;

  const unsigned Half = VT.getVectorNumElements() / 2;
  for (unsigned Pos = 0, End = 2 * Half; Pos != End; ++Pos) {
    const unsigned Expected = (Pos < Half) ? Pos : Pos - Half;
    if (!isUndefOrEqual(Mask[Pos], Expected))
      return false;
  }
  return true;
}
/// True when operand 1 of \p N is a constant index whose bit offset (index
/// times the result element size) is a multiple of \p vecWidth.
/// \param N         Node whose operand 1 is the index.
/// \param vecWidth  128 or 256.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
  const ConstantSDNode *IdxNode =
      dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
  if (!IdxNode)
    return false;

  const uint64_t Index = IdxNode->getZExtValue();
  // Element size is taken from the node's own result type.
  MVT VT = N->getSimpleValueType(0);
  const unsigned ElSize = VT.getVectorElementType().getSizeInBits();
  return (Index * ElSize) % vecWidth == 0;
}
/// True when operand 2 of \p N is a constant index whose bit offset (index
/// times the result element size) is a multiple of \p vecWidth.
/// \param N         Node whose operand 2 is the index.
/// \param vecWidth  128 or 256.
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
  const ConstantSDNode *IdxNode =
      dyn_cast<ConstantSDNode>(N->getOperand(2).getNode());
  if (!IdxNode)
    return false;

  const uint64_t Index = IdxNode->getZExtValue();
  // Element size is taken from the node's own result type.
  MVT VT = N->getSimpleValueType(0);
  const unsigned ElSize = VT.getVectorElementType().getSizeInBits();
  return (Index * ElSize) % vecWidth == 0;
}
/// isVINSERTIndex specialised for a 128-bit chunk width.
bool X86::isVINSERT128Index(SDNode *N) {
  const unsigned ChunkBits = 128;
  return isVINSERTIndex(N, ChunkBits);
}
/// isVINSERTIndex specialised for a 256-bit chunk width.
bool X86::isVINSERT256Index(SDNode *N) {
  const unsigned ChunkBits = 256;
  return isVINSERTIndex(N, ChunkBits);
}
/// isVEXTRACTIndex specialised for a 128-bit chunk width.
bool X86::isVEXTRACT128Index(SDNode *N) {
  const unsigned ChunkBits = 128;
  return isVEXTRACTIndex(N, ChunkBits);
}
/// isVEXTRACTIndex specialised for a 256-bit chunk width.
bool X86::isVEXTRACT256Index(SDNode *N) {
  const unsigned ChunkBits = 256;
  return isVEXTRACTIndex(N, ChunkBits);
}
/// Encode the shuffle mask of \p N as a PSHUF/SHUFP-style immediate. Each
/// defined element contributes its in-lane index, shifted by two bits per
/// position (one bit per position when lanes hold only two elements), with
/// the shift amount wrapping modulo 8 so that all lanes fold into one byte.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  assert((VT.getSizeInBits() >= 128) &&
         "Unsupported vector type for PSHUF/SHUFP");

  const unsigned NumElts = VT.getVectorNumElements();
  const unsigned NumLanes = VT.getSizeInBits()/128;
  const unsigned NumLaneElts = NumElts/NumLanes;
  assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
         "Only supports 2, 4 or 8 elements per lane");

  // Shift is log2 of the per-position field width: 2 bits, or 1 bit for
  // two-element lanes.
  const unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
  unsigned Imm = 0;
  for (unsigned Pos = 0; Pos != NumElts; ++Pos) {
    const int Elt = N->getMaskElt(Pos);
    if (Elt >= 0) {
      const int InLane = Elt & (NumLaneElts - 1);
      const unsigned ShAmt = (Pos << Shift) % 8;
      Imm |= InLane << ShAmt;
    }
  }
  return Imm;
}
/// Encode the high four elements of each group of eight in the mask of \p N
/// as a 2-bits-per-element immediate. Contributions from all groups are OR-ed
/// into the same eight bits.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
         "Unsupported vector type for PSHUFHW");

  const unsigned NumElts = VT.getVectorNumElements();
  unsigned Imm = 0;
  for (unsigned Group = 0; Group != NumElts; Group += 8) {
    for (unsigned Slot = 0; Slot != 4; ++Slot) {
      // Only the upper half (positions 4..7) of the group is encoded.
      const int Elt = N->getMaskElt(Group + Slot + 4);
      if (Elt >= 0)
        Imm |= (Elt & 0x3) << (Slot * 2);
    }
  }
  return Imm;
}
/// Build the immediate for a PSHUFLW-style shuffle: for every group of eight
/// elements, the lower four mask entries are each reduced to two bits and
/// OR'ed into the result, two bits per slot. Undefined entries are skipped.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  // The diagnostic previously named PSHUFHW (copied from the sibling helper).
  assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
         "Unsupported vector type for PSHUFLW");

  unsigned NumElts = VT.getVectorNumElements();
  unsigned Mask = 0;
  for (unsigned l = 0; l != NumElts; l += 8) {
    // Only the low four words of each group of eight are encoded.
    for (unsigned i = 0; i < 4; ++i) {
      int Elt = N->getMaskElt(l + i);
      if (Elt < 0)
        continue;
      Elt &= 0x3;
      Mask |= Elt << (i * 2);
    }
  }
  return Mask;
}
/// Compute the immediate for an align-right style shuffle from the first
/// defined mask entry of SVOp. With InterLane set the result is counted in
/// elements; otherwise it is scaled by the element size in bytes.
static unsigned getShuffleAlignrImmediate(ShuffleVectorSDNode *SVOp,
                                          bool InterLane) {
  MVT VT = SVOp->getSimpleValueType(0);

  unsigned EltSize = 1;
  if (!InterLane)
    EltSize = VT.getVectorElementType().getSizeInBits() >> 3;

  unsigned NumElts = VT.getVectorNumElements();
  // 512-bit vectors are treated as a single lane here.
  unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits() / 128;
  unsigned NumLaneElts = NumElts / NumLanes;

  // Locate the first mask entry that is not undefined.
  int FirstDef = 0;
  unsigned Pos = 0;
  while (Pos != NumElts) {
    FirstDef = SVOp->getMaskElt(Pos);
    if (FirstDef >= 0)
      break;
    ++Pos;
  }

  // Entries that refer to the second operand are rebased.
  if (FirstDef >= (int)NumElts)
    FirstDef -= NumElts - NumLaneElts;

  assert(FirstDef - Pos > 0 && "PALIGNR imm should be positive");
  return (FirstDef - Pos) * EltSize;
}
/// Alignment immediate for SVOp scaled by element size in bytes (the
/// non-inter-lane form of getShuffleAlignrImmediate).
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
  const bool InterLane = false;
  return getShuffleAlignrImmediate(SVOp, InterLane);
}
/// Alignment immediate for SVOp counted in elements (the inter-lane form of
/// getShuffleAlignrImmediate).
static unsigned getShuffleVALIGNImmediate(ShuffleVectorSDNode *SVOp) {
  const bool InterLane = true;
  return getShuffleAlignrImmediate(SVOp, InterLane);
}
/// Translate the constant extraction index of N (operand 1) into the number
/// of the vecWidth-bit chunk of the source vector (operand 0) it refers to.
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");

  ConstantSDNode *IdxNode =
      dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
  if (!IdxNode)
    llvm_unreachable("Illegal extract subvector for VEXTRACT");

  const unsigned EltBits = N->getOperand(0)
                               .getSimpleValueType()
                               .getVectorElementType()
                               .getSizeInBits();
  const unsigned EltsPerChunk = vecWidth / EltBits;
  return IdxNode->getZExtValue() / EltsPerChunk;
}
/// Translate the constant insertion index of N (operand 2) into the number of
/// the vecWidth-bit chunk of the result vector it refers to.
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
  assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");

  ConstantSDNode *IdxNode =
      dyn_cast<ConstantSDNode>(N->getOperand(2).getNode());
  if (!IdxNode)
    llvm_unreachable("Illegal insert subvector for VINSERT");

  const unsigned EltBits =
      N->getSimpleValueType(0).getVectorElementType().getSizeInBits();
  const unsigned EltsPerChunk = vecWidth / EltBits;
  return IdxNode->getZExtValue() / EltsPerChunk;
}
/// Chunk number of N's extraction index, in units of 128 bits.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
  const unsigned ChunkBits = 128;
  return getExtractVEXTRACTImmediate(N, ChunkBits);
}
/// Chunk number of N's extraction index, in units of 256 bits.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
  const unsigned ChunkBits = 256;
  return getExtractVEXTRACTImmediate(N, ChunkBits);
}
/// Chunk number of N's insertion index, in units of 128 bits.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
  const unsigned ChunkBits = 128;
  return getInsertVINSERTImmediate(N, ChunkBits);
}
/// Chunk number of N's insertion index, in units of 256 bits.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
  const unsigned ChunkBits = 256;
  return getInsertVINSERTImmediate(N, ChunkBits);
}
/// Return true if V is an integer constant node whose value is zero.
static bool isZero(SDValue V) {
  if (ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(V))
    return IntConst->isNullValue();
  return false;
}
/// Return true if Elt is an integer constant zero or a floating-point
/// constant that is positive zero.
bool X86::isZeroNode(SDValue Elt) {
  ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Elt);
  return isZero(Elt) || (FPConst && FPConst->getValueAPF().isPosZero());
}
/// Return true if VT is a 128-bit vector of four elements and every mask
/// entry is either undefined or matches the pattern <2, 3, 6, 7>.
static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector() || VT.getVectorNumElements() != 4)
    return false;

  static const int Expected[4] = {2, 3, 6, 7};
  for (unsigned Pos = 0; Pos != 4; ++Pos)
    if (!isUndefOrEqual(Mask[Pos], Expected[Pos]))
      return false;
  return true;
}
/// Return true if N is a SCALAR_TO_VECTOR whose operand is a non-extending
/// load. When LD is non-null it receives that load node on success.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
    return false;

  SDNode *Scalar = N->getOperand(0).getNode();
  if (!ISD::isNON_EXTLoad(Scalar))
    return false;

  if (LD)
    *LD = cast<LoadSDNode>(Scalar);
  return true;
}
/// Return true if N is a BUILD_VECTOR made only of undef, integer-constant and
/// FP-constant operands that is neither all-zeros nor all-ones.
static bool WillBeConstantPoolLoad(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  for (unsigned OpNo = 0, NumOps = N->getNumOperands(); OpNo != NumOps;
       ++OpNo) {
    const unsigned Opc = N->getOperand(OpNo).getNode()->getOpcode();
    if (Opc != ISD::UNDEF && Opc != ISD::ConstantFP && Opc != ISD::Constant)
      return false;
  }

  if (ISD::isBuildVectorAllZeros(N))
    return false;
  return !ISD::isBuildVectorAllOnes(N);
}
/// Return true when all of the following hold: VT is a 128-bit vector with two
/// or four elements; V1 is a non-extending load or a scalar load moved into a
/// vector; V2 is neither a non-extending load nor a constant build vector; and
/// the mask takes its low half in place from the first operand and its high
/// half in place from the second operand (undefined entries are accepted).
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
                               ArrayRef<int> Mask, MVT VT) {
  if (!VT.is128BitVector())
    return false;

  const bool V1IsLoad = ISD::isNON_EXTLoad(V1) || isScalarLoadToVector(V1);
  if (!V1IsLoad)
    return false;

  const bool V2IsMemory = ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2);
  if (V2IsMemory)
    return false;

  const unsigned NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;

  const unsigned Half = NumElems / 2;
  for (unsigned Pos = 0; Pos != NumElems; ++Pos) {
    const unsigned Want = Pos < Half ? Pos : Pos + NumElems;
    if (!isUndefOrEqual(Mask[Pos], Want))
      return false;
  }
  return true;
}
/// Return true if every defined mask entry of N selects an element that is
/// undef or zero: the selected operand is UNDEF or an all-zeros build vector,
/// or it is a BUILD_VECTOR whose selected element is a zero node.
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  const unsigned NumElems = N->getValueType(0).getVectorNumElements();

  for (unsigned Pos = 0; Pos != NumElems; ++Pos) {
    const int Idx = N->getMaskElt(Pos);
    if (Idx < 0)
      continue; // Undefined entry: nothing to check.

    // Pick the operand this entry refers to and the index within it.
    const bool FromV2 = Idx >= (int)NumElems;
    SDValue Src = FromV2 ? V2 : V1;
    const unsigned SrcIdx = FromV2 ? Idx - NumElems : Idx;

    const unsigned Opc = Src.getOpcode();
    if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(Src.getNode()))
      continue;
    if (Opc != ISD::BUILD_VECTOR || !X86::isZeroNode(Src.getOperand(SrcIdx)))
      return false;
  }
  return true;
}
/// Build an all-zeros vector of type VT. 128/256/512-bit vectors are built in
/// a canonical 32-bit-element type (integer or float depending on the
/// subtarget features checked below) and then bitcast to VT; vectors of i1 are
/// built directly in VT.
static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  SDValue Vec;
  if (VT.is128BitVector()) {
    // Integer zeros with SSE2, float zeros otherwise.
    if (Subtarget->hasSSE2()) {
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                        Zero, Zero, Zero, Zero);
    } else {
      SDValue Zero = DAG.getConstantFP(+0.0, MVT::f32);
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32,
                        Zero, Zero, Zero, Zero);
    }
  } else if (VT.is256BitVector()) {
    // Integer zeros with Int256 support, float zeros otherwise.
    if (Subtarget->hasInt256()) {
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      SDValue Elts[] = { Zero, Zero, Zero, Zero, Zero, Zero, Zero, Zero };
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Elts);
    } else {
      SDValue Zero = DAG.getConstantFP(+0.0, MVT::f32);
      SDValue Elts[] = { Zero, Zero, Zero, Zero, Zero, Zero, Zero, Zero };
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Elts);
    }
  } else if (VT.is512BitVector()) {
    SDValue Zero = DAG.getConstant(0, MVT::i32);
    SDValue Elts[] = { Zero, Zero, Zero, Zero, Zero, Zero, Zero, Zero,
                       Zero, Zero, Zero, Zero, Zero, Zero, Zero, Zero };
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Elts);
  } else if (VT.getScalarType() == MVT::i1) {
    // Mask vectors are returned as-is, without the trailing bitcast.
    assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
    SDValue Zero = DAG.getConstant(0, MVT::i1);
    SmallVector<SDValue, 16> Elts(VT.getVectorNumElements(), Zero);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
  } else {
    llvm_unreachable("Unexpected vector type");
  }

  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// Build an all-ones vector of type VT (128 or 256 bits). The value is built
/// from i32 elements and bitcast to VT. Without Int256 support a 256-bit
/// result is assembled from two 128-bit halves.
static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
                             SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  SDValue AllOnes = DAG.getConstant(~0U, MVT::i32);
  SDValue Vec;
  if (VT.is128BitVector()) {
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                      AllOnes, AllOnes, AllOnes, AllOnes);
  } else if (VT.is256BitVector()) {
    if (!HasInt256) {
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                        AllOnes, AllOnes, AllOnes, AllOnes);
      Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
    } else {
      SDValue Elts[] = { AllOnes, AllOnes, AllOnes, AllOnes,
                         AllOnes, AllOnes, AllOnes, AllOnes };
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Elts);
    }
  } else {
    llvm_unreachable("Unexpected vector type");
  }

  return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// Clamp the first NumElems entries of Mask: any entry greater than NumElems
/// is rewritten to NumElems.
static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
  for (unsigned Pos = 0; Pos != NumElems; ++Pos) {
    int &Entry = Mask[Pos];
    if (Entry > (int)NumElems)
      Entry = NumElems;
  }
}
/// Return a vector shuffle of V1 and V2 with mask <NumElems, 1, 2, ...>: the
/// first element comes from V2, all others stay in place from V1.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                       SDValue V2) {
  const unsigned NumElems = VT.getVectorNumElements();

  // Start from the identity mask, then redirect slot 0 to V2's first element.
  SmallVector<int, 8> Mask(NumElems, 0);
  for (unsigned Pos = 1; Pos < NumElems; ++Pos)
    Mask[Pos] = Pos;
  Mask[0] = NumElems;

  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
/// Return a vector shuffle that interleaves the low halves of V1 and V2:
/// mask <0, N, 1, N+1, ...> where N is the element count of VT.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
                          SDValue V2) {
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned Half = NumElems / 2;

  SmallVector<int, 8> Mask(2 * Half, 0);
  for (unsigned Pair = 0; Pair != Half; ++Pair) {
    Mask[2 * Pair] = Pair;
    Mask[2 * Pair + 1] = Pair + NumElems;
  }
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
/// Return a vector shuffle that interleaves the high halves of V1 and V2:
/// mask <H, N+H, H+1, N+H+1, ...> where N is the element count of VT and
/// H is N/2.
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
                          SDValue V2) {
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned Half = NumElems / 2;

  SmallVector<int, 8> Mask(2 * Half, 0);
  for (unsigned Pair = 0; Pair != Half; ++Pair) {
    Mask[2 * Pair] = Pair + Half;
    Mask[2 * Pair + 1] = Pair + NumElems + Half;
  }
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
/// Repeatedly unpack V with itself, choosing the low or high half depending on
/// where EltNo lies, until the tracked element count is at most four. EltNo is
/// updated to stay relative to the half that was kept.
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
  MVT VT = V.getSimpleValueType();
  SDLoc dl(V);

  for (int Width = VT.getVectorNumElements(); Width > 4; Width >>= 1) {
    const int Half = Width / 2;
    if (EltNo >= Half) {
      V = getUnpackh(DAG, dl, VT, V, V);
      EltNo -= Half;
    } else {
      V = getUnpackl(DAG, dl, VT, V, V);
    }
  }
  return V;
}
/// Splat element EltNo of V by bitcasting to a 32-bit float vector type
/// (v4f32 for 128-bit, v8f32 for 256-bit), shuffling, and bitcasting back.
/// For 256-bit vectors the upper four result elements use EltNo + 4.
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
  MVT VT = V.getSimpleValueType();
  SDLoc dl(V);

  const bool Is128 = VT.is128BitVector();
  if (!Is128 && !VT.is256BitVector())
    llvm_unreachable("Vector size not supported");

  // Only the first four entries are consumed for the 128-bit case.
  int SplatMask[8] = { EltNo, EltNo, EltNo, EltNo,
                       EltNo+4, EltNo+4, EltNo+4, EltNo+4 };
  MVT ShufVT = Is128 ? MVT::v4f32 : MVT::v8f32;

  V = DAG.getNode(ISD::BITCAST, dl, ShufVT, V);
  V = DAG.getVectorShuffle(ShufVT, dl, V, DAG.getUNDEF(ShufVT),
                           &SplatMask[0]);
  return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
/// Lower the splat shuffle SV. For 256-bit vectors the 128-bit piece holding
/// the splatted element is extracted first and later concatenated with itself.
/// i8/i16 elements are widened through PromoteSplati8i16 before the final
/// splat is produced by getLegalSplat.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
  MVT SrcVT = SV->getSimpleValueType(0);
  SDValue Src = SV->getOperand(0);
  SDLoc dl(SV);

  int SplatIdx = SV->getSplatIndex();
  const int NumElems = SrcVT.getVectorNumElements();
  const bool IsWide = SrcVT.is256BitVector();

  assert(((SrcVT.is128BitVector() && NumElems > 4) || IsWide) &&
         "Unknown how to promote splat for type");

  if (IsWide) {
    // Work on the 128-bit piece containing the element; rebase the index.
    Src = Extract128BitVector(Src, SplatIdx, DAG, dl);
    const int HalfElems = NumElems / 2;
    if (SplatIdx >= HalfElems)
      SplatIdx -= HalfElems;
  }

  MVT EltVT = SrcVT.getVectorElementType();
  if (EltVT == MVT::i8 || EltVT == MVT::i16)
    Src = PromoteSplati8i16(Src, DAG, SplatIdx);

  if (IsWide)
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, Src, Src);

  return getLegalSplat(DAG, Src, SplatIdx);
}
/// Return a shuffle of a zero (IsZero) or undef vector with V2 in which
/// position Idx takes element 0 of V2 and every other position keeps the
/// corresponding element of the zero/undef vector.
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                           bool IsZero,
                                           const X86Subtarget *Subtarget,
                                           SelectionDAG &DAG) {
  MVT VT = V2.getSimpleValueType();

  SDValue V1;
  if (IsZero)
    V1 = getZeroVector(VT, Subtarget, DAG, SDLoc(V2));
  else
    V1 = DAG.getUNDEF(VT);

  // Identity mask over V1, with slot Idx redirected to V2's first element.
  const unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 16> MaskVec(NumElems, 0);
  for (unsigned Pos = 0; Pos != NumElems; ++Pos)
    MaskVec[Pos] = Pos;
  if (Idx < NumElems)
    MaskVec[Idx] = NumElems;

  return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
}
/// Decode the shuffle mask implied by the target shuffle node N of type VT
/// into Mask.
///
/// \param N        the target shuffle node; its opcode selects the decoder.
/// \param VT       the vector type handed to the decoders.
/// \param Mask     receives the decoded mask entries.
/// \param IsUnary  set to true for single-input shuffles, and also for
///                 two-input shuffles whose two operands are the same value.
/// \returns true if a mask was decoded; false for opcodes that are not decoded
///          here (MOVLHPD, MOVLPD, MOVLPS), for PSHUFB masks that cannot be
///          recovered, and when VPERM2X128 decoding leaves Mask empty.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
IsUnary = false;
// Set when a two-operand shuffle is fed the same value twice; the mask is
// then rewritten below so that every entry refers to the first operand.
bool IsFakeUnary = false;
switch(N->getOpcode()) {
// For the immediate-controlled shuffles below, the immediate is read from
// the node's last operand.
case X86ISD::BLENDI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::UNPCKL:
DecodeUNPCKLMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVLHPS:
DecodeMOVLHPSMask(NumElems, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::PALIGNR:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
break;
case X86ISD::PSHUFD:
case X86ISD::VPERMILPI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFHW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFLW:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::PSHUFB: {
IsUnary = true;
// The shuffle control is operand 1; look through any bitcasts wrapping it.
SDValue MaskNode = N->getOperand(1);
while (MaskNode->getOpcode() == ISD::BITCAST)
MaskNode = MaskNode->getOperand(0);
// Case 1: the control is a BUILD_VECTOR of integer constants / undefs.
if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
EVT VT = MaskNode.getValueType();
assert(VT.isVector() &&
"Can't produce a non-vector with a build_vector!");
if (!VT.isInteger())
return false;
int NumBytesPerElement = VT.getVectorElementType().getSizeInBits() / 8;
SmallVector<uint64_t, 32> RawMask;
for (int i = 0, e = MaskNode->getNumOperands(); i < e; ++i) {
SDValue Op = MaskNode->getOperand(i);
// NOTE(review): an undef operand appends a single sentinel entry, whereas
// a constant operand appends NumBytesPerElement entries -- confirm this is
// intended for elements wider than one byte.
if (Op->getOpcode() == ISD::UNDEF) {
RawMask.push_back((uint64_t)SM_SentinelUndef);
continue;
}
auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
if (!CN)
return false;
// Split the constant into bytes, least significant byte first.
APInt MaskElement = CN->getAPIntValue();
for (int j = 0; j < NumBytesPerElement; ++j) {
RawMask.push_back(MaskElement.getLoBits(8).getZExtValue());
MaskElement = MaskElement.lshr(8);
}
}
DecodePSHUFBMask(RawMask, Mask);
break;
}
// Case 2: the control is loaded from a (non-machine) constant pool entry,
// optionally addressed through an X86ISD::Wrapper node.
auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
if (!MaskLoad)
return false;
SDValue Ptr = MaskLoad->getBasePtr();
if (Ptr->getOpcode() == X86ISD::Wrapper)
Ptr = Ptr->getOperand(0);
auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
return false;
if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodePSHUFBMask(C, Mask);
// An empty mask after decoding is treated as failure.
if (Mask.empty())
return false;
break;
}
return false;
}
case X86ISD::VPERMI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERMMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = true;
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
DecodeScalarMoveMask(VT, false, Mask);
break;
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVSHDUP:
DecodeMOVSHDUPMask(VT, Mask);
IsUnary = true;
break;
case X86ISD::MOVDDUP:
DecodeMOVDDUPMask(VT, Mask);
IsUnary = true;
break;
// These opcodes are recognised but no mask is produced for them.
case X86ISD::MOVLHPD:
case X86ISD::MOVLPD:
case X86ISD::MOVLPS:
return false;
default: llvm_unreachable("unknown target shuffle node");
}
// Both inputs are the same value: fold references to the second input back
// onto the first by subtracting the mask size.
if (IsFakeUnary)
for (int &M : Mask)
if (M >= (int)Mask.size())
M -= Mask.size();
return true;
}
/// Try to find the scalar value that ends up at position Index of the vector
/// produced by N, looking through vector shuffles, target shuffles, a bitcast
/// that preserves the element count, SCALAR_TO_VECTOR and BUILD_VECTOR.
///
/// \param N      node producing the vector (result 0 is inspected).
/// \param Index  element position of interest.
/// \param DAG    used to create UNDEF values.
/// \param Depth  current recursion depth; the search gives up at depth 6.
/// \returns the scalar (possibly an UNDEF node), or an empty SDValue when the
///          element cannot be determined.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
// Bound the recursion.
if (Depth == 6)
return SDValue();
SDValue V = SDValue(N, 0);
EVT VT = V.getValueType();
unsigned Opcode = V.getOpcode();
// Generic vector shuffle: follow the mask entry into the operand it selects.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
int Elt = SV->getMaskElt(Index);
if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
unsigned NumElems = VT.getVectorNumElements();
SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
: SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Target-specific shuffle: decode its mask first, then follow it likewise.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
if (Elt < 0)
return DAG.getUNDEF(ShufVT.getVectorElementType());
SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
: N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
// Look through one bitcast, but only if the source is a vector with the same
// number of elements; the checks below then apply to the bitcast's source.
if (Opcode == ISD::BITCAST) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
unsigned NumElems = VT.getVectorNumElements();
if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
return SDValue();
}
// SCALAR_TO_VECTOR defines only element 0; every other element is undef.
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
: DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
return SDValue();
}
/// Count how many consecutive elements of the shuffle result are zero,
/// scanning from the first element (ZerosFromLeft) or from the last. Undef
/// elements extend the run as well, but while counting them the total is
/// capped at PreferredNum. The scan stops at the first element that is
/// unknown or neither zero nor undef.
static unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp,
                                         unsigned NumElems, bool ZerosFromLeft,
                                         SelectionDAG &DAG,
                                         unsigned PreferredNum = -1U) {
  unsigned Count = 0;
  for (unsigned Step = 0; Step != NumElems; ++Step) {
    const unsigned Pos = ZerosFromLeft ? Step : NumElems - Step - 1;
    SDValue Scalar = getShuffleScalarElt(SVOp, Pos, DAG, 0);
    if (!Scalar.getNode())
      break;

    if (X86::isZeroNode(Scalar)) {
      ++Count;
      continue;
    }
    if (Scalar.getOpcode() != ISD::UNDEF)
      break;
    Count = std::min(Count + 1, PreferredNum);
  }
  return Count;
}
/// Check that the mask entries in [MaskI, MaskE) are either undefined or
/// select consecutive elements starting at OpIdx, all from a single operand.
/// On success OpNum is set to 0 if any entry referred to the first operand and
/// to 1 otherwise.
static
bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
                              unsigned MaskI, unsigned MaskE, unsigned OpIdx,
                              unsigned NumElems, unsigned &OpNum) {
  bool UsesV1 = false;
  bool UsesV2 = false;

  // OpIdx advances with the mask position even across undefined entries.
  for (unsigned Pos = MaskI; Pos != MaskE; ++Pos, ++OpIdx) {
    const int Idx = SVOp->getMaskElt(Pos);
    if (Idx < 0)
      continue;

    const bool FromV1 = Idx < (int)NumElems;
    UsesV1 |= FromV1;
    UsesV2 |= !FromV1;

    if (UsesV1 && UsesV2)
      return false;
    if (Idx % NumElems != OpIdx)
      return false;
  }

  OpNum = UsesV1 ? 0 : 1;
  return true;
}
/// Detect a shuffle whose trailing elements are zero and whose remaining
/// entries select consecutive elements of one operand, offset by the zero
/// count. On success isLeft is cleared, ShAmt receives the number of zero
/// elements and ShVal the operand being shifted.
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                               bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  const unsigned EltCount = SVOp->getSimpleValueType(0).getVectorNumElements();

  // Count zeros from the end; undef elements are capped by the first mask
  // entry.
  const unsigned ZeroRun = getNumOfConsecutiveZeros(
      SVOp, EltCount, /*ZerosFromLeft=*/false, DAG, SVOp->getMaskElt(0));
  if (ZeroRun == 0)
    return false;

  unsigned SrcOperand;
  if (!isShuffleMaskConsecutive(SVOp, 0, EltCount - ZeroRun, ZeroRun, EltCount,
                                SrcOperand))
    return false;

  isLeft = false;
  ShAmt = ZeroRun;
  ShVal = SVOp->getOperand(SrcOperand);
  return true;
}
/// Detect a shuffle whose leading elements are zero and whose remaining
/// entries select consecutive elements of one operand starting at element 0.
/// On success isLeft is set, ShAmt receives the number of zero elements and
/// ShVal the operand being shifted.
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                              bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  const unsigned EltCount = SVOp->getSimpleValueType(0).getVectorNumElements();

  // Count zeros from the start; undef elements are capped using the last mask
  // entry.
  const unsigned ZeroRun = getNumOfConsecutiveZeros(
      SVOp, EltCount, /*ZerosFromLeft=*/true, DAG,
      EltCount - SVOp->getMaskElt(EltCount - 1) - 1);
  if (ZeroRun == 0)
    return false;

  unsigned SrcOperand;
  if (!isShuffleMaskConsecutive(SVOp, ZeroRun, EltCount, 0, EltCount,
                                SrcOperand))
    return false;

  isLeft = true;
  ShAmt = ZeroRun;
  ShVal = SVOp->getOperand(SrcOperand);
  return true;
}
/// Return true if the 128-bit shuffle SVOp is a whole-vector shift, trying the
/// left-shift pattern first and then the right-shift pattern. The output
/// parameters are filled in by whichever pattern matches.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  if (!SVOp->getSimpleValueType(0).is128BitVector())
    return false;

  return isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
         isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt);
}
/// Custom lowering for a 16-element i8 build_vector: adjacent byte pairs are
/// merged into i16 values and inserted into a v8i16 vector, which is finally
/// bitcast to v16i8.
///
/// \param Op          the build_vector being lowered.
/// \param NonZeros    bit mask; bit i set means operand i is treated as
///                    non-zero.
/// \param NumNonZero  number of non-zero operands; more than 8 gives up.
/// \param NumZero     when non-zero the base vector is all zeros, otherwise
///                    it is undef.
/// \returns the lowered value, or an empty SDValue when NumNonZero > 8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
SDLoc dl(Op);
SDValue V;
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
// Create the base vector lazily, when the first non-zero element is seen.
if (ThisIsNonZero && First) {
if (NumZero)
V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
}
// Pairs are emitted at odd indices: element i-1 supplies the low byte and
// element i the high byte of the combined i16.
if ((i & 1) != 0) {
SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
MVT::i16, Op.getOperand(i-1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
ThisElt, DAG.getConstant(8, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
// Insert only when at least one byte of the pair is non-zero.
if (ThisElt.getNode())
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
DAG.getIntPtrConstant(i/2));
}
}
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V);
}
/// Custom lowering for an 8-element i16 build_vector: each operand flagged in
/// NonZeros is inserted into a base vector that is all zeros when NumZero is
/// non-zero and undef otherwise. Gives up (empty SDValue) when more than four
/// operands are non-zero. If no bit of NonZeros is set, the returned SDValue
/// is empty as well.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG,
                                     const X86Subtarget* Subtarget,
                                     const TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDValue();

  SDLoc dl(Op);
  SDValue Result;
  bool HaveBase = false;
  for (unsigned Pos = 0; Pos < 8; ++Pos) {
    if ((NonZeros & (1 << Pos)) == 0)
      continue;

    // Create the base vector on the first non-zero element.
    if (!HaveBase) {
      Result = NumZero ? getZeroVector(MVT::v8i16, Subtarget, DAG, dl)
                       : DAG.getUNDEF(MVT::v8i16);
      HaveBase = true;
    }

    Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, Result,
                         Op.getOperand(Pos), DAG.getIntPtrConstant(Pos));
  }
  return Result;
}
/// Custom lowering for a 4-element build_vector whose non-zero operands are
/// all constant-index extracts from 128-bit vectors. Produces either a shuffle
/// with a zero vector or, with SSE4.1, an X86ISD::INSERTPS node.
///
/// \returns the lowered value, or an empty SDValue when the pattern does not
///          match.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
// An operand is "zeroable" when it is undef or a zero constant.
bool Zeroable[4];
for (int i=0; i < 4; ++i) {
SDValue Elt = Op->getOperand(i);
Zeroable[i] = (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt));
}
assert(std::count_if(&Zeroable[0], &Zeroable[4],
[](bool M) { return !M; }) > 1 &&
"We expect at least two non-zero elements!");
// Every non-zeroable operand must be an EXTRACT_VECTOR_ELT with a constant
// index from a 128-bit vector. Remember the first such operand.
SDValue FirstNonZero;
unsigned FirstNonZeroIdx;
for (unsigned i=0; i < 4; ++i) {
if (Zeroable[i])
continue;
SDValue Elt = Op->getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Elt.getOperand(1)))
return SDValue();
MVT VT = Elt.getOperand(0).getSimpleValueType();
if (!VT.is128BitVector())
return SDValue();
if (!FirstNonZero.getNode()) {
FirstNonZero = Elt;
FirstNonZeroIdx = i;
}
}
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
SDValue V1 = FirstNonZero.getOperand(0);
MVT VT = V1.getSimpleValueType();
// First attempt: every non-zeroable element comes from V1 at its own
// position. Zeroable positions select from the second shuffle input
// (indices 4..7). The loop stops at the first element that does not fit.
SDValue Elt;
unsigned EltMaskIdx, EltIdx;
int Mask[4];
for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
if (Zeroable[EltIdx]) {
Mask[EltIdx] = EltIdx+4;
continue;
}
Elt = Op->getOperand(EltIdx);
EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
break;
Mask[EltIdx] = EltIdx;
}
// All four positions fit: emit a shuffle of V1 with a zero vector.
if (EltIdx == 4) {
SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
if (V1.getSimpleValueType() != VT)
V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), VT, V1);
return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, &Mask[0]);
}
// The remaining path emits INSERTPS and is gated on SSE4.1.
if (!Subtarget->hasSSE41())
return SDValue();
// Elt is the element that broke the pattern; it becomes the inserted element
// and its source vector becomes V2.
SDValue V2 = Elt.getOperand(0);
// If the offending element is the very first non-zero one, V1 is not known
// yet and is taken from the next non-zeroable element below.
if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
V1 = SDValue();
// All remaining non-zeroable elements must come from V1 in place.
bool CanFold = true;
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
if (Zeroable[i])
continue;
SDValue Current = Op->getOperand(i);
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
CanFold = SrcVector == V1 &&
cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
}
if (!CanFold)
return SDValue();
assert(V1.getNode() && "Expected at least two non-zero elements!");
if (V1.getSimpleValueType() != MVT::v4f32)
V1 = DAG.getNode(ISD::BITCAST, SDLoc(V1), MVT::v4f32, V1);
if (V2.getSimpleValueType() != MVT::v4f32)
V2 = DAG.getNode(ISD::BITCAST, SDLoc(V2), MVT::v4f32, V2);
// Immediate layout: source element index in bits 7:6, destination position
// in bits 5:4, and one bit per zeroable position in bits 3:0.
unsigned ZMask = 0;
for (int i = 0; i < 4; ++i)
if (Zeroable[i])
ZMask |= 1 << i;
unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDValue Result = DAG.getNode(X86ISD::INSERTPS, SDLoc(Op), MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask));
return DAG.getNode(ISD::BITCAST, SDLoc(Op), VT, Result);
}
/// Emit a whole-vector shift of the 128-bit value SrcOp by NumBits: the value
/// is bitcast to v2i64, shifted with X86ISD::VSHLDQ (left) or X86ISD::VSRLDQ
/// (right), and bitcast back to VT.
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                         unsigned NumBits, SelectionDAG &DAG,
                         const TargetLowering &TLI, SDLoc dl) {
  assert(VT.is128BitVector() && "Unknown type for VShift");

  MVT ShVT = MVT::v2i64;
  unsigned Opc;
  if (isLeft)
    Opc = X86ISD::VSHLDQ;
  else
    Opc = X86ISD::VSRLDQ;

  SDValue Casted = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
  MVT AmtTy = TLI.getScalarShiftAmountTy(Casted.getValueType());
  SDValue Amount = DAG.getConstant(NumBits, AmtTy);
  SDValue Shifted = DAG.getNode(Opc, dl, ShVT, Casted, Amount);
  return DAG.getNode(ISD::BITCAST, dl, VT, Shifted);
}
/// Try to turn a splat of a scalar i32/f32 load from a stack slot into a
/// vector load of the enclosing aligned chunk followed by a splat shuffle.
///
/// \param SrcOp  the scalar being splatted; must be a normal, non-volatile
///               load addressed by a frame index (optionally plus a constant
///               offset).
/// \param VT     the vector type; determines required alignment and the
///               element count of the new load.
/// \returns the splat shuffle, or an empty SDValue if the pattern does not
///          apply. May raise the alignment of a non-fixed stack object as a
///          side effect.
static SDValue
LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
SDValue Ptr = LD->getBasePtr();
if (!ISD::isNormalLoad(LD) || LD->isVolatile())
return SDValue();
EVT PVT = LD->getValueType(0);
if (PVT != MVT::i32 && PVT != MVT::f32)
return SDValue();
// Only addresses of the form FrameIndex or FrameIndex + constant qualify.
int FI = -1;
int64_t Offset = 0;
if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
FI = FINode->getIndex();
Offset = 0;
} else if (DAG.isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
Offset = Ptr.getConstantOperandVal(1);
Ptr = Ptr.getOperand(0);
} else {
return SDValue();
}
// The vector load needs alignment equal to the vector size in bytes. If the
// slot is not known to be aligned enough, bump its alignment unless it is a
// fixed object, in which case give up.
unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
if (MFI->isFixedObjectIndex(FI)) {
return SDValue();
} else {
MFI->setObjectAlignment(FI, RequiredAlign);
}
}
if (Offset < 0)
return SDValue();
// The scalar must sit at a 4-byte-aligned position within its chunk.
if ((Offset % RequiredAlign) & 3)
return SDValue();
// Round the offset down to the start of the aligned chunk and load from
// there; EltNo is the scalar's position (in 4-byte units) within the chunk.
int64_t StartOffset = Offset & ~(RequiredAlign-1);
if (StartOffset)
Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(),
Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
int EltNo = (Offset - StartOffset) >> 2;
unsigned NumElems = VT.getVectorNumElements();
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
// Splat element EltNo of the loaded vector.
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumElems; ++i)
Mask.push_back(EltNo);
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
return SDValue();
}
/// Given the elements of a vector of type VT, try to replace them by a single
/// wider load when they are non-extending loads from consecutive addresses
/// (undef elements are allowed after the first one).
///
/// Two forms are produced:
///  - all elements up to the last one are covered: one load of VT;
///  - four 32-bit elements of which only the first two are loaded:
///    an X86ISD::VZEXT_LOAD of i64 into v2i64, bitcast to VT.
///
/// \param isAfterLegalize  when true, the full-vector load is only formed if
///                         a load of VT is legal.
/// \returns the replacement value, or an empty SDValue if no merge applies.
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
SDLoc &DL, SelectionDAG &DAG,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
LoadSDNode *LDBase = nullptr;
unsigned LastLoadedElt = -1U;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue Elt = Elts[i];
// Look through a single bitcast on the element.
if (Elt.getNode() && Elt.getOpcode() == ISD::BITCAST)
Elt = Elt.getOperand(0);
if (!Elt.getNode() ||
(Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
return SDValue();
// The first element establishes the base load and must not be undef.
if (!LDBase) {
if (Elt.getNode()->getOpcode() == ISD::UNDEF)
return SDValue();
LDBase = cast<LoadSDNode>(Elt.getNode());
LastLoadedElt = i;
continue;
}
if (Elt.getOpcode() == ISD::UNDEF)
continue;
// Later loads must have the per-element size and be located i elements
// past the base load.
LoadSDNode *LD = cast<LoadSDNode>(Elt);
EVT LdVT = Elt.getValueType();
if (LdVT.getSizeInBits() != VT.getSizeInBits() / NumElems)
return SDValue();
if (!DAG.isConsecutiveLoad(LD, LDBase, LdVT.getSizeInBits() / 8, i))
return SDValue();
LastLoadedElt = i;
}
// Case 1: the last element is a load, so the loads span the whole vector.
if (LastLoadedElt == NumElems - 1) {
assert(LDBase && "Did not find base load for merging consecutive loads");
EVT EltVT = LDBase->getValueType(0);
if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
return SDValue();
if (isAfterLegalize &&
!DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
return SDValue();
SDValue NewLd = SDValue();
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(), LDBase->isVolatile(),
LDBase->isNonTemporal(), LDBase->isInvariant(),
LDBase->getAlignment());
// If the base load's chain result is used, route those users through a
// TokenFactor of the old and new chains. ReplaceAllUsesOfValueWith also
// rewrites the TokenFactor's own operand, so it is restored afterwards.
if (LDBase->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
SDValue(LDBase, 1),
SDValue(NewLd.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
SDValue(NewLd.getNode(), 1));
}
return NewLd;
}
// Case 2: four 32-bit elements with only the first two loaded; use a 64-bit
// VZEXT_LOAD, provided v2i64 is a legal type.
EVT EltVT = VT.getVectorElementType();
if (NumElems == 4 && LastLoadedElt == 1 && (EltVT.getSizeInBits() == 32) &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false, true,
false);
// Same chain fix-up as in case 1.
if (LDBase->hasAnyUseOfValue(1)) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
SDValue(LDBase, 1), SDValue(ResNode.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
SDValue(ResNode.getNode(), 1));
}
return DAG.getNode(ISD::BITCAST, DL, VT, ResNode);
}
return SDValue();
}
/// Try to lower a splat BUILD_VECTOR or splat VECTOR_SHUFFLE to an
/// X86ISD::VBROADCAST node. Requires AVX.
///
/// \returns the broadcast node, or an empty SDValue when Op is not a suitable
///          splat or the subtarget/scalar-size combination is not handled.
static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
if (!Subtarget->hasAVX())
return SDValue();
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
// Ld is the scalar to broadcast; ConstSplatVal records whether it is an
// integer or FP constant.
SDValue Ld;
bool ConstSplatVal;
switch (Op.getOpcode()) {
default:
return SDValue();
case ISD::BUILD_VECTOR: {
auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
BitVector UndefElements;
SDValue Splat = BVOp->getSplatValue(&UndefElements);
// Need a splat value with more than one defined element.
if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
return SDValue();
Ld = Splat;
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
Ld.getOpcode() == ISD::ConstantFP);
// A non-constant scalar is only broadcast if this build_vector is its
// sole user.
if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
break;
}
case ISD::VECTOR_SHUFFLE: {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// Only splats of element 0 are handled.
if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
return SDValue();
SDValue Sc = Op.getOperand(0);
// The input is an arbitrary vector rather than a scalar placed in a vector:
// with Int256 support, broadcast from the vector itself (from its low 128
// bits when VT is 256 bits or wider).
if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
Sc.getOpcode() != ISD::BUILD_VECTOR) {
if (!Subtarget->hasInt256())
return SDValue();
if (VT.getSizeInBits() >= 256)
Sc = Extract128BitVector(Sc, 0, DAG, dl);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
}
Ld = Sc.getOperand(0);
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
Ld.getOpcode() == ISD::ConstantFP);
// The one-use requirement below is waived for 512-bit vectors on AVX-512
// when the scalar is at least 32 bits wide.
bool hasRegVer = Subtarget->hasAVX512() && VT.is512BitVector() &&
Ld.getValueType().getSizeInBits() >= 32;
if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
!hasRegVer))
return SDValue();
break;
}
}
unsigned ScalarSize = Ld.getValueType().getSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
const Function *F = DAG.getMachineFunction().getFunction();
bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
// Constant splat: place the scalar constant in the constant pool and
// broadcast a load of it, when AVX2 is available or optimizing for size and
// the scalar size qualifies.
if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) {
EVT CVT = Ld.getValueType();
assert(!CVT.isVector() && "Must not broadcast a vector type");
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
C = CF->getConstantFPValue();
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
}
bool IsLoad = ISD::isNormalLoad(Ld.getNode());
// Scalar that is not a load: broadcast it directly only with Int256 support
// and a 32-bit scalar (or 64-bit for vectors of 256 bits or more).
if (!IsLoad && Subtarget->hasInt256() &&
(ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
if (!IsLoad)
return SDValue();
// From here on Ld is a normal load.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
(Subtarget->hasVLX() && ScalarSize == 64))
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
// With Int256 support, integer loads of 8, 16 or 64 bits are broadcast too.
if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
return SDValue();
}
/// Given an extract of element ExtIdx from ExtractedFromVec, look through a
/// vector shuffle when possible. If the shuffle's mask entry for that element
/// is undefined or refers to the shuffle's first operand, ExtractedFromVec is
/// replaced by that operand and the mask entry is returned. Otherwise the
/// original constant index is returned and ExtractedFromVec is left alone.
static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
                                         SDValue ExtIdx) {
  const int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();

  ShuffleVectorSDNode *Shuf = dyn_cast<ShuffleVectorSDNode>(ExtractedFromVec);
  if (!Shuf)
    return Idx;

  SDValue Src = Shuf->getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  assert(SrcVT.getVectorElementType() ==
         ExtractedFromVec.getSimpleValueType().getVectorElementType());

  const int Remapped = Shuf->getMaskElt(Idx);
  if (!isUndefOrInRange(Remapped, 0, SrcVT.getVectorNumElements()))
    return Idx;

  ExtractedFromVec = Src;
  return Remapped;
}
/// Try to express a BUILD_VECTOR as one shuffle of at most two source vectors
/// plus at most two INSERT_VECTOR_ELT nodes for the remaining operands.
///
/// Operands that are EXTRACT_VECTOR_ELT (with a constant index) of a vector of
/// the result type are routed through the shuffle mask; UNDEF operands stay
/// undefined; any other operand is inserted afterwards. Returns an empty
/// SDValue when the pattern does not apply.
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // The leftover scalars are patched in with INSERT_VECTOR_ELT, so give up if
  // that operation is neither legal nor custom for this type.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
    return SDValue();

  SDLoc DL(Op);
  unsigned NumElems = Op.getNumOperands();

  SDValue VecIn1, VecIn2;
  SmallVector<unsigned, 4> InsertIndices;
  SmallVector<int, 8> Mask(NumElems, -1);

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    unsigned Opc = Elt.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    if (Opc != ISD::EXTRACT_VECTOR_ELT) {
      // Not an extract: remember it for a later insert, but tolerate no more
      // than two such operands.
      if (InsertIndices.size() > 1)
        return SDValue();
      InsertIndices.push_back(i);
      continue;
    }

    SDValue Source = Elt.getOperand(0);
    SDValue ExtIdx = Elt.getOperand(1);
    if (!isa<ConstantSDNode>(ExtIdx))
      return SDValue();

    // May replace Source with the vector feeding a shuffle.
    int Idx = getUnderlyingExtractedFromVec(Source, ExtIdx);
    if (Source.getValueType() != VT)
      return SDValue();

    // Elements of the first source vector use mask indices [0, NumElems).
    if (!VecIn1.getNode())
      VecIn1 = Source;
    if (Source == VecIn1) {
      Mask[i] = Idx;
      continue;
    }

    // Elements of the second source vector use [NumElems, 2 * NumElems); a
    // third distinct source defeats the transform.
    if (!VecIn2.getNode())
      VecIn2 = Source;
    if (Source != VecIn2)
      return SDValue();
    Mask[i] = Idx + NumElems;
  }

  if (!VecIn1.getNode())
    return SDValue();

  if (!VecIn2.getNode())
    VecIn2 = DAG.getUNDEF(VT);
  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask.data());

  // Patch in the operands that did not come from a vector extract.
  for (unsigned Idx : InsertIndices)
    NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
                     DAG.getIntPtrConstant(Idx));

  return NV;
}
/// Lower a BUILD_VECTOR of i1 elements (at most 16 bits wide).
///
/// Handles, in order: all-zeros / all-ones vectors, vectors whose defined
/// operands are all constants, vectors with exactly one non-constant operand
/// at a non-zero index, splats, and vectors whose only non-constant operand
/// is operand 0. Any other shape hits llvm_unreachable.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
         "Unexpected type in LowerBUILD_VECTORvXi1!");

  SDLoc dl(Op);

  // All-zeros and all-ones are rebuilt from target constants.
  const bool IsAllZeros = ISD::isBuildVectorAllZeros(Op.getNode());
  if (IsAllZeros || ISD::isBuildVectorAllOnes(Op.getNode())) {
    SDValue Cst = DAG.getTargetConstant(IsAllZeros ? 0 : 1, MVT::i1);
    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }

  // Classify the defined operands. ConstBits gets bit N set when operand N is
  // a non-zero constant.
  uint64_t ConstBits = 0;
  int LastNonConstIdx = -1;
  bool IsSplat = true;
  unsigned NumNonConsts = 0;
  unsigned NumConsts = 0;
  for (unsigned Lane = 0, NumLanes = Op.getNumOperands(); Lane < NumLanes;
       ++Lane) {
    SDValue Elt = Op.getOperand(Lane);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
      ++NumConsts;
      if (C->getZExtValue())
        ConstBits |= (1ULL << Lane);
    } else {
      ++NumNonConsts;
      LastNonConstIdx = Lane;
    }

    if (Elt != Op.getOperand(0))
      IsSplat = false;
  }

  // Only constants: bitcast an i16 immediate to v16i1 and take the low part.
  if (NumNonConsts == 0) {
    SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
                                   DAG.getConstant(ConstBits, MVT::i16));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
                       DAG.getIntPtrConstant(0));
  }

  // One variable element past index 0: insert it into the constant (or undef)
  // remainder.
  if (NumNonConsts == 1 && LastNonConstIdx != 0) {
    SDValue DstVec;
    if (NumConsts == 0) {
      DstVec = DAG.getUNDEF(VT);
    } else {
      SDValue VecAsImm =
          DAG.getConstant(ConstBits, MVT::getIntegerVT(VT.getSizeInBits()));
      DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
    }
    return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
                       Op.getOperand(LastNonConstIdx),
                       DAG.getIntPtrConstant(LastNonConstIdx));
  }

  if (!IsSplat && (LastNonConstIdx != 0))
    llvm_unreachable("Unsupported BUILD_VECTOR operation");

  // Remaining shapes are driven by operand 0: select between two integer
  // immediates on it and bitcast the result to the mask type.
  MVT SelectVT = (VT == MVT::v16i1) ? MVT::i16 : MVT::i8;
  SDValue IfSet = IsSplat ? DAG.getConstant(-1, SelectVT)
                          : DAG.getConstant((ConstBits | 1), SelectVT);
  SDValue IfClear = IsSplat ? DAG.getConstant(0, SelectVT)
                            : DAG.getConstant(ConstBits, SelectVT);
  SDValue Select =
      DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0), IfSet, IfClear);
  return DAG.getNode(ISD::BITCAST, dl, VT, Select);
}
/// Check whether operands [BaseIdx, LastIdx) of build_vector \p N implement a
/// horizontal binary operation with opcode \p Opcode.
///
/// Every non-undef operand in the range must be a single-use \p Opcode node
/// whose two operands are EXTRACT_VECTOR_ELTs, with constant indices, of the
/// same vector. Operands in the first half of the range must combine the
/// element pairs (BaseIdx, BaseIdx+1), (BaseIdx+2, BaseIdx+3), ... of one
/// vector, which is returned in \p V0; operands in the second half must do the
/// same for a vector returned in \p V1. \p V0 / \p V1 stay UNDEF when the
/// corresponding half contains no defined operand. For ADD and FADD the two
/// extract operands may appear in either order.
///
/// \returns true on a match. On failure the contents of \p V0 and \p V1 must
/// not be relied upon.
static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
SelectionDAG &DAG,
unsigned BaseIdx, unsigned LastIdx,
SDValue &V0, SDValue &V1) {
EVT VT = N->getValueType(0);
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
// Only the commutative opcodes may have their extract operands swapped.
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
// Index of the first element of the pair the next operand is expected to read.
unsigned ExpectedVExtractIdx = BaseIdx;
unsigned NumElts = LastIdx - BaseIdx;
V0 = DAG.getUNDEF(VT);
V1 = DAG.getUNDEF(VT);
for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
SDValue Op = N->getOperand(i + BaseIdx);
// Undef operands match anything, but the expected index still advances.
if (Op->getOpcode() == ISD::UNDEF) {
// Entering the second half: restart the expected index for V1.
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
ExpectedVExtractIdx += 2;
continue;
}
CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
if (!CanFold)
break;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Both operands must extract, at constant indices, from the same vector.
CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0) == Op1.getOperand(0) &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
isa<ConstantSDNode>(Op1.getOperand(1)));
if (!CanFold)
break;
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
// The first defined operand of each half fixes that half's source vector,
// which must have the same type as the build_vector.
if (i * 2 < NumElts) {
if (V0.getOpcode() == ISD::UNDEF) {
V0 = Op0.getOperand(0);
if (V0.getValueType() != VT)
return false;
}
} else {
if (V1.getOpcode() == ISD::UNDEF) {
V1 = Op0.getOperand(0);
if (V1.getValueType() != VT)
return false;
}
// Entering the second half: restart the expected index for V1.
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
// Accept (Idx, Idx+1) in order, or reversed when the opcode commutes.
if (I0 == ExpectedVExtractIdx)
CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
else if (IsCommutable && I1 == ExpectedVExtractIdx) {
CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
} else
CanFold = false;
ExpectedVExtractIdx += 2;
}
return CanFold;
}
/// Emit a 256-bit horizontal binop as two 128-bit \p X86Opcode nodes joined by
/// CONCAT_VECTORS.
///
/// With \p Mode set, the low result is X86Opcode(V0.lo, V0.hi) and the high
/// result is X86Opcode(V1.lo, V1.hi). With \p Mode clear, the low result is
/// X86Opcode(V0.lo, V1.lo) and the high result is X86Opcode(V0.hi, V1.hi).
/// A half is left UNDEF when \p isUndefLO / \p isUndefHI says so, or when the
/// inputs feeding it are UNDEF.
static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
                                     SDLoc DL, SelectionDAG &DAG,
                                     unsigned X86Opcode, bool Mode,
                                     bool isUndefLO, bool isUndefHI) {
  EVT VT = V0.getValueType();
  assert(VT.is256BitVector() && VT == V1.getValueType() &&
         "Invalid nodes in input!");

  unsigned NumElts = VT.getVectorNumElements();
  SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
  SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
  SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
  SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
  EVT HalfVT = V0_LO.getValueType();

  SDValue LO = DAG.getUNDEF(HalfVT);
  SDValue HI = DAG.getUNDEF(HalfVT);

  const bool WantLO = !isUndefLO;
  const bool WantHI = !isUndefHI;

  if (Mode) {
    // Each result half is fed by both halves of a single input vector.
    if (WantLO && V0->getOpcode() != ISD::UNDEF)
      LO = DAG.getNode(X86Opcode, DL, HalfVT, V0_LO, V0_HI);
    if (WantHI && V1->getOpcode() != ISD::UNDEF)
      HI = DAG.getNode(X86Opcode, DL, HalfVT, V1_LO, V1_HI);
  } else {
    // Each result half is fed by the matching halves of both inputs; skip it
    // only when both of those halves are UNDEF.
    const bool LoInputsUndef = V0_LO->getOpcode() == ISD::UNDEF &&
                               V1_LO->getOpcode() == ISD::UNDEF;
    const bool HiInputsUndef = V0_HI->getOpcode() == ISD::UNDEF &&
                               V1_HI->getOpcode() == ISD::UNDEF;
    if (WantLO && !LoInputsUndef)
      LO = DAG.getNode(X86Opcode, DL, HalfVT, V0_LO, V1_LO);
    if (WantHI && !HiInputsUndef)
      HI = DAG.getNode(X86Opcode, DL, HalfVT, V0_HI, V1_HI);
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}
/// Try to fold a floating-point build_vector into X86ISD::ADDSUB.
///
/// The pattern requires element i of \p BV to be either UNDEF or
///   (FSUB (extract_vector_elt A, i), (extract_vector_elt B, i))  for even i
///   (FADD (extract_vector_elt A, i), (extract_vector_elt B, i))  for odd i
/// with the same vectors A and B (both of the result type) throughout. The
/// operands of an FADD may appear swapped. At least one FADD and one FSUB must
/// be present. \p Subtarget is not consulted by this function.
///
/// \returns the ADDSUB node on success, or an empty SDValue otherwise.
static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDLoc DL(BV);
EVT VT = BV->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
SDValue InVec0 = DAG.getUNDEF(VT);
SDValue InVec1 = DAG.getUNDEF(VT);
assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
VT == MVT::v2f64) && "build_vector with an invalid type found!");
// Element 0 must be a subtract; the expected opcode alternates per element.
unsigned ExpectedOpcode = ISD::FSUB;
unsigned NextExpectedOpcode = ISD::FADD;
bool AddFound = false;
bool SubFound = false;
for (unsigned i = 0, e = NumElts; i != e; i++) {
SDValue Op = BV->getOperand(i);
unsigned Opcode = Op.getOpcode();
// An undef element matches anything, but still flips the expected opcode.
if (Opcode == ISD::UNDEF) {
std::swap(ExpectedOpcode, NextExpectedOpcode);
continue;
}
if (Opcode != ExpectedOpcode)
return SDValue();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Both operands must extract the same constant index from some vector.
if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op0.getOperand(1)) ||
!isa<ConstantSDNode>(Op1.getOperand(1)) ||
Op0.getOperand(1) != Op1.getOperand(1))
return SDValue();
// ... and that index must be this element's own position.
unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
if (I0 != i)
return SDValue();
// Odd positions hold the FADDs, even positions the FSUBs.
if (i & 1)
AddFound = true;
else
SubFound = true;
// The first defined element fixes both source vectors.
if (InVec0.getOpcode() == ISD::UNDEF) {
InVec0 = Op0.getOperand(0);
if (InVec0.getValueType() != VT)
return SDValue();
}
if (InVec1.getOpcode() == ISD::UNDEF) {
InVec1 = Op1.getOperand(0);
if (InVec1.getValueType() != VT)
return SDValue();
}
// Later elements must use the same pair of vectors. Only an FADD may have
// its operands commuted to make them line up.
if (InVec0 != Op0.getOperand(0)) {
if (ExpectedOpcode == ISD::FSUB)
return SDValue();
std::swap(Op0, Op1);
if (InVec0 != Op0.getOperand(0))
return SDValue();
}
if (InVec1 != Op1.getOperand(0))
return SDValue();
std::swap(ExpectedOpcode, NextExpectedOpcode);
}
// Require at least one add, one subtract, and two defined source vectors.
if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
InVec1.getOpcode() != ISD::UNDEF)
return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
return SDValue();
}
/// Try to fold a BUILD_VECTOR into an X86 ADDSUB or horizontal add/sub node.
///
/// The attempts are, in order:
///   1. ADDSUB for v4f32/v2f64 (SSE3) and v8f32/v4f64 (AVX).
///   2. A single 128-bit horizontal op (SSE3 for FP, SSSE3 for integers).
///   3. A 256-bit horizontal op matched separately on each half of the
///      build_vector (FP with AVX; integers directly with AVX2, otherwise
///      expanded into two 128-bit ops).
///   4. A 256-bit build_vector matched across its full width and expanded
///      into two 128-bit horizontal ops.
///
/// \returns the replacement node, or an empty SDValue if nothing matched.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
                                          const X86Subtarget *Subtarget) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
  SDValue InVec0, InVec1;

  // 1. ADDSUB.
  if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
      (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
    SDValue Value = matchAddSub(BV, DAG, Subtarget);
    if (Value.getNode())
      return Value;
  }

  // Count the undef operands in each half of the build_vector.
  unsigned NumUndefsLO = 0;
  unsigned NumUndefsHI = 0;
  unsigned Half = NumElts/2;
  for (unsigned i = 0, e = Half; i != e; ++i)
    if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
      NumUndefsLO++;
  for (unsigned i = Half, e = NumElts; i != e; ++i)
    if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
      NumUndefsHI++;

  // Bail out when at most one operand is defined.
  if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
    return SDValue();

  // 2. 128-bit horizontal ops.
  if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
    if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
      return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
    if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
      return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
  } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
    if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
      return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
    if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
      return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
  }

  if (!Subtarget->hasAVX())
    return SDValue();

  // Match \p Opcode as a horizontal op on each half of BV independently and
  // require both halves to agree on their source vectors (an UNDEF source in
  // one half is compatible with anything). On success InVec0/InVec1 receive
  // the *defined* vector of each pair: when the lower half only contributed
  // UNDEF, the vector found by the upper half must be used, otherwise the
  // emitted node would read an UNDEF input for elements that are defined.
  auto MatchPerHalf = [&](unsigned Opcode) -> bool {
    SDValue LoV0, LoV1, HiV0, HiV1;
    if (!isHorizontalBinOp(BV, Opcode, DAG, 0, Half, LoV0, LoV1) ||
        !isHorizontalBinOp(BV, Opcode, DAG, Half, NumElts, HiV0, HiV1))
      return false;

    const bool LoV0Undef = LoV0.getOpcode() == ISD::UNDEF;
    const bool HiV0Undef = HiV0.getOpcode() == ISD::UNDEF;
    const bool LoV1Undef = LoV1.getOpcode() == ISD::UNDEF;
    const bool HiV1Undef = HiV1.getOpcode() == ISD::UNDEF;
    if (!(LoV0Undef || HiV0Undef || LoV0 == HiV0))
      return false;
    if (!(LoV1Undef || HiV1Undef || LoV1 == HiV1))
      return false;

    InVec0 = LoV0Undef ? HiV0 : LoV0;
    InVec1 = LoV1Undef ? HiV1 : LoV1;
    return true;
  };

  // 3. 256-bit horizontal ops matched per half.
  if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
    if (MatchPerHalf(ISD::FADD))
      return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
    if (MatchPerHalf(ISD::FSUB))
      return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
  } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
    unsigned X86Opcode = 0;
    bool CanFold = true;

    if (MatchPerHalf(ISD::ADD))
      X86Opcode = X86ISD::HADD;
    else if (MatchPerHalf(ISD::SUB))
      X86Opcode = X86ISD::HSUB;
    else
      CanFold = false;

    if (CanFold) {
      // With AVX2 the 256-bit integer horizontal op is emitted directly.
      if (Subtarget->hasAVX2())
        return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);

      // Do not expand when one half has only a single defined operand.
      if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
        return SDValue();

      // Otherwise split into two 128-bit horizontal ops and concatenate.
      bool isUndefLO = NumUndefsLO == Half;
      bool isUndefHI = NumUndefsHI == Half;
      return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
                                   isUndefLO, isUndefHI);
    }
  }

  // 4. Match across the full 256-bit width and expand into two 128-bit ops.
  if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
       VT == MVT::v16i16) && Subtarget->hasAVX()) {
    unsigned X86Opcode;
    if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::HADD;
    else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::HSUB;
    else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::FHADD;
    else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::FHSUB;
    else
      return SDValue();

    // Do not expand when one half has only a single defined operand.
    if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
      return SDValue();

    bool isUndefLO = NumUndefsLO == Half;
    bool isUndefHI = NumUndefsHI == Half;
    return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
                                 isUndefLO, isUndefHI);
  }

  return SDValue();
}
/// Custom lowering for BUILD_VECTOR.
///
/// Tries a sequence of increasingly generic strategies: i1 mask vectors,
/// all-zeros / all-ones, broadcasts, vectors with a single non-zero element,
/// splats, splitting 256/512-bit vectors in halves, per-type 128-bit helpers,
/// and finally element-by-element construction. Returns an empty SDValue when
/// no custom lowering is produced here.
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);

  MVT VT = Op.getSimpleValueType();
  MVT ExtVT = VT.getVectorElementType();
  unsigned NumElems = Op.getNumOperands();

  // Vectors of i1 have their own lowering on AVX-512.
  if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
    return LowerBUILD_VECTORvXi1(Op, DAG);

  // All-zeros: the i32 vector forms are returned unchanged, everything else
  // is rebuilt through getZeroVector.
  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
    if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
      return Op;

    return getZeroVector(VT, Subtarget, DAG, dl);
  }

  // All-ones (SSE2 and up): v4i32, and v8i32 with Int256, are returned
  // unchanged; other non-512-bit types go through getOnesVector.
  if (Subtarget->hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
    if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
      return Op;

    if (!VT.is512BitVector())
      return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
  }

  SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
  if (Broadcast.getNode())
    return Broadcast;

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Classify the defined operands. NonZeros has bit i set when operand i is
  // defined and not a zero node. It is 64 bits wide, and the shift is done in
  // 64 bits, so that build_vectors with more than 31 operands do not shift
  // past the width of the type (undefined behaviour).
  unsigned NumZero = 0;
  unsigned NumNonZero = 0;
  uint64_t NonZeros = 0;
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    Values.insert(Elt);
    if (Elt.getOpcode() != ISD::Constant &&
        Elt.getOpcode() != ISD::ConstantFP)
      IsAllConstants = false;
    if (X86::isZeroNode(Elt))
      NumZero++;
    else {
      assert(i < sizeof(NonZeros) * 8 && "Too many elements for NonZeros!");
      NonZeros |= (static_cast<uint64_t>(1) << i);
      NumNonZero++;
    }
  }

  // No non-zero element was recorded: return UNDEF.
  if (NumNonZero == 0)
    return DAG.getUNDEF(VT);

  // Special case: exactly one non-zero element.
  if (NumNonZero == 1) {
    unsigned Idx = countTrailingZeros(NonZeros);
    SDValue Item = Op.getOperand(Idx);

    // An i64 element on a 32-bit target whose upper 32 bits are known zero is
    // built as a v4i32 and bitcast back.
    if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
        (!IsAllConstants || Idx == 0)) {
      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
        assert(VT == MVT::v2i64 && "Expected an SSE value type!");
        EVT VecVT = MVT::v4i32;
        unsigned VecElts = 4;

        Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);

        if (ExperimentalVectorShuffleLowering)
          return DAG.getNode(
              ISD::BITCAST, dl, VT,
              getShuffleVectorZeroOrUndef(Item, Idx * 2, true, Subtarget, DAG));

        Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);

        // Move the element from position 0 to where it belongs.
        if (Idx != 0) {
          SmallVector<int, 4> Mask;
          Mask.push_back(Idx);
          for (unsigned i = 1; i != VecElts; ++i)
            Mask.push_back(i);
          Item = DAG.getVectorShuffle(VecVT, dl, Item, DAG.getUNDEF(VecVT),
                                      &Mask[0]);
        }
        return DAG.getNode(ISD::BITCAST, dl, VT, Item);
      }
    }

    // The non-zero element is the low element.
    if (Idx == 0) {
      // No explicit zeros, so every other element is undef.
      if (NumZero == 0)
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
          (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
        if (VT.is256BitVector() || VT.is512BitVector()) {
          SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
          return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
                             Item, DAG.getIntPtrConstant(0));
        }
        assert(VT.is128BitVector() && "Expected an SSE value type!");
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
      }

      // i8/i16 elements are zero-extended and handled as an i32 element.
      if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
        if (VT.is256BitVector()) {
          SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
          Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
        } else {
          assert(VT.is128BitVector() && "Expected an SSE value type!");
          Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
        }
        return DAG.getNode(ISD::BITCAST, dl, VT, Item);
      }
    }

    // Two-element vector <0, x>: build it by shifting x into the high half.
    if (NumElems == 2 && Idx == 1 &&
        X86::isZeroNode(Op.getOperand(0)) &&
        !X86::isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                   VT, Op.getOperand(1)),
                       NumBits/2, DAG, *this, dl);
    }

    // Remaining all-constant cases get no custom lowering here.
    if (IsAllConstants)
      return SDValue();

    // 32-bit element at a non-zero position: place it in element 0 and
    // shuffle it to its final position.
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      if (ExperimentalVectorShuffleLowering)
        return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget,
                                           DAG);

      Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
      SmallVector<int, 8> MaskVec;
      for (unsigned i = 0; i != NumElems; ++i)
        MaskVec.push_back(i == Idx ? 0 : 1);
      return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
    }
  }

  // Splat: only the 32-bit, single-user case is attempted here.
  if (Values.size() == 1) {
    if (EVTBits == 32) {
      unsigned Idx = countTrailingZeros(NonZeros);
      SDValue Item = Op.getOperand(Idx);
      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
    }
    return SDValue();
  }

  // All-constant vectors get no custom lowering from here on.
  if (IsAllConstants)
    return SDValue();

  // 256/512-bit vectors: try consecutive loads, else build both halves and
  // concatenate them.
  if (VT.is256BitVector() || VT.is512BitVector()) {
    SmallVector<SDValue, 64> V;
    for (unsigned i = 0; i != NumElems; ++i)
      V.push_back(Op.getOperand(i));

    if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
      return LD;

    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);

    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
                                makeArrayRef(&V[0], NumElems/2));
    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
                                makeArrayRef(&V[NumElems / 2], NumElems/2));

    if (VT.is256BitVector())
      return Concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
    return Concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
  }

  // 64-bit elements: only the single-non-zero case is handled.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      unsigned Idx = countTrailingZeros(NonZeros);
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
    }
    return SDValue();
  }

  // Per-type 128-bit helpers. Only 16 (resp. 8) operands exist on these
  // paths, so narrowing NonZeros to the helpers' unsigned parameter keeps
  // every set bit.
  if (EVTBits == 8 && NumElems == 16) {
    SDValue V = LowerBuildVectorv16i8(Op, static_cast<unsigned>(NonZeros),
                                      NumNonZero, NumZero, DAG, Subtarget,
                                      *this);
    if (V.getNode()) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDValue V = LowerBuildVectorv8i16(Op, static_cast<unsigned>(NonZeros),
                                      NumNonZero, NumZero, DAG, Subtarget,
                                      *this);
    if (V.getNode()) return V;
  }

  if (EVTBits == 32 && NumElems == 4) {
    SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
    if (V.getNode())
      return V;
  }

  // Four elements with at least one zero: build each pair of elements, then
  // combine the two pairs with a final shuffle.
  SmallVector<SDValue, 8> V(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, Subtarget, DAG, dl);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    }

    // Select on which of the pair's two elements are non-zero.
    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
        default: break;
        case 0:
          V[i] = V[i*2];  // Both are zero.
          break;
        case 1:
          V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
          break;
        case 2:
          V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
          break;
        case 3:
          V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
          break;
      }
    }

    // A pair whose only non-zero element is its second one is swapped back
    // by the final shuffle.
    bool Reverse1 = (NonZeros & 0x3) == 2;
    bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    int MaskVec[] = {
      Reverse1 ? 1 : 0,
      Reverse1 ? 0 : 1,
      static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
      static_cast<int>(Reverse2 ? NumElems   : NumElems+1)
    };
    return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
  }

  if (Values.size() > 1 && VT.is128BitVector()) {
    // Try to turn the operands into one wide load.
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = Op.getOperand(i);

    SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
    if (LD.getNode())
      return LD;

    // Try a shuffle of existing vectors plus a few inserts.
    SDValue Sh = buildFromShuffleMostly(Op, DAG);
    if (Sh.getNode())
      return Sh;

    // With SSE4.1, insert the defined elements one at a time.
    if (Subtarget->hasSSE41()) {
      SDValue Result;
      if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
        Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
      else
        Result = DAG.getUNDEF(VT);

      for (unsigned i = 1; i < NumElems; ++i) {
        if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
                             Op.getOperand(i), DAG.getIntPtrConstant(i));
      }
      return Result;
    }

    // Otherwise put every element in its own vector and merge them with
    // rounds of unpackl, halving the stride each round.
    for (unsigned i = 0; i < NumElems; ++i) {
      if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
      else
        V[i] = DAG.getUNDEF(VT);
    }

    unsigned EltStride = NumElems >> 1;
    while (EltStride != 0) {
      for (unsigned i = 0; i < EltStride; ++i) {
        // In the first round, skip the merge when the partner is undef.
        if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
            EltStride == NumElems/2)
          continue;

        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
      }
      EltStride >>= 1;
    }
    return V[0];
  }
  return SDValue();
}
/// Lower a CONCAT_VECTORS producing a 256-bit or 512-bit result.
///
/// A 256-bit result concatenates its two 128-bit operands. A 512-bit result
/// with four operands first forms two halves from pairs of operands; with two
/// operands the halves are concatenated directly.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT ResVT = Op.getSimpleValueType();

  assert((ResVT.is256BitVector() ||
          ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");

  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  unsigned NumElems = ResVT.getVectorNumElements();

  if (ResVT.is256BitVector())
    return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);

  // 512-bit result from two operands.
  if (Op.getNumOperands() != 4)
    return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);

  // 512-bit result from four operands: build each half, then join them.
  MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
                                ResVT.getVectorNumElements()/2);
  SDValue V3 = Op.getOperand(2);
  SDValue V4 = Op.getOperand(3);
  SDValue LowerHalf = Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl);
  SDValue UpperHalf = Concat128BitVectors(V3, V4, HalfVT, NumElems/2, DAG, dl);
  return Concat256BitVectors(LowerHalf, UpperHalf, ResVT, NumElems, DAG, dl);
}
/// Entry point for CONCAT_VECTORS lowering: checks the operand count against
/// the result width (two operands for 256-bit, two or four for 512-bit) and
/// forwards to LowerAVXCONCAT_VECTORS.
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
  unsigned LLVM_ATTRIBUTE_UNUSED NumOps = Op.getNumOperands();

  assert((VT.is256BitVector() && NumOps == 2) ||
         (VT.is512BitVector() && (NumOps == 2 || NumOps == 4)));

  return LowerAVXCONCAT_VECTORS(Op, DAG);
}
/// Return true if every mask element is either undef (-1) or equal to its own
/// position, i.e. the shuffle leaves its first input unchanged.
static bool isNoopShuffleMask(ArrayRef<int> Mask) {
  int Position = 0;
  for (int M : Mask) {
    if (M != -1 && M != Position)
      return false;
    ++Position;
  }
  return true;
}
/// Return true if no mask element selects from the second shuffle input,
/// i.e. every element is smaller than the mask size.
static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
  const int NumElts = (int)Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    if (Mask[i] >= NumElts)
      return false;
  }
  return true;
}
/// Return true if any defined mask element reads from a different 128-bit
/// lane than the one it is written to. Elements of the second input are
/// mapped onto the same lane numbering via a modulo by the mask size.
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
  const int EltsPerLane = 128 / VT.getScalarSizeInBits();
  const int NumElts = Mask.size();

  for (int i = 0; i != NumElts; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue; // Undef elements never cross lanes.

    const int SrcLane = (M % NumElts) / EltsPerLane;
    const int DstLane = i / EltsPerLane;
    if (SrcLane != DstLane)
      return true;
  }
  return false;
}
/// Test whether a shuffle mask performs the same in-lane shuffle in every
/// 128-bit lane.
///
/// On success \p RepeatedMask holds the single-lane mask: one entry per
/// element of a 128-bit lane, where entries taken from the first input are in
/// [0, LaneSize), entries taken from the second input are offset by the full
/// mask size, and -1 marks positions that are undef in every lane.
///
/// \p RepeatedMask is always reset first, so a caller may pass a vector that
/// already holds data without stale entries influencing the match.
///
/// \returns false if any defined element crosses a 128-bit lane or if two
/// lanes disagree; \p RepeatedMask is then only partially filled.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  int LaneSize = 128 / VT.getScalarSizeInBits();

  // resize() alone would keep whatever the caller left in the vector; clear
  // it so that every slot really starts out as "not yet seen" (-1).
  RepeatedMask.clear();
  RepeatedMask.resize(LaneSize, -1);

  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue;

    // This entry crosses lanes, so there is no way to model this shuffle.
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      return false;

    if (RepeatedMask[i % LaneSize] == -1)
      // First defined element for this in-lane position. Record it relative
      // to lane 0; for the second input keep the offset by Size.
      RepeatedMask[i % LaneSize] =
          Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + Size;
    else if (RepeatedMask[i % LaneSize] + (i / LaneSize) * LaneSize != Mask[i])
      // A later lane disagrees with what was recorded.
      return false;
  }
  return true;
}
namespace {
/// Compare a shuffle mask against a list of expected element values.
///
/// Returns true when both have the same length and every mask element is
/// either undef (-1) or equal to the corresponding expected value. The
/// expected values are asserted to be non-negative.
bool isShuffleEquivalentImpl(ArrayRef<int> Mask, ArrayRef<const int *> Args) {
  const int NumElts = Mask.size();
  if (Mask.size() != Args.size())
    return false;

  for (int i = 0; i != NumElts; ++i) {
    assert(*Args[i] >= 0 && "Arguments must be positive integers!");
    if (Mask[i] == -1 || Mask[i] == *Args[i])
      continue;
    return false;
  }
  return true;
}
} // namespace

/// Variadic front end for isShuffleEquivalentImpl: takes the mask followed by
/// the expected element values as individual int arguments.
static const VariadicFunction1<
    bool, ArrayRef<int>, int, isShuffleEquivalentImpl> isShuffleEquivalent = {};
/// Encode a 4-element shuffle mask as an i8 immediate constant.
///
/// Element i of the mask occupies bits [2*i, 2*i+1] of the immediate. An
/// undef element (-1) is encoded as its own position i.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
                                          SelectionDAG &DAG) {
  assert(Mask.size() == 4 && "Only 4-lane shuffle masks");

  unsigned Imm = 0;
  for (int i = 0; i != 4; ++i) {
    assert(Mask[i] >= -1 && Mask[i] < 4 && "Out of bound mask element!");
    const int Selector = Mask[i] == -1 ? i : Mask[i];
    Imm |= Selector << (2 * i);
  }
  return DAG.getConstant(Imm, MVT::i8);
}
/// Try to lower a two-input shuffle as a blend.
///
/// This applies only when every defined mask element keeps its position: it
/// is either i (taken from \p V1) or i + Size (taken from \p V2). Returns an
/// empty SDValue otherwise. Depending on the type the blend is emitted as an
/// X86ISD::BLENDI with an i8 immediate (possibly on a bitcast type) or, for
/// byte-granular 256-bit blends, as a VSELECT with a constant mask.
///
/// NOTE: several switch cases below deliberately fall through to the next
/// case; see the comments at each site.
static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
// Bit i of BlendMask is set when element i comes from V2.
unsigned BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= Size) {
// A V2 element must stay in place. The line below holds two statements:
// the early return guarded by the `if`, then the unconditional bit set.
if (Mask[i] != i + Size)
return SDValue(); BlendMask |= 1u << i;
continue;
}
// A V1 element must stay in place too. The loop's closing brace follows the
// return statement on the same line.
if (Mask[i] >= 0 && Mask[i] != i)
return SDValue(); }
switch (VT.SimpleTy) {
// Floating-point types: one immediate bit per element.
case MVT::v2f64:
case MVT::v4f32:
case MVT::v4f64:
case MVT::v8f32:
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, MVT::i8));
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
// Intentional fallthrough into the 128-bit dword/qword case.
case MVT::v2i64:
case MVT::v4i32:
// With AVX2, blend at 32-bit granularity: each mask element expands to
// Scale bits of the immediate.
if (Subtarget->hasAVX2()) {
int Scale = VT.getScalarSizeInBits() / 32;
BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= Size)
for (int j = 0; j < Scale; ++j)
BlendMask |= 1u << (i * Scale + j);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1);
V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
DAG.getConstant(BlendMask, MVT::i8)));
}
// Intentional fallthrough: without AVX2 blend as v8i16.
case MVT::v8i16: {
// Blend at 16-bit granularity; Scale is the number of i16 elements per
// element of VT.
int Scale = 8 / VT.getVectorNumElements();
BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (Mask[i] >= Size)
for (int j = 0; j < Scale; ++j)
BlendMask |= 1u << (i * Scale + j);
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2,
DAG.getConstant(BlendMask, MVT::i8)));
}
case MVT::v16i16: {
assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
// If both 128-bit lanes use the same pattern, an 8-bit immediate suffices.
if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 16)
BlendMask |= 1u << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, MVT::i8));
}
}
// Intentional fallthrough: non-repeating v16i16 blends use the byte path.
case MVT::v32i8: {
assert(Subtarget->hasAVX2() && "256-bit integer blends require AVX2!");
// Number of bytes per element of VT.
int Scale = VT.getScalarSizeInBits() / 8;
assert(Mask.size() * Scale == 32 && "Not a 256-bit vector!");
// Build a per-byte select mask: all-ones picks V1, zero picks V2, and undef
// mask elements stay undef.
SDValue VSELECTMask[32];
for (int i = 0, Size = Mask.size(); i < Size; ++i)
for (int j = 0; j < Scale; ++j)
VSELECTMask[Scale * i + j] =
Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
: DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8);
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V2);
return DAG.getNode(
ISD::BITCAST, DL, VT,
DAG.getNode(ISD::VSELECT, DL, MVT::v32i8,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, VSELECTMask),
V1, V2));
}
default:
llvm_unreachable("Not a supported integer vector type!");
}
}
/// Lower a two-input shuffle as two single-input shuffles followed by a
/// position-preserving shuffle (a blend) of their results.
///
/// Each input is first shuffled so that the elements the final result needs
/// from it already sit in their destination positions; the last shuffle then
/// picks, per position, from the pre-shuffled \p V1 or \p V2.
static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(SDLoc DL, MVT VT,
                                                          SDValue V1,
                                                          SDValue V2,
                                                          ArrayRef<int> Mask,
                                                          SelectionDAG &DAG) {
  // All three masks start out fully undef.
  SmallVector<int, 32> V1Mask(Mask.size(), -1);
  SmallVector<int, 32> V2Mask(Mask.size(), -1);
  SmallVector<int, 32> BlendMask(Mask.size(), -1);

  const int NumElts = Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue; // Undef stays undef everywhere.

    if (M < NumElts) {
      // Comes from V1: move it into place there, then select V1's element i.
      V1Mask[i] = M;
      BlendMask[i] = i;
    } else {
      // Comes from V2: same, with the index rebased onto V2.
      V2Mask[i] = M - NumElts;
      BlendMask[i] = i + NumElts;
    }
  }

  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
  return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
/// Try to lower a 128-bit shuffle as a rotation of the concatenated inputs.
///
/// The mask must describe a single consistent rotation amount, with all
/// elements that wrap below position 0 coming from one input (Hi) and all
/// others from one input (Lo). With SSSE3 this becomes a PALIGNR on v16i8;
/// otherwise it is built from a VSHLDQ, a VSRLDQ and an OR on v2i64.
/// Returns an empty SDValue when the mask is not such a rotation.
static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
                                              SDValue V2,
                                              ArrayRef<int> Mask,
                                              const X86Subtarget *Subtarget,
                                              SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

  const int NumElts = Mask.size();

  // Rotation amount in elements; zero means "not determined yet".
  int Rotation = 0;
  SDValue Lo, Hi;

  for (int i = 0; i != NumElts; ++i) {
    const int M = Mask[i];
    if (M == -1)
      continue;
    assert(M >= 0 && "Only -1 is a valid negative mask element!");

    // Where element 0 of the source vector would land for this entry.
    const int StartIdx = i - (M % NumElts);

    // An element that stays in place cannot be part of a rotation.
    if (StartIdx == 0)
      return SDValue();

    // Negative start indices wrap around; both forms give the same amount.
    const int CandidateRotation =
        StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    // Every defined element has to agree on one rotation amount.
    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return SDValue();

    // Elements with a negative start belong to the Hi input, the rest to Lo;
    // each side must be fed by a single vector.
    SDValue Source = M < NumElts ? V1 : V2;
    SDValue &Slot = StartIdx < 0 ? Hi : Lo;
    if (!Slot)
      Slot = Source;
    else if (Slot != Source)
      return SDValue();
  }

  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");

  // If only one side was seen, rotate that vector against itself.
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  assert(VT.getSizeInBits() == 128 &&
         "Rotate-based lowering only supports 128-bit lowering!");
  assert(Mask.size() <= 16 &&
         "Can shuffle at most 16 bytes in a 128-bit vector!");

  // Bytes per mask element.
  int Scale = 16 / Mask.size();

  if (Subtarget->hasSSSE3()) {
    Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Lo);
    Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Hi);

    SDValue Rotated =
        DAG.getNode(X86ISD::PALIGNR, DL, MVT::v16i8, Hi, Lo,
                    DAG.getConstant(Rotation * Scale, MVT::i8));
    return DAG.getNode(ISD::BITCAST, DL, VT, Rotated);
  }

  // No PALIGNR: shift both inputs toward each other and OR the results.
  int LoByteShift = 16 - Rotation * Scale;
  int HiByteShift = Rotation * Scale;

  Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Lo);
  Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi);

  SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo,
                                DAG.getConstant(8 * LoByteShift, MVT::i8));
  SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi,
                                DAG.getConstant(8 * HiByteShift, MVT::i8));
  SDValue Combined = DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift);
  return DAG.getNode(ISD::BITCAST, DL, VT, Combined);
}
/// Compute which result elements of a shuffle may be treated as zero.
///
/// An element is marked zeroable when its mask entry is negative, when it
/// selects from an input that is an all-zeros build vector, or when it
/// selects an operand of a BUILD_VECTOR input that is UNDEF or a zero node.
static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
                                                     SDValue V1, SDValue V2) {
  SmallBitVector Zeroable(Mask.size(), false);

  const bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  const bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  const int NumElts = Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    const int M = Mask[i];

    // Negative mask entries are always zeroable.
    if (M < 0) {
      Zeroable[i] = true;
      continue;
    }

    // So is anything taken from an all-zeros input.
    const bool FromV1 = M < NumElts;
    if (FromV1 ? V1IsZero : V2IsZero) {
      Zeroable[i] = true;
      continue;
    }

    // Otherwise look at the individual operand, which is only possible when
    // the input is itself a BUILD_VECTOR.
    SDValue V = FromV1 ? V1 : V2;
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      continue;

    SDValue Input = V.getOperand(M % NumElts);
    if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
      Zeroable[i] = true;
  }

  return Zeroable;
}
/// Try to lower a shuffle as an AND of one input with a constant mask.
///
/// This works when every element that is not zeroable keeps its position and
/// all of those elements come from the same input. The constant mask has all
/// bits set for the kept elements and zero elsewhere. Floating-point vectors
/// use X86ISD::FAND, integer vectors ISD::AND. Returns an empty SDValue if the
/// shuffle does not have this shape.
static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Mask,
                                           SelectionDAG &DAG) {
  MVT EltVT = VT.getScalarType();
  int NumEltBits = EltVT.getSizeInBits();
  MVT IntEltVT = MVT::getIntegerVT(NumEltBits);

  // Per-element mask constants, bitcast to the FP element type if needed.
  SDValue Zero = DAG.getConstant(0, IntEltVT);
  SDValue AllOnes =
      DAG.getConstant(APInt::getAllOnesValue(NumEltBits), IntEltVT);
  if (EltVT.isFloatingPoint()) {
    Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero);
    AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes);
  }

  SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

  // The single input every kept element must come from.
  SDValue V;
  const int NumElts = Mask.size();
  for (int i = 0; i != NumElts; ++i) {
    if (Zeroable[i])
      continue;

    // Kept elements must not move.
    if (Mask[i] % NumElts != i)
      return SDValue();

    // ... and must all share one source vector.
    SDValue Source = Mask[i] < NumElts ? V1 : V2;
    if (!V)
      V = Source;
    else if (V != Source)
      return SDValue();

    VMaskOps[i] = AllOnes;
  }

  // Nothing is kept, so there is no input to mask.
  if (!V)
    return SDValue();

  SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
  const unsigned AndOpcode = VT.isFloatingPoint() ? (unsigned) X86ISD::FAND
                                                  : (unsigned) ISD::AND;
  V = DAG.getNode(AndOpcode, DL, VT, V, VMask);
  return V;
}
/// Try to lower a shuffle as a whole-register byte shift of one input
/// (X86ISD::VSRLDQ or X86ISD::VSHLDQ on a v2i64 bitcast).
///
/// For every element shift amount from 1 to Size-1, this first checks for a
/// right shift (the top elements are zeroable and the remaining lanes are a
/// sequential run from one input starting at the shift amount) and then for
/// a left shift (the bottom elements are zeroable and the remaining lanes are
/// a sequential run starting at element 0 of one input). The per-element byte
/// width is computed as 16 / Mask.size(), i.e. the vector is treated as 16
/// bytes wide. Returns a null SDValue if nothing matches.
static SDValue lowerVectorShuffleAsByteShift(SDLoc DL, MVT VT, SDValue V1,
                                             SDValue V2, ArrayRef<int> Mask,
                                             SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
  const int NumElts = Mask.size();
  const int BytesPerElt = 16 / NumElts;

  // Bitcast Src to v2i64, shift it by NumBytes bytes with Opcode (the
  // immediate is expressed in bits), and bitcast the result back to VT.
  auto EmitByteShift = [&](unsigned Opcode, SDValue Src,
                           int NumBytes) -> SDValue {
    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
    SDValue Shifted = DAG.getNode(Opcode, DL, MVT::v2i64, Cast,
                                  DAG.getConstant(NumBytes * 8, MVT::i8));
    return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
  };

  for (int EltShift = 1; EltShift < NumElts; ++EltShift) {
    const int NumBytes = EltShift * BytesPerElt;
    const int NumKept = NumElts - EltShift;

    // Right shift: the top EltShift lanes must be zeroable.
    bool TopZeroable = true;
    for (int Lane = NumKept; Lane < NumElts; ++Lane)
      TopZeroable &= Zeroable[Lane];

    if (TopZeroable) {
      bool FromV1 = isSequentialOrUndefInRange(Mask, 0, NumKept, EltShift);
      bool FromV2 =
          isSequentialOrUndefInRange(Mask, 0, NumKept, NumElts + EltShift);
      if (FromV1 || FromV2)
        return EmitByteShift(X86ISD::VSRLDQ, FromV1 ? V1 : V2, NumBytes);
    }

    // Left shift: the bottom EltShift lanes must be zeroable.
    bool BottomZeroable = true;
    for (int Lane = 0; Lane < EltShift; ++Lane)
      BottomZeroable &= Zeroable[Lane];

    if (BottomZeroable) {
      bool FromV1 = isSequentialOrUndefInRange(Mask, EltShift, NumKept, 0);
      bool FromV2 =
          isSequentialOrUndefInRange(Mask, EltShift, NumKept, NumElts);
      if (FromV1 || FromV2)
        return EmitByteShift(X86ISD::VSHLDQ, FromV1 ? V1 : V2, NumBytes);
    }
  }

  return SDValue();
}
/// Try to lower a shuffle as a logical bit shift (X86ISD::VSHLI or
/// X86ISD::VSRLI) of wider integer elements.
///
/// The vector is viewed as groups of Scale adjacent elements. Within each
/// group, either the first Shift lanes (left shift) or the last Shift lanes
/// (right shift) must be zeroable, and the remaining lanes must be a
/// sequential run of the same group of one input. Group widths of up to 64
/// bits are tried, smallest first. Returns a null SDValue if nothing matches.
static SDValue lowerVectorShuffleAsBitShift(SDLoc DL, MVT VT, SDValue V1,
                                            SDValue V2, ArrayRef<int> Mask,
                                            SelectionDAG &DAG) {
  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  auto TryShift = [&](int Shift, int Scale) -> SDValue {
    MVT WideEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
    MVT ShiftVT = MVT::getVectorVT(WideEltVT, Size / Scale);
    assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
           "Illegal integer vector type");

    // Check the zeroable lanes at the bottom (left shift) and at the top
    // (right shift) of every group.
    bool BottomZeroable = true, TopZeroable = true;
    for (int Base = 0; Base != Size; Base += Scale)
      for (int Off = 0; Off != Shift; ++Off) {
        BottomZeroable &= Zeroable[Base + Off];
        TopZeroable &= Zeroable[Base + Scale - Shift + Off];
      }
    if (!BottomZeroable && !TopZeroable)
      return SDValue();

    // A left shift is preferred when both directions have their zeros.
    const bool IsLeft = BottomZeroable;
    const unsigned RunLen = Scale - Shift;

    // The surviving lanes of every group must come, in order, from the same
    // group of a single input.
    bool FromV1 = true, FromV2 = true;
    for (int Base = 0; Base != Size; Base += Scale) {
      unsigned DstPos = IsLeft ? Base + Shift : Base;
      unsigned SrcLow = IsLeft ? Base : Base + Shift;
      FromV1 &= isSequentialOrUndefInRange(Mask, DstPos, RunLen, SrcLow);
      FromV2 &=
          isSequentialOrUndefInRange(Mask, DstPos, RunLen, SrcLow + Size);
    }
    if (!FromV1 && !FromV2)
      return SDValue();

    // Emit the shift on the widened integer type.
    unsigned OpCode = IsLeft ? X86ISD::VSHLI : X86ISD::VSRLI;
    int ShiftAmt = Shift * VT.getScalarSizeInBits();
    SDValue Src = FromV1 ? V1 : V2;
    SDValue Wide = DAG.getNode(ISD::BITCAST, DL, ShiftVT, Src);
    SDValue Shifted = DAG.getNode(OpCode, DL, ShiftVT, Wide,
                                  DAG.getConstant(ShiftAmt, MVT::i8));
    return DAG.getNode(ISD::BITCAST, DL, VT, Shifted);
  };

  for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= 64; Scale *= 2) {
    for (int Shift = 1; Shift != Scale; ++Shift) {
      SDValue BitShift = TryShift(Shift, Scale);
      if (BitShift)
        return BitShift;
    }
  }

  return SDValue();
}
/// Emit the node sequence for extending the low elements of InputV by a
/// factor of Scale, viewed as a shuffle of type VT.
///
/// \param Scale  ratio between the extended and the original element width;
///               must be greater than one.
/// \param AnyExt true when the extension lanes are all undef, which permits
///               the cheaper PSHUFD/PSHUFHW forms.
///
/// Strategies, in order: X86ISD::VZEXT when SSE4.1 is available; PSHUFD for
/// an any-extend of 32-bit elements; PSHUFD + PSHUFHW for an any-extend of
/// 16-bit elements by more than 2; PSHUFB for 8-bit elements by more than 4
/// when SSSE3 is available; otherwise repeated UNPCKL against undef (any-
/// extend) or a zero vector (zero-extend), doubling the element width each
/// round.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
    SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
  assert(Scale > 1 && "Need a scale to extend.");
  int NumElements = VT.getVectorNumElements();
  int EltBits = VT.getScalarSizeInBits();
  assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
         "Only 8, 16, and 32 bit elements can be extended.");
  assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");

  // With SSE4.1 a single VZEXT node does the job.
  if (Subtarget->hasSSE41()) {
    MVT WideEltVT = MVT::getIntegerVT(EltBits * Scale);
    MVT ExtVT = MVT::getVectorVT(WideEltVT, NumElements / Scale);
    SDValue Extended = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
    return DAG.getNode(ISD::BITCAST, DL, VT, Extended);
  }

  if (AnyExt) {
    // Any-extend of 32-bit elements: one PSHUFD spreads the two low dwords.
    if (EltBits == 32) {
      int PSHUFDMask[4] = {0, -1, 1, -1};
      SDValue Shuffled =
          DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                      DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
                      getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
      return DAG.getNode(ISD::BITCAST, DL, VT, Shuffled);
    }

    // Any-extend of 16-bit elements to 64 bits: PSHUFD then PSHUFHW.
    if (EltBits == 16 && Scale > 2) {
      int PSHUFDMask[4] = {0, -1, 0, -1};
      InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                           DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV),
                           getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
      int PSHUFHWMask[4] = {1, -1, -1, -1};
      SDValue Shuffled =
          DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
                      DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV),
                      getV4X86ShuffleImm8ForMask(PSHUFHWMask, DAG));
      return DAG.getNode(ISD::BITCAST, DL, VT, Shuffled);
    }
  }

  // Extending bytes by more than 4x: use a single PSHUFB. Every Scale-th
  // control byte selects the next source byte, all others are 0x80.
  if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
    assert(NumElements == 16 && "Unexpected byte vector width!");
    SDValue PSHUFBMask[16];
    for (int i = 0; i < 16; ++i) {
      int Control = 0x80;
      if (i % Scale == 0)
        Control = i / Scale;
      PSHUFBMask[i] = DAG.getConstant(Control, MVT::i8);
    }
    InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV);
    SDValue Shuffled =
        DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
                    DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask));
    return DAG.getNode(ISD::BITCAST, DL, VT, Shuffled);
  }

  // Generic fallback: interleave with undef/zero, doubling the element width
  // on every round until the requested scale has been consumed.
  do {
    MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
    SDValue Filler = AnyExt ? DAG.getUNDEF(InputVT)
                            : getZeroVector(InputVT, Subtarget, DAG, DL);
    InputV = DAG.getNode(ISD::BITCAST, DL, InputVT, InputV);
    InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Filler);
    Scale /= 2;
    EltBits *= 2;
    NumElements /= 2;
  } while (Scale > 1);

  return DAG.getNode(ISD::BITCAST, DL, VT, InputV);
}
/// Try to recognise a shuffle as a zero- or any-extension of the low
/// elements of one input and lower it accordingly.
///
/// Every extension factor from the widest (64-bit result elements) down to
/// 2x is tried. For a factor Scale, lanes at multiples of Scale must take
/// consecutive elements 0, 1, 2, ... from one single input, and all other
/// lanes must be undef or zeroable. If no factor matches and the vector is
/// 128 bits wide, a final attempt matches "low half passed through, high
/// half zeroable" and emits X86ISD::VZEXT_MOVL on v2i64. Returns a null
/// SDValue when nothing matches.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
    SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

  int Bits = VT.getSizeInBits();
  int NumElements = VT.getVectorNumElements();
  assert(VT.getScalarSizeInBits() <= 32 &&
         "Exceeds 32-bit integer zero extension limit");
  assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");

  // Match and lower an extension by exactly Scale.
  auto TryExtendBy = [&](int Scale) -> SDValue {
    SDValue Source;
    bool OnlyUndefPadding = true;

    for (int Lane = 0; Lane < NumElements; ++Lane) {
      const int Idx = Mask[Lane];
      if (Idx == -1)
        continue; // Undef fits anywhere and constrains nothing.

      if (Lane % Scale == 0) {
        // Base lanes must all come from one input ...
        SDValue V = Idx < NumElements ? V1 : V2;
        if (!Source)
          Source = V;
        else if (Source != V)
          return SDValue();

        // ... and must be consecutive low elements of it.
        if (Idx % NumElements != Lane / Scale)
          return SDValue();
        continue;
      }

      // Padding lanes that are not undef have to be zeroable, and then this
      // is a zero-extend rather than an any-extend.
      if (!Zeroable[Lane])
        return SDValue();
      OnlyUndefPadding = false;
    }

    // No base lane was defined at all.
    if (!Source)
      return SDValue();

    return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
        DL, VT, Scale, OnlyUndefPadding, Source, Subtarget, DAG);
  };

  assert(Bits % 64 == 0 &&
         "The number of bits in a vector must be divisible by 64 on x86!");
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    assert(NumElements % NumExtElements == 0 &&
           "The input vector size must be divisible by the extended size.");
    SDValue Extended = TryExtendBy(NumElements / NumExtElements);
    if (Extended)
      return Extended;
  }

  // The remaining pattern is only handled for 128-bit vectors.
  if (Bits != 128)
    return SDValue();

  // The whole upper half has to be zeroable.
  const int HalfElements = NumElements / 2;
  for (int Lane = HalfElements; Lane != NumElements; ++Lane)
    if (!Zeroable[Lane])
      return SDValue();

  // The lower half has to be the untouched lower half of one input.
  SDValue LowSource;
  if (isSequentialOrUndefInRange(Mask, 0, HalfElements, 0))
    LowSource = V1;
  else if (isSequentialOrUndefInRange(Mask, 0, HalfElements, NumElements))
    LowSource = V2;
  if (!LowSource)
    return SDValue();

  SDValue Wide = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LowSource);
  SDValue Moved = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, Wide);
  return DAG.getNode(ISD::BITCAST, DL, VT, Moved);
}
/// Try to obtain element Idx of vector V directly as a scalar value.
///
/// Bitcasts on V are looked through as long as the underlying value is a
/// vector with the same scalar width. The scalar is available when that
/// value is a BUILD_VECTOR, or a SCALAR_TO_VECTOR and Idx is 0. The result
/// is bitcast to the element type V had before the bitcasts were stripped.
/// Returns a null SDValue otherwise.
static SDValue getScalarValueForVectorElement(SDValue V, int Idx,
                                              SelectionDAG &DAG) {
  MVT VT = V.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();

  // Peel off every bitcast wrapping the vector.
  for (; V.getOpcode() == ISD::BITCAST; V = V.getOperand(0)) {
  }

  // A bitcast that changed the element width (or came from a non-vector)
  // means no single source element corresponds to lane Idx.
  MVT SrcVT = V.getSimpleValueType();
  if (!SrcVT.isVector())
    return SDValue();
  if (SrcVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  const unsigned Opcode = V.getOpcode();
  const bool IsBuildVector = Opcode == ISD::BUILD_VECTOR;
  const bool IsLaneZeroOfScalar =
      Idx == 0 && Opcode == ISD::SCALAR_TO_VECTOR;
  if (!IsBuildVector && !IsLaneZeroOfScalar)
    return SDValue();

  return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, V.getOperand(Idx));
}
/// Return true if V, after looking through any chain of bitcasts, is a
/// non-extending load node (as reported by ISD::isNON_EXTLoad).
static bool isShuffleFoldableLoad(SDValue V) {
  SDValue Stripped = V;
  while (Stripped.getOpcode() == ISD::BITCAST)
    Stripped = Stripped.getOperand(0);

  return ISD::isNON_EXTLoad(Stripped.getNode());
}
/// Try to lower a shuffle that inserts a single element of V2 into V1.
///
/// The inserted element is the first mask entry that refers to V2. Two
/// shapes are handled: V1 is zeroable in every other lane (lowered through
/// X86ISD::VZEXT_MOVL, followed by a shuffle or byte shift when the element
/// is not destined for lane 0), or V1 is kept as-is and a floating point
/// element is inserted into lane 0 (lowered as X86ISD::MOVSS / MOVSD).
/// Returns a null SDValue when the shuffle does not fit.
///
/// NOTE(review): if no mask entry refers to V2, V2Index below equals
/// Mask.size() and Mask[V2Index] reads past the end. The callers visible in
/// this part of the file only call this with exactly one V2 element; confirm
/// that this holds for all callers.
static SDValue lowerVectorShuffleAsElementInsertion(
MVT VT, SDLoc DL, SDValue V1, SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
// Position of the first lane that takes its element from V2.
int V2Index = std::find_if(Mask.begin(), Mask.end(),
[&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
// V1 counts as zeroable when every lane other than V2Index is zeroable.
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
IsV1Zeroable = false;
break;
}
// Prefer working from the scalar that defines the inserted V2 element, if
// it can be recovered.
if (SDValue V2S = getScalarValueForVectorElement(
V2, Mask[V2Index] - Mask.size(), DAG)) {
V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
// i8 and i16 scalars are zero-extended to i32 and inserted as v4i32; this
// is only done when the rest of the result is zeroable.
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
if (!IsV1Zeroable)
return SDValue();
ExtVT = MVT::v4i32;
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
EltVT == MVT::i16) {
// Without a scalar, only element 0 of V2 can be inserted, and not for i8
// or i16 element types.
return SDValue();
}
if (!IsV1Zeroable) {
assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
// Keeping V1 is only supported for a floating point insertion into lane 0.
if (!VT.isFloatingPoint() || V2Index != 0)
return SDValue();
// All remaining lanes must leave V1 in place.
SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
V1Mask[V2Index] = -1;
if (!isNoopShuffleMask(V1Mask))
return SDValue();
// NOTE(review): with SSE4.1 this path gives up, presumably because the
// caller has a better lowering (e.g. a blend) available; confirm.
if (Subtarget->hasSSE41())
return SDValue();
assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
"Only two types of floating point element types to handle!");
return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
ExtVT, V1, V2);
}
// V1 is zeroable: emit VZEXT_MOVL on V2 and bitcast back if the i8/i16
// path switched the working type to v4i32.
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
if (ExtVT != VT)
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
// The element now sits in lane 0; move it to lane V2Index if necessary.
if (V2Index != 0) {
if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
// Shuffle lane 0 into V2Index; every other lane reads lane 1.
SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
// Otherwise shift the whole register left, as v2i64, by
// V2Index * element-size-in-bits.
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
DAG.getConstant(
V2Index * EltVT.getSizeInBits(),
DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
}
}
return V2;
}
/// Try to lower a single-input shuffle as a broadcast (X86ISD::VBROADCAST)
/// of one element.
///
/// Requires AVX, and AVX2 for integer vector types. The mask must select the
/// same element in every defined lane. The broadcast source is then traced
/// through CONCAT_VECTORS and INSERT_SUBVECTOR nodes to the narrowest value
/// that provides the element. A scalar operand of a BUILD_VECTOR (or of a
/// SCALAR_TO_VECTOR for element 0) is broadcast directly; without AVX2 it
/// must be a foldable load. Otherwise only element 0 of a vector can be
/// broadcast, and only with AVX2.
///
/// \param V    the single shuffle input.
/// \param Mask shuffle mask; all defined entries are expected to refer to V.
/// \returns the broadcast node, or a null SDValue if the shuffle is not a
///          broadcast that can be lowered this way.
static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
                                             ArrayRef<int> Mask,
                                             const X86Subtarget *Subtarget,
                                             SelectionDAG &DAG) {
  if (!Subtarget->hasAVX())
    return SDValue();
  if (VT.isInteger() && !Subtarget->hasAVX2())
    return SDValue();

  // Every defined mask entry has to name the same source element.
  int BroadcastIdx = -1;
  for (int M : Mask)
    if (M >= 0 && BroadcastIdx == -1)
      BroadcastIdx = M;
    else if (M >= 0 && M != BroadcastIdx)
      return SDValue();

  // A fully undef mask names no element at all. Bail out instead of using
  // -1 as an operand index further down.
  if (BroadcastIdx < 0)
    return SDValue();

  assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
                                            "a sorted mask where the broadcast "
                                            "comes from V1.");

  // Walk down through nodes that merely assemble vectors until we reach the
  // value that actually holds the broadcast element.
  for (;;) {
    switch (V.getOpcode()) {
    case ISD::CONCAT_VECTORS: {
      // Use the element count of the concatenated operands themselves. After
      // a previous iteration V may be narrower than the shuffle, so deriving
      // the width from Mask.size() would select the wrong operand.
      int OperandSize =
          static_cast<int>(V.getOperand(0).getValueType().getVectorNumElements());
      V = V.getOperand(BroadcastIdx / OperandSize);
      BroadcastIdx %= OperandSize;
      continue;
    }

    case ISD::INSERT_SUBVECTOR: {
      SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
      auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
      if (!ConstantIdx)
        break;

      // Descend into the inserted subvector if it covers the element,
      // otherwise keep looking in the vector that was inserted into.
      int BeginIdx = (int)ConstantIdx->getZExtValue();
      int EndIdx =
          BeginIdx + (int)VInner.getValueType().getVectorNumElements();
      if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
        BroadcastIdx -= BeginIdx;
        V = VInner;
      } else {
        V = VOuter;
      }
      continue;
    }
    }
    break;
  }

  if (V.getOpcode() == ISD::BUILD_VECTOR ||
      (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
    // Broadcast straight from the scalar operand.
    V = V.getOperand(BroadcastIdx);

    // Without AVX2 the scalar has to be a load that can be folded.
    if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
      return SDValue();
  } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
    // Broadcasting from a vector value needs AVX2 and only works for its
    // element 0.
    return SDValue();
  }

  return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
}
/// Try to lower a v4f32 shuffle as a single X86ISD::INSERTPS node.
///
/// This matches when, apart from zeroable lanes and lanes that keep V1's
/// element in place, exactly one lane needs an element moved into it. That
/// element may come from V2, or from another lane of V1 (in which case V1 is
/// also used as the insertion source). The immediate is assembled as
/// (source lane << 6) | (destination lane << 4) | zero mask. Returns a null
/// SDValue when more than one lane or no lane needs an insertion.
static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
                                            ArrayRef<int> Mask,
                                            SelectionDAG &DAG) {
  assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);

  unsigned ZMask = 0;
  int DstLane = -1;          // The one lane that receives an inserted element.
  bool InsertFromV1 = false; // Whether that element comes from V1.
  bool V1UsedInPlace = false;

  for (int Lane = 0; Lane < 4; ++Lane) {
    if (Zeroable[Lane]) {
      ZMask |= 1 << Lane;
      continue;
    }

    // V1's element stays where it is.
    if (Mask[Lane] == Lane) {
      V1UsedInPlace = true;
      continue;
    }

    // Only one element can be inserted.
    if (DstLane != -1)
      return SDValue();

    DstLane = Lane;
    InsertFromV1 = Mask[Lane] < 4;
  }

  // Nothing to insert.
  if (DstLane == -1)
    return SDValue();

  // The source lane is counted from the start of the vector we insert from.
  unsigned SrcLane = 0;
  if (InsertFromV1) {
    // An out-of-place V1 element: insert from V1 and drop the original V2.
    SrcLane = Mask[DstLane];
    V2 = V1;
  } else {
    SrcLane = Mask[DstLane] - 4;
  }

  // If no lane keeps V1's element, the first operand does not matter.
  if (!V1UsedInPlace)
    V1 = DAG.getUNDEF(MVT::v4f32);

  unsigned InsertPSMask = SrcLane << 6 | DstLane << 4 | ZMask;
  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");

  SDLoc DL(Op);
  SDValue Imm = DAG.getConstant(InsertPSMask, MVT::i8);
  return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, Imm);
}
/// Lower a v2f64 vector shuffle.
///
/// Single-input masks become MOVDDUP (SSE3, mask {0,0}), VPERMILPI (AVX) or
/// SHUFP of V1 with itself. Two-input masks are tried, in order, as UNPCKL /
/// UNPCKH, a single-element insertion (in both operand orders), MOVLPD /
/// MOVSD from a scalar of V1, an SSE4.1 blend, and finally a generic SHUFP.
static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (isSingleInputShuffleMask(Mask)) {
    // Duplicating the low element has a dedicated instruction with SSE3.
    if (Subtarget->hasSSE3() && isShuffleEquivalent(Mask, 0, 0))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, V1);

    // One immediate bit per result lane: set when the lane takes element 1.
    unsigned SHUFPDMask = 0;
    if (Mask[0] == 1)
      SHUFPDMask |= 1u;
    if (Mask[1] == 1)
      SHUFPDMask |= 2u;
    SDValue Imm = DAG.getConstant(SHUFPDMask, MVT::i8);

    if (Subtarget->hasAVX())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, Imm);

    // Otherwise feed the single input to both sides of SHUFP.
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V1, Imm);
  }

  assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
  assert(Mask[1] >= 2 && "Non-canonicalized blend!");

  // Masks that are exactly an unpack.
  if (isShuffleEquivalent(Mask, 0, 2))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
  if (isShuffleEquivalent(Mask, 1, 3))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);

  // Exactly one lane comes from V2: try an element insertion, first as
  // given and then with the two inputs swapped.
  const bool Lane0FromV2 = Mask[0] >= 2;
  const bool Lane1FromV2 = Mask[1] >= 2;
  if (Lane0FromV2 != Lane1FromV2) {
    SDValue Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v2f64, DL, V1, V2, Mask, Subtarget, DAG);
    if (Insertion)
      return Insertion;

    // Toggling bit 1 of a defined index moves it to the other input.
    auto SwapInput = [](int M) { return M < 0 ? -1 : (M ^ 2); };
    int InverseMask[2] = {SwapInput(Mask[0]), SwapInput(Mask[1])};
    Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v2f64, DL, V2, V1, InverseMask, Subtarget, DAG);
    if (Insertion)
      return Insertion;
  }

  // Low element from V1 combined with the high element of V2: if V1's
  // element is available as a scalar, move (or load) it over V2's low lane.
  if (isShuffleEquivalent(Mask, 0, 3) || isShuffleEquivalent(Mask, 1, 3)) {
    if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG)) {
      const unsigned MoveOpcode =
          isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD;
      SDValue LowVec =
          DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S);
      return DAG.getNode(MoveOpcode, DL, MVT::v2f64, V2, LowVec);
    }
  }

  if (Subtarget->hasSSE41()) {
    SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
                                              Subtarget, DAG);
    if (Blend)
      return Blend;
  }

  // Generic two-input SHUFP: bit 0 picks V1's element, bit 1 picks V2's.
  unsigned SHUFPDMask = 0;
  if (Mask[0] == 1)
    SHUFPDMask |= 1u;
  if (Mask[1] == 3)
    SHUFPDMask |= 2u;
  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
                     DAG.getConstant(SHUFPDMask, MVT::i8));
}
/// Lower a v2i64 vector shuffle.
///
/// Single-input masks are lowered as a broadcast when possible, otherwise as
/// a PSHUFD on the v4i32 view of the input. Two-input masks are tried, in
/// order, as a byte shift, a single-element insertion (in both operand
/// orders), UNPCKL / UNPCKH, an SSE4.1 blend and an SSSE3 byte rotate. If
/// none applies the shuffle is re-issued in the v2f64 domain.
static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (isSingleInputShuffleMask(Mask)) {
    SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v2i64, DL, V1, Mask,
                                                      Subtarget, DAG);
    if (Broadcast)
      return Broadcast;

    // Express the shuffle on 32-bit halves: each 64-bit index M expands to
    // the pair {2M, 2M+1}. Undef indices are treated as element 0.
    V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
    const int Lo = std::max(Mask[0], 0);
    const int Hi = std::max(Mask[1], 0);
    int WidenedMask[4] = {Lo * 2, Lo * 2 + 1, Hi * 2, Hi * 2 + 1};
    SDValue Imm = getV4X86ShuffleImm8ForMask(WidenedMask, DAG);
    SDValue Shuffled = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, Imm);
    return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Shuffled);
  }

  {
    SDValue Shift =
        lowerVectorShuffleAsByteShift(DL, MVT::v2i64, V1, V2, Mask, DAG);
    if (Shift)
      return Shift;
  }

  // Exactly one lane comes from V2: try an element insertion, first as
  // given and then with the two inputs swapped.
  const bool Lane0FromV2 = Mask[0] >= 2;
  const bool Lane1FromV2 = Mask[1] >= 2;
  if (Lane0FromV2 != Lane1FromV2) {
    SDValue Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG);
    if (Insertion)
      return Insertion;

    // Toggling bit 1 of a defined index moves it to the other input.
    auto SwapInput = [](int M) { return M < 0 ? -1 : (M ^ 2); };
    int InverseMask[2] = {SwapInput(Mask[0]), SwapInput(Mask[1])};
    Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v2i64, DL, V2, V1, InverseMask, Subtarget, DAG);
    if (Insertion)
      return Insertion;
  }

  // Masks that are exactly an unpack.
  if (isShuffleEquivalent(Mask, 0, 2))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
  if (isShuffleEquivalent(Mask, 1, 3))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);

  if (Subtarget->hasSSE41()) {
    SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
                                              Subtarget, DAG);
    if (Blend)
      return Blend;
  }

  if (Subtarget->hasSSSE3()) {
    SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v2i64, V1, V2,
                                                    Mask, Subtarget, DAG);
    if (Rotate)
      return Rotate;
  }

  // Fall back to shuffling in the floating point domain.
  V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
  V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
  SDValue FPShuffle = DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask);
  return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, FPShuffle);
}
/// Lower a 4-element shuffle of V1 and V2 using one or two X86ISD::SHUFP
/// nodes.
///
/// LowV / HighV are the two operands of the final SHUFP and NewMask is the
/// mask its immediate is built from. The cases with one or two V2 elements
/// rewrite these before the final node is emitted; for any other count the
/// final SHUFP is emitted on (V1, V2) with the mask unchanged.
///
/// NOTE(review): the index arithmetic below presumably relies on SHUFP
/// taking its two low result lanes from the first operand and its two high
/// result lanes from the second; this is not visible in this function.
static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
// Locate the single V2 element and the lane next to it in the same half
// (the lane index with the low bit toggled).
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
Mask.begin();
int V2AdjIndex = V2Index ^ 1;
if (Mask[V2AdjIndex] == -1) {
// The neighbouring lane is undef, so the whole half can be read from V2.
// If that is the low half, the operands are swapped.
if (V2Index < 2)
std::swap(LowV, HighV);
NewMask[V2Index] -= 4;
} else {
// The neighbouring lane holds a V1 element. First build a vector that has
// the V2 element in lane 0 and the V1 element in lane 2, and use that as
// the source for this half.
int V1Index = V2AdjIndex;
int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
if (V2Index < 2) {
LowV = V2;
HighV = V1;
} else {
HighV = V2;
}
// In the blended vector the V1 element is at index 2 and the V2 element at
// index 0.
NewMask[V1Index] = 2; NewMask[V2Index] = 0; }
} else if (NumV2Elements == 2) {
if (Mask[0] < 4 && Mask[1] < 4) {
// Low half from V1, high half from V2: only rebase the V2 indices.
NewMask[2] -= 4;
NewMask[3] -= 4;
} else if (Mask[2] < 4 && Mask[3] < 4) {
// Low half from V2, high half from V1: rebase and swap the operands.
NewMask[0] -= 4;
NewMask[1] -= 4;
HighV = V1;
LowV = V2;
} else {
// Each half mixes one V1 and one V2 element. Gather the two V1 elements
// into lanes 0-1 and the two V2 elements into lanes 2-3 of a temporary,
// then permute that temporary with itself.
int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
Mask[2] < 4 ? Mask[2] : Mask[3],
(Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
(Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
getV4X86ShuffleImm8ForMask(BlendMask, DAG));
LowV = HighV = V1;
NewMask[0] = Mask[0] < 4 ? 0 : 2;
NewMask[1] = Mask[0] < 4 ? 2 : 0;
NewMask[2] = Mask[2] < 4 ? 1 : 3;
NewMask[3] = Mask[2] < 4 ? 3 : 1;
}
}
return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
getV4X86ShuffleImm8ForMask(NewMask, DAG));
}
/// Lower a v4f32 vector shuffle.
///
/// Masks that only use V1 become a broadcast, MOVSLDUP / MOVSHDUP (SSE3),
/// VPERMILPI (AVX) or SHUFP of V1 with itself. Masks that use V2 are tried,
/// in order, as UNPCKL / UNPCKH, an element insertion (one V2 element in
/// lane 0), an SSE4.1 blend, an SSE4.1 INSERTPS, and finally the generic
/// SHUFPS lowering.
static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Count the lanes that read from V2.
  int NumV2Elements = 0;
  for (int M : Mask)
    if (M >= 4)
      ++NumV2Elements;

  if (NumV2Elements == 0) {
    SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4f32, DL, V1, Mask,
                                                      Subtarget, DAG);
    if (Broadcast)
      return Broadcast;

    // SSE3 has dedicated even/odd element duplication.
    if (Subtarget->hasSSE3()) {
      if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
        return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
      if (isShuffleEquivalent(Mask, 1, 1, 3, 3))
        return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
    }

    // Generic single-input permute: VPERMILPI with AVX, otherwise SHUFP
    // with V1 on both sides.
    SDValue Imm = getV4X86ShuffleImm8ForMask(Mask, DAG);
    if (Subtarget->hasAVX())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1, Imm);
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, Imm);
  }

  // Masks that are exactly an unpack.
  if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
  if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);

  // A single V2 element landing in lane 0 may be an element insertion.
  if (NumV2Elements == 1 && Mask[0] >= 4) {
    SDValue Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v4f32, DL, V1, V2, Mask, Subtarget, DAG);
    if (Insertion)
      return Insertion;
  }

  if (Subtarget->hasSSE41()) {
    SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
                                              Subtarget, DAG);
    if (Blend)
      return Blend;

    SDValue InsertPS = lowerVectorShuffleAsInsertPS(Op, V1, V2, Mask, DAG);
    if (InsertPS)
      return InsertPS;
  }

  return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
/// Lower a v4i32 vector shuffle.
///
/// A zero/any-extend pattern is tried first. Masks that only use V1 then
/// become a broadcast or a PSHUFD. Masks that use V2 are tried, in order, as
/// a bit shift, a byte shift, a single-element insertion, an SSE4.1 blend, a
/// bit mask, UNPCKL / UNPCKH and an SSSE3 byte rotate. If none applies the
/// shuffle is re-issued in the v4f32 domain.
static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  {
    SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
                                                       Mask, Subtarget, DAG);
    if (ZExt)
      return ZExt;
  }

  // Count the lanes that read from V2.
  int NumV2Elements = 0;
  for (int M : Mask)
    if (M >= 4)
      ++NumV2Elements;

  if (NumV2Elements == 0) {
    SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v4i32, DL, V1, Mask,
                                                      Subtarget, DAG);
    if (Broadcast)
      return Broadcast;

    // Masks equivalent to a self-unpack are replaced by the fully defined
    // unpack mask before the PSHUFD immediate is computed.
    const int UnpackLoMask[] = {0, 0, 1, 1};
    const int UnpackHiMask[] = {2, 2, 3, 3};
    if (isShuffleEquivalent(Mask, 0, 0, 1, 1))
      Mask = UnpackLoMask;
    else if (isShuffleEquivalent(Mask, 2, 2, 3, 3))
      Mask = UnpackHiMask;

    SDValue Imm = getV4X86ShuffleImm8ForMask(Mask, DAG);
    return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, Imm);
  }

  {
    SDValue Shift =
        lowerVectorShuffleAsBitShift(DL, MVT::v4i32, V1, V2, Mask, DAG);
    if (Shift)
      return Shift;
  }
  {
    SDValue Shift =
        lowerVectorShuffleAsByteShift(DL, MVT::v4i32, V1, V2, Mask, DAG);
    if (Shift)
      return Shift;
  }

  // A single V2 element may be an element insertion.
  if (NumV2Elements == 1) {
    SDValue Insertion = lowerVectorShuffleAsElementInsertion(
        MVT::v4i32, DL, V1, V2, Mask, Subtarget, DAG);
    if (Insertion)
      return Insertion;
  }

  if (Subtarget->hasSSE41()) {
    SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
                                              Subtarget, DAG);
    if (Blend)
      return Blend;
  }

  {
    SDValue Masked =
        lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, DAG);
    if (Masked)
      return Masked;
  }

  // Masks that are exactly an unpack.
  if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
  if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);

  if (Subtarget->hasSSSE3()) {
    SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i32, V1, V2,
                                                    Mask, Subtarget, DAG);
    if (Rotate)
      return Rotate;
  }

  // Fall back to shuffling in the floating point domain.
  SDValue FPShuffle = DAG.getVectorShuffle(
      MVT::v4f32, DL, DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
      DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask);
  return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, FPShuffle);
}
static SDValue lowerV8I16SingleInputVectorShuffle(
SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
SmallVector<int, 4> LoInputs;
std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
[](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
[](int M) { return M >= 0; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL =
std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
int NumHToL = LoInputs.size() - NumLToL;
int NumLToH =
std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
int NumHToH = HiInputs.size() - NumLToH;
MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v8i16, DL, V,
Mask, Subtarget, DAG))
return Broadcast;
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v8i16, V, V, Mask, DAG))
return Shift;
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v8i16, V, V, Mask, DAG))
return Shift;
if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
return Rotate;
auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
int AOffset, int BOffset) {
assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
"Must call this with A having 3 or 1 inputs from the A half.");
assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
"Must call this with B having 1 or 3 inputs from the B half.");
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
int ADWord, BDWord;
int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord;
int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord;
int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset;
ArrayRef<int> TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs;
int OneInput = AToAInputs.size() == 3 ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
TripleDWord = TripleNonInputIdx / 2;
OneInputDWord = (OneInput / 2) ^ 1;
if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
int NumFlippedAToBInputs =
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
int NumFlippedBToBInputs =
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
if ((NumFlippedAToBInputs == 1 &&
(NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
(NumFlippedBToBInputs == 1 &&
(NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
ArrayRef<int> Inputs) {
int FixIdx = PinnedIdx ^ 1; bool IsFixIdxInput = std::find(Inputs.begin(), Inputs.end(),
PinnedIdx ^ 1) != Inputs.end();
int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
bool IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
FixFreeIdx) != Inputs.end();
if (IsFixIdxInput == IsFixFreeIdxInput)
FixFreeIdx += 1;
IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
FixFreeIdx) != Inputs.end();
assert(IsFixIdxInput != IsFixFreeIdxInput &&
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
MVT::v8i16, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DAG));
for (int &M : Mask)
if (M != -1 && M == FixIdx)
M = FixFreeIdx;
else if (M != -1 && M == FixFreeIdx)
M = FixIdx;
};
if (NumFlippedBToBInputs != 0) {
int BPinnedIdx =
BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
int APinnedIdx =
AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
}
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
for (int &M : Mask)
if (M != -1 && M/2 == ADWord)
M = 2 * BDWord + M % 2;
else if (M != -1 && M/2 == BDWord)
M = 2 * ADWord + M % 2;
return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
Mask);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
int PSHUFLMask[4] = {-1, -1, -1, -1};
int PSHUFHMask[4] = {-1, -1, -1, -1};
int PSHUFDMask[4] = {-1, -1, -1, -1};
auto fixInPlaceInputs =
[&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
MutableArrayRef<int> SourceHalfMask,
MutableArrayRef<int> HalfMask, int HalfOffset) {
if (InPlaceInputs.empty())
return;
if (InPlaceInputs.size() == 1) {
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
return;
}
if (IncomingInputs.empty()) {
for (int Input : InPlaceInputs) {
SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
PSHUFDMask[Input / 2] = Input / 2;
}
return;
}
assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
InPlaceInputs[0] - HalfOffset;
int AdjIndex = InPlaceInputs[0] ^ 1;
SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
};
fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);
auto moveInputsToRightHalf = [&PSHUFDMask](
MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
int DestOffset) {
auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
};
auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
int Word) {
int LowWord = Word & ~1;
int HighWord = Word | 1;
return isWordClobbered(SourceHalfMask, LowWord) ||
isWordClobbered(SourceHalfMask, HighWord);
};
if (IncomingInputs.empty())
return;
if (ExistingInputs.empty()) {
for (int Input : IncomingInputs) {
if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) {
SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
Input - SourceOffset;
for (int &M : HalfMask)
if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
M = Input;
else if (M == Input)
M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
} else {
assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
Input - SourceOffset &&
"Previous placement doesn't match!");
}
Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
}
if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1)
PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
else
assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
Input / 2 &&
"Previous placement doesn't match!");
}
for (int &M : HalfMask)
if (M >= SourceOffset && M < SourceOffset + 4) {
M = M - SourceOffset + DestOffset;
assert(M >= 0 && "This should never wrap below zero!");
}
return;
}
if (IncomingInputs.size() == 1) {
if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
int InputFixed = std::find(std::begin(SourceHalfMask),
std::end(SourceHalfMask), -1) -
std::begin(SourceHalfMask) + SourceOffset;
SourceHalfMask[InputFixed - SourceOffset] =
IncomingInputs[0] - SourceOffset;
std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
InputFixed);
IncomingInputs[0] = InputFixed;
}
} else if (IncomingInputs.size() == 2) {
if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
IncomingInputs[1] - SourceOffset};
if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
SourceHalfMask[InputsFixed[0] ^ 1] == -1) {
SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
InputsFixed[1] = InputsFixed[0] ^ 1;
} else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
SourceHalfMask[InputsFixed[1] ^ 1] == -1) {
SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
InputsFixed[0] = InputsFixed[1] ^ 1;
} else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] == -1 &&
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] == -1) {
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
} else {
for (int i = 0; i < 4; ++i)
assert((SourceHalfMask[i] == -1 || SourceHalfMask[i] == i) &&
"We can't handle any clobbers here!");
assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
"Cannot have adjacent inputs here!");
SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;
for (int &M : FinalSourceHalfMask)
if (M == (InputsFixed[0] ^ 1) + SourceOffset)
M = InputsFixed[1] + SourceOffset;
else if (M == InputsFixed[1] + SourceOffset)
M = (InputsFixed[0] ^ 1) + SourceOffset;
InputsFixed[1] = InputsFixed[0] ^ 1;
}
for (int &M : HalfMask)
if (M == IncomingInputs[0])
M = InputsFixed[0] + SourceOffset;
else if (M == IncomingInputs[1])
M = InputsFixed[1] + SourceOffset;
IncomingInputs[0] = InputsFixed[0] + SourceOffset;
IncomingInputs[1] = InputsFixed[1] + SourceOffset;
}
} else {
llvm_unreachable("Unhandled input size!");
}
int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
for (int &M : HalfMask)
for (int Input : IncomingInputs)
if (M == Input)
M = FreeDWord * 2 + Input % 2;
};
moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
4, 0);
moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
0, 4);
if (!isNoopShuffleMask(PSHUFLMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
assert(std::count_if(LoMask.begin(), LoMask.end(),
[](int M) { return M >= 4; }) == 0 &&
"Failed to lift all the high half inputs to the low mask!");
assert(std::count_if(HiMask.begin(), HiMask.end(),
[](int M) { return M >= 0 && M < 4; }) == 0 &&
"Failed to lift all the low half inputs to the high mask!");
if (!isNoopShuffleMask(LoMask))
V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
getV4X86ShuffleImm8ForMask(LoMask, DAG));
for (int &M : HiMask)
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
getV4X86ShuffleImm8ForMask(HiMask, DAG));
return V;
}
/// \brief Heuristic choosing between an interleaving and a split lowering.
///
/// Every defined mask entry is classified by the input it reads (entries
/// >= Mask.size() count as the second input) and by the position it is
/// written to (even/odd, and low half/high half). For each of the two
/// schemes the cheaper of the two possible input-to-slot assignments is
/// taken as its number of "crosses".
///
/// \returns true when interleaving yields strictly fewer crosses than
/// splitting into halves.
static bool shouldLowerAsInterleaving(ArrayRef<int> Mask) {
  const int NumElts = Mask.size();
  const int HalfElts = NumElts / 2;

  // First index: input (0 or 1).
  // Second index: 0 = even position / low half, 1 = odd position / high half.
  int ByParity[2][2] = {{0, 0}, {0, 0}};
  int ByHalf[2][2] = {{0, 0}, {0, 0}};

  for (int Pos = 0; Pos != NumElts; ++Pos) {
    const int M = Mask[Pos];
    if (M < 0)
      continue; // Negative entries do not read any input.

    const int Input = M >= NumElts ? 1 : 0;
    ++ByHalf[Input][Pos < HalfElts ? 0 : 1];
    ++ByParity[Input][(Pos % 2) == 0 ? 0 : 1];
  }

  // Either input may be assigned to the even (resp. low) slots; take the
  // assignment that misplaces the fewest elements.
  const int InterleavedCrosses = std::min(ByParity[1][0] + ByParity[0][1],
                                          ByParity[0][0] + ByParity[1][1]);
  const int SplitCrosses =
      std::min(ByHalf[1][0] + ByHalf[0][1], ByHalf[0][0] + ByHalf[1][1]);
  return InterleavedCrosses < SplitCrosses;
}
/// \brief Lower a two-input v8i16 shuffle that reads at most four elements
/// in total by packing each input's used elements into one half, unpacking
/// the two inputs together, and finishing with a single-input shuffle.
///
/// \p Mask is modified in place: it is progressively rewritten to describe
/// where each requested element lives after every intermediate step.
static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
SDValue V2,
MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
// Bucket the mask *positions* (not the mask values) by which half of which
// input they read: [0,4) low V1, [4,8) high V1, [8,12) low V2, >=12 high V2.
SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
for (int i = 0; i < 8; ++i)
if (Mask[i] >= 0 && Mask[i] < 4)
LoV1Inputs.push_back(i);
else if (Mask[i] >= 4 && Mask[i] < 8)
HiV1Inputs.push_back(i);
else if (Mask[i] >= 8 && Mask[i] < 12)
LoV2Inputs.push_back(i);
else if (Mask[i] >= 12)
HiV2Inputs.push_back(i);
int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
// The counts are only consumed by the asserts below; the void casts keep
// builds without asserts free of unused-variable warnings.
(void)NumV1Inputs;
(void)NumV2Inputs;
assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
// Merge through the low halves when at least as many references target low
// halves as high halves (ties go to low).
bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
HiV1Inputs.size() + HiV2Inputs.size();
// Returns a single-input shuffle of V that gathers every element referenced
// from V into the target half (low when MoveToLo, otherwise high), and
// rewrites the corresponding Mask entries to the elements' new locations.
// MaskOffset is the base mask value for this input (0 for V1, 8 for V2).
auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
ArrayRef<int> HiInputs, bool MoveToLo,
int MaskOffset) {
// "Good" inputs already live in the target half; "bad" ones must move.
ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
// Nothing to move: V can be used unchanged.
if (BadInputs.empty())
return V;
int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
int MoveOffset = MoveToLo ? 0 : 4;
if (GoodInputs.empty()) {
// Every referenced element is in the wrong half: move each to the same
// in-half slot (value % 4) of the target half.
for (int BadInput : BadInputs) {
MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
}
} else {
if (GoodInputs.size() == 2) {
// Two good inputs are compacted into the first two slots of the target
// half.
MoveMask[MoveOffset] = Mask[GoodInputs[0]] - MaskOffset;
MoveMask[MoveOffset + 1] = Mask[GoodInputs[1]] - MaskOffset;
Mask[GoodInputs[0]] = MoveOffset + MaskOffset;
Mask[GoodInputs[1]] = MoveOffset + 1 + MaskOffset;
} else {
// Otherwise the good inputs are kept where they already are.
for (int GoodInput : GoodInputs)
MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
}
if (BadInputs.size() == 2) {
// Place the two bad inputs as an adjacent pair in the other aligned pair
// of slots of the target half, i.e. the pair not holding the first
// occupied slot found.
int GoodMaskIdx =
std::find_if(std::begin(MoveMask) + MoveOffset, std::end(MoveMask),
[](int M) { return M >= 0; }) -
std::begin(MoveMask);
int MoveMaskIdx =
((((GoodMaskIdx - MoveOffset) & ~1) + 2) % 4) + MoveOffset;
assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
MoveMask[MoveMaskIdx + 1] = Mask[BadInputs[1]] - MaskOffset;
Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
Mask[BadInputs[1]] = MoveMaskIdx + 1 + MaskOffset;
} else {
assert(BadInputs.size() == 1 && "All sizes handled");
// A single bad input takes the first free slot at or after the start of
// the target half.
int MoveMaskIdx = std::find(std::begin(MoveMask) + MoveOffset,
std::end(MoveMask), -1) -
std::begin(MoveMask);
MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
}
}
return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
MoveMask);
};
V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo,
0);
V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo,
8);
// Translate each mask entry to its index in the unpacked vector: in-half
// slot k of V1 maps to 2k and of V2 to 2k+1.
// NOTE(review): presumably this mirrors the element interleaving performed
// by UNPCKL/UNPCKH -- confirm against the node's definition.
for (int &M : Mask)
if (M != -1)
M = 2 * (M % 4) + (M / 8);
return DAG.getVectorShuffle(
MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
DL, MVT::v8i16, V1, V2),
DAG.getUNDEF(MVT::v8i16), Mask);
}
/// \brief Lowering entry point for v8i16 shuffles.
///
/// Tries a sequence of specialized strategies in a fixed order and returns
/// the first one that succeeds. If none applies, the shuffle is decomposed
/// into two simpler shuffles that are recombined with an unpack.
static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");

  // Work on a private, mutable copy of the mask; some of the helpers below
  // take a MutableArrayRef.
  ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(Op)->getMask();
  int MaskStorage[8];
  for (int i = 0; i != 8; ++i)
    MaskStorage[i] = OrigMask[i];
  MutableArrayRef<int> Mask(MaskStorage);
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Zero/any-extension is attempted before anything else.
  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
          DL, MVT::v8i16, V1, V2, OrigMask, Subtarget, DAG))
    return ZExt;

  // Count how many mask entries read each input: [0,8) is V1, >= 8 is V2.
  int NumV1Inputs = 0;
  int NumV2Inputs = 0;
  for (int M : Mask) {
    if (M >= 8)
      ++NumV2Inputs;
    else if (M >= 0)
      ++NumV1Inputs;
  }

  // Single-input shuffles have their own lowering routine.
  if (NumV2Inputs == 0)
    return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG);

  assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
                            "to be V1-input shuffles.");

  if (SDValue Shift =
          lowerVectorShuffleAsBitShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
    return Shift;

  if (SDValue Shift =
          lowerVectorShuffleAsByteShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
    return Shift;

  // Element insertion is only tried when exactly one element comes from V2.
  if (NumV2Inputs == 1) {
    if (SDValue V = lowerVectorShuffleAsElementInsertion(
            MVT::v8i16, DL, V1, V2, Mask, Subtarget, DAG))
      return V;
  }

  // Blends are only attempted when SSE4.1 is available.
  if (Subtarget->hasSSE41()) {
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Blend;
  }

  if (SDValue Masked =
          lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, DAG))
    return Masked;

  // Exact unpack patterns map to a single node.
  if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 2, 10, 3, 11))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
  if (isShuffleEquivalent(Mask, 4, 12, 5, 13, 6, 14, 7, 15))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);

  if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V2,
                                                      Mask, Subtarget, DAG))
    return Rotate;

  // With at most four used elements the basic blend lowering applies.
  if (NumV1Inputs + NumV2Inputs <= 4)
    return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);

  if (shouldLowerAsInterleaving(Mask)) {
    // Gather the even-position and odd-position results into the low halves
    // of two vectors, then interleave them with an unpack.
    int EMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
    int OMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
    for (int i = 0; i != 4; ++i) {
      EMask[i] = Mask[2 * i];
      OMask[i] = Mask[2 * i + 1];
    }

    SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask);
    SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask);
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds);
  }

  // Otherwise compute the low and high result halves separately (each in the
  // low four lanes of its own vector) and join them as 64-bit elements.
  int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
  int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
  for (int i = 0; i != 4; ++i) {
    LoBlendMask[i] = Mask[i];
    HiBlendMask[i] = Mask[i + 4];
  }

  SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
  SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
  LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV);
  HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV);
  return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
                     DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
}
/// \brief Detect masks of the form Mask[i] == (i << N) modulo the shuffle
/// width, for N in {1, 2, 3}.
///
/// The modulus is Mask.size() when isSingleInputShuffleMask reports a
/// single-input mask and twice that otherwise. Entries equal to -1 are
/// ignored.
///
/// \returns the smallest N in [1, 3] that every non-ignored entry satisfies,
/// or 0 if there is none.
static int canLowerByDroppingEvenElements(ArrayRef<int> Mask) {
  const bool SingleInput = isSingleInputShuffleMask(Mask);
  const int ShuffleModulus = Mask.size() * (SingleInput ? 1 : 2);
  assert(isPowerOf2_32((uint32_t)ShuffleModulus) &&
         "We should only be called with masks with a power-of-2 size!");
  // The modulus is a power of two, so reduction is a simple bit mask.
  const uint64_t ModMask = (uint64_t)ShuffleModulus - 1;

  // StillViable[N - 1] stays true while every entry seen so far matches the
  // pattern for shift amount N.
  const unsigned MaxDrops = 3;
  bool StillViable[MaxDrops] = {true, true, true};

  for (int Pos = 0, End = Mask.size(); Pos < End; ++Pos) {
    const int M = Mask[Pos];
    if (M == -1)
      continue;

    bool AnyMatched = false;
    for (unsigned Drops = 1; Drops <= MaxDrops; ++Drops) {
      bool &Viable = StillViable[Drops - 1];
      if (!Viable)
        continue;
      const uint64_t Expected = ((uint64_t)Pos << Drops) & ModMask;
      if ((uint64_t)M == Expected)
        AnyMatched = true;
      else
        Viable = false;
    }

    // Every candidate has now been ruled out, so no shift amount works.
    if (!AnyMatched)
      return 0;
  }

  for (unsigned Drops = 1; Drops <= MaxDrops; ++Drops)
    if (StillViable[Drops - 1])
      return Drops;
  return 0;
}
/// \brief Lowering entry point for v16i8 shuffles.
///
/// Tries a series of strategies in order (shifts, byte rotate, extension,
/// single-input widening, interleaving, PSHUFB, element insertion, PACKUS of
/// masked inputs) and otherwise falls back to widening both inputs to v8i16,
/// shuffling there, and packing the result back to bytes.
static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> OrigMask = SVOp->getMask();
assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// These strategies work directly on the original (immutable) mask.
if (SDValue Shift = lowerVectorShuffleAsBitShift(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
return Shift;
if (SDValue Shift = lowerVectorShuffleAsByteShift(
DL, MVT::v16i8, V1, V2, OrigMask, DAG))
return Shift;
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
return Rotate;
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
DL, MVT::v16i8, V1, V2, OrigMask, Subtarget, DAG))
return ZExt;
// Make a mutable copy of the mask. LoMask and HiMask are views onto the
// first and last eight entries of that same storage, so writes through them
// also change Mask.
int MaskStorage[16] = {
OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11],
OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]};
MutableArrayRef<int> Mask(MaskStorage);
MutableArrayRef<int> LoMask = Mask.slice(0, 8);
MutableArrayRef<int> HiMask = Mask.slice(8, 8);
// Entries >= 16 read from V2.
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
// Strategies that only apply when nothing is read from V2.
if (NumV2Elements == 0) {
if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(MVT::v16i8, DL, V1,
Mask, Subtarget, DAG))
return Broadcast;
// True when every adjacent byte pair (2k, 2k+1) either has an undefined
// entry or requests the same source byte twice.
auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
for (int i = 0; i < 16; i += 2)
if (Mask[i] != -1 && Mask[i + 1] != -1 && Mask[i] != Mask[i + 1])
return false;
return true;
};
// Attempts the lowering: v8i16 pre-shuffle -> byte unpack of V1 with itself
// -> v8i16 post-shuffle. Returns an empty SDValue when it does not apply.
auto tryToWidenViaDuplication = [&]() -> SDValue {
if (!canWidenViaDuplication(Mask))
return SDValue();
// Collect the sorted, de-duplicated source bytes taken from the low eight
// bytes and from the high eight bytes.
SmallVector<int, 4> LoInputs;
std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
[](int M) { return M >= 0 && M < 8; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
[](int M) { return M >= 8; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
// Target whichever half already holds more of the distinct inputs (ties go
// to the low half); inputs in the other half must be moved over.
bool TargetLo = LoInputs.size() >= HiInputs.size();
ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
// PreDupI16Shuffle is a v8i16 mask (bytes move in 16-bit pairs, hence the
// I / 2 indexing). LaneMap records, per source byte, the byte index it
// occupies after the pre-shuffle.
int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
SmallDenseMap<int, int, 8> LaneMap;
for (int I : InPlaceInputs) {
PreDupI16Shuffle[I/2] = I/2;
LaneMap[I] = I;
}
// Assign each moving input's 16-bit word to a free word slot in the target
// half; consecutive bytes of the same word share one slot.
int j = TargetLo ? 0 : 4, je = j + 4;
for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
while (j < je && PreDupI16Shuffle[j] != -1)
++j;
// No free slot remains in the target half: give up on this strategy.
if (j == je)
return SDValue();
PreDupI16Shuffle[j] = MovingInputs[i] / 2;
}
LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
}
V1 = DAG.getNode(
ISD::BITCAST, DL, MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL,
DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
// Unpack the target half of V1 with itself.
V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
MVT::v16i8, V1, V1);
// Build the final v8i16 shuffle: output word i / 2 takes the word indexed
// by the mapped byte position, rebased by 8 when the high half was used.
int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
for (int i = 0; i < 16; ++i)
if (Mask[i] != -1) {
int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
if (PostDupI16Shuffle[i / 2] == -1)
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
"Conflicting entrties in the original shuffle!");
}
return DAG.getNode(
ISD::BITCAST, DL, MVT::v16i8,
DAG.getVectorShuffle(MVT::v8i16, DL,
DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
};
if (SDValue V = tryToWidenViaDuplication())
return V;
}
// Interleaving strategy: shuffle the even-position and odd-position results
// into one half of two vectors, then unpack that half.
if (shouldLowerAsInterleaving(Mask)) {
// Count references to the low eight bytes of either input versus the high
// eight bytes of either input to choose which half to unpack.
int NumLoHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
return (M >= 0 && M < 8) || (M >= 16 && M < 24);
});
int NumHiHalf = std::count_if(Mask.begin(), Mask.end(), [](int M) {
return (M >= 8 && M < 16) || M >= 24;
});
int EMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1};
int OMask[16] = {-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1};
bool UnpackLo = NumLoHalf >= NumHiHalf;
// Views onto the half of EMask/OMask that the chosen unpack will read.
MutableArrayRef<int> TargetEMask(UnpackLo ? EMask : EMask + 8, 8);
MutableArrayRef<int> TargetOMask(UnpackLo ? OMask : OMask + 8, 8);
for (int i = 0; i < 8; ++i) {
TargetEMask[i] = Mask[2 * i];
TargetOMask[i] = Mask[2 * i + 1];
}
SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
MVT::v16i8, Evens, Odds);
}
// With SSSE3, shuffle each input separately with PSHUFB and OR the results.
if (Subtarget->hasSSSE3()) {
SDValue V1Mask[16];
SDValue V2Mask[16];
bool V1InUse = false;
bool V2InUse = false;
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
V1Mask[i] = V2Mask[i] = DAG.getUNDEF(MVT::i8);
} else {
// ZeroMask is the control byte used for lanes an input does not supply
// (PSHUFB writes zero to a lane whose control byte has its top bit set),
// so the two shuffled inputs can simply be ORed together.
const int ZeroMask = 0x80;
int V1Idx = (Mask[i] < 16 ? Mask[i] : ZeroMask);
int V2Idx = (Mask[i] < 16 ? ZeroMask : Mask[i] - 16);
// Lanes reported as zeroable take nothing from either input.
if (Zeroable[i])
V1Idx = V2Idx = ZeroMask;
V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8);
V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8);
V1InUse |= (ZeroMask != V1Idx);
V2InUse |= (ZeroMask != V2Idx);
}
}
// Only emit a PSHUFB for an input that actually contributes a lane.
if (V1InUse)
V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask));
if (V2InUse)
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask));
// Both inputs used: OR them. One used: return it. None: a zero vector.
if (V1InUse && V2InUse)
return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2);
if (V1InUse)
return V1; if (V2InUse)
return V2; return getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
}
// Element insertion is only tried when exactly one element comes from V2.
if (NumV2Elements == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v16i8, DL, V1, V2,
Mask, Subtarget, DAG))
return V;
// Masks of the form Mask[i] == (i << N): clear the bytes that will be
// dropped and compact with N rounds of PACKUS.
if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask)) {
bool IsSingleInput = isSingleInputShuffleMask(Mask);
assert(NumEvenDrops <= 3 &&
"No support for dropping even elements more than 3 times.");
// A 0xFF constant in a 16/32/64-bit element type, viewed as bytes, keeps
// one byte per element and clears the rest.
MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 };
SDValue ByteClearMask =
DAG.getNode(ISD::BITCAST, DL, MVT::v16i8,
DAG.getConstant(0xFF, MaskVTs[NumEvenDrops - 1]));
V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask);
if (!IsSingleInput)
V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask);
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1);
V2 = IsSingleInput ? V1 : DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2);
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, V2);
// Each additional drop packs the previous result with itself.
for (int i = 1; i < NumEvenDrops; ++i) {
Result = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, Result);
Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
}
return Result;
}
// Generic fallback: widen the bytes of each input to 16-bit elements,
// perform v8i16 shuffles for the low and high result halves, and pack the
// two halves back down to bytes.
int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
// Splits a half mask into per-input masks (V2 indices rebased to start at 0)
// and rewrites HalfMask in place into a blend that picks lane i from the
// first operand (value i) or from the second operand (value i + 8).
auto buildBlendMasks = [](MutableArrayRef<int> HalfMask,
MutableArrayRef<int> V1HalfBlendMask,
MutableArrayRef<int> V2HalfBlendMask) {
for (int i = 0; i < 8; ++i)
if (HalfMask[i] >= 0 && HalfMask[i] < 16) {
V1HalfBlendMask[i] = HalfMask[i];
HalfMask[i] = i;
} else if (HalfMask[i] >= 16) {
V2HalfBlendMask[i] = HalfMask[i] - 16;
HalfMask[i] = i + 8;
}
};
buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask);
buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask);
SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
// For one input V, produces the pair of v8i16 vectors holding the bytes
// requested for the low and the high result half. The blend masks are
// modified in place when the even-bytes-only shortcut is taken.
auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
MutableArrayRef<int> HiBlendMask) {
SDValue V1, V2;
// Shortcut: if only even-indexed bytes are requested, masking each 16-bit
// element with 0x00FF is enough, and byte index M becomes word index M / 2.
if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
[](int M) { return M >= 0 && M % 2 == 1; }) &&
std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
[](int M) { return M >= 0 && M % 2 == 1; })) {
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
DAG.getConstant(0x00FF, MVT::v8i16));
V2 = DAG.getUNDEF(MVT::v8i16);
for (int &M : LoBlendMask)
if (M >= 0)
M /= 2;
for (int &M : HiBlendMask)
if (M >= 0)
M /= 2;
} else {
// Otherwise unpack the low and high bytes of V against a zero vector to
// obtain two v8i16 values.
V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
}
SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
return std::make_pair(BlendedLo, BlendedHi);
};
SDValue V1Lo, V1Hi, V2Lo, V2Hi;
std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask);
std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask);
// LoMask/HiMask were rewritten by buildBlendMasks above and now select
// per-lane between the V1-derived and V2-derived vectors.
SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask);
SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask);
// Pack the two v8i16 halves back into a single v16i8 result.
return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
/// \brief Dispatch a 128-bit vector shuffle to the lowering routine for its
/// element type.
///
/// Handles v2i64, v2f64, v4i32, v4f32, v8i16 and v16i8; any other type is
/// treated as unreachable.
static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        MVT VT, const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  switch (VT.SimpleTy) {
  case MVT::v2i64:
    return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v2f64:
    return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v4i32:
    return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v4f32:
    return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v8i16:
    return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v16i8:
    return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
  default:
    break;
  }

  // Every supported type returned from the switch above.
  llvm_unreachable("Unimplemented!");
}
/// \brief Test whether adjacent pairs of mask elements can be merged into a
/// shuffle of elements twice as wide.
///
/// Each pair (Mask[2k], Mask[2k+1]) must be one of:
///   - both undef                          -> undef
///   - undef + an odd index                -> that index / 2
///   - an even index + undef               -> that index / 2
///   - zero/undef in both slots (>=1 zero) -> zero
///   - an even index E followed by E + 1   -> E / 2
///
/// On success the widened entries are appended to \p WidenedMask. On failure
/// \p WidenedMask may already hold entries for the pairs processed so far.
///
/// \returns true if every pair could be widened.
static bool canWidenShuffleElements(ArrayRef<int> Mask,
                                    SmallVectorImpl<int> &WidenedMask) {
  for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
    const int Lo = Mask[i];
    const int Hi = Mask[i + 1];
    const bool LoUndef = Lo == SM_SentinelUndef;
    const bool HiUndef = Hi == SM_SentinelUndef;
    const bool LoZero = Lo == SM_SentinelZero;
    const bool HiZero = Hi == SM_SentinelZero;

    int Widened;
    if (LoUndef && HiUndef) {
      Widened = SM_SentinelUndef;
    } else if (LoUndef && Hi >= 0 && Hi % 2 == 1) {
      // Only the upper half of the wide element is pinned down.
      Widened = Hi / 2;
    } else if (HiUndef && Lo >= 0 && Lo % 2 == 0) {
      // Only the lower half of the wide element is pinned down.
      Widened = Lo / 2;
    } else if (LoZero || HiZero) {
      // A zero can only pair with another zero or an undef.
      if (!((LoZero || LoUndef) && (HiZero || HiUndef)))
        return false;
      Widened = SM_SentinelZero;
    } else if (!LoUndef && Lo % 2 == 0 && Lo + 1 == Hi) {
      // An aligned, consecutive pair of source elements.
      Widened = Lo / 2;
    } else {
      return false;
    }
    WidenedMask.push_back(Widened);
  }
  assert(WidenedMask.size() == Mask.size() / 2 &&
         "Incorrect size of mask after widening the elements!");
  return true;
}
/// \brief Lower a 256-bit-or-wider shuffle by splitting it in half.
///
/// Both inputs are split into low and high subvectors. Each half of the
/// result is then built from those four pieces with as few half-width
/// shuffles as the mask allows, and the two halves are concatenated.
static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Mask,
                                          SelectionDAG &DAG) {
  assert(VT.getSizeInBits() >= 256 &&
         "Only for 256-bit or wider vector shuffles!");
  assert(V1.getSimpleValueType() == VT && "Bad operand type!");
  assert(V2.getSimpleValueType() == VT && "Bad operand type!");

  const int HalfMaskSize = Mask.size() / 2;
  ArrayRef<int> LoMask = Mask.slice(0, HalfMaskSize);
  ArrayRef<int> HiMask = Mask.slice(HalfMaskSize);

  int NumElements = VT.getVectorNumElements();
  int SplitNumElements = NumElements / 2;
  MVT ScalarVT = VT.getScalarType();
  MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);

  // Extract the four half-width source pieces.
  auto ExtractHalf = [&](SDValue V, int FirstElt) -> SDValue {
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, V,
                       DAG.getIntPtrConstant(FirstElt));
  };
  SDValue LoV1 = ExtractHalf(V1, 0);
  SDValue HiV1 = ExtractHalf(V1, SplitNumElements);
  SDValue LoV2 = ExtractHalf(V2, 0);
  SDValue HiV2 = ExtractHalf(V2, SplitNumElements);

  // Builds one half of the result from the half-width pieces.
  auto HalfBlend = [&](ArrayRef<int> HalfMask) -> SDValue {
    bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
    // V1BlendMask / V2BlendMask: per-input shuffles over that input's (lo, hi)
    // pair. BlendMask: final lane-wise choice between the two per-input
    // results (i selects the V1 side, SplitNumElements + i the V2 side).
    SmallVector<int, 32> V1BlendMask, V2BlendMask, BlendMask;
    for (int i = 0; i < SplitNumElements; ++i) {
      const int M = HalfMask[i];
      int V1Elt = -1, V2Elt = -1, BlendElt = -1;
      if (M >= NumElements) {
        // Element comes from V2.
        if (M >= NumElements + SplitNumElements)
          UseHiV2 = true;
        else
          UseLoV2 = true;
        V2Elt = M - NumElements;
        BlendElt = SplitNumElements + i;
      } else if (M >= 0) {
        // Element comes from V1.
        if (M >= SplitNumElements)
          UseHiV1 = true;
        else
          UseLoV1 = true;
        V1Elt = M;
        BlendElt = i;
      }
      V2BlendMask.push_back(V2Elt);
      V1BlendMask.push_back(V1Elt);
      BlendMask.push_back(BlendElt);
    }

    const bool UsesV1 = UseLoV1 || UseHiV1;
    const bool UsesV2 = UseLoV2 || UseHiV2;

    // Nothing is read at all: this half is undefined.
    if (!UsesV1 && !UsesV2)
      return DAG.getUNDEF(SplitVT);
    // Only one input is read: a single shuffle of its two halves suffices.
    if (!UsesV2)
      return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    if (!UsesV1)
      return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);

    SDValue V1Blend, V2Blend;
    if (UseLoV1 && UseHiV1) {
      V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    } else {
      // Only one half of V1 is read: use it directly and fold its element
      // selection into the final blend mask instead of emitting a shuffle.
      V1Blend = UseLoV1 ? LoV1 : HiV1;
      const int Rebase = UseLoV1 ? 0 : SplitNumElements;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
          BlendMask[i] = V1BlendMask[i] - Rebase;
    }
    if (UseLoV2 && UseHiV2) {
      V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
    } else {
      // Same folding for V2; its entries address the second shuffle operand,
      // i.e. the range [SplitNumElements, 2 * SplitNumElements).
      V2Blend = UseLoV2 ? LoV2 : HiV2;
      const int Rebase = UseLoV2 ? SplitNumElements : 0;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= SplitNumElements)
          BlendMask[i] = V2BlendMask[i] + Rebase;
    }
    return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
  };

  SDValue Lo = HalfBlend(LoMask);
  SDValue Hi = HalfBlend(HiMask);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
/// \brief Choose between splitting a wide shuffle in half and decomposing it
/// into per-input shuffles followed by a blend.
///
/// The decomposed-blend lowering is used when each input contributes at most
/// one distinct element, or when some input is read from more than one
/// 128-bit lane. Otherwise the shuffle is split.
static SDValue lowerVectorShuffleAsSplitOrBlend(SDLoc DL, MVT VT, SDValue V1,
                                                SDValue V2, ArrayRef<int> Mask,
                                                SelectionDAG &DAG) {
  assert(!isSingleInputShuffleMask(Mask) && "This routine must not be used to "
                                            "lower single-input shuffles as it "
                                            "could then recurse on itself.");
  int Size = Mask.size();

  // True when all defined entries reading a given input name the same
  // element of that input.
  auto DoBothBroadcast = [&]() -> bool {
    int BroadcastIdx[2] = {-1, -1};
    for (int M : Mask) {
      if (M < 0)
        continue;
      const int Input = M >= Size ? 1 : 0;
      const int Elt = Input ? M - Size : M;
      if (BroadcastIdx[Input] == -1)
        BroadcastIdx[Input] = Elt;
      else if (BroadcastIdx[Input] != Elt)
        return false;
    }
    return true;
  };
  if (DoBothBroadcast())
    return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask,
                                                      DAG);

  // Record which 128-bit lanes of each input are read.
  int LaneCount = VT.getSizeInBits() / 128;
  int LaneSize = Size / LaneCount;
  SmallBitVector LaneInputs[2];
  LaneInputs[0].resize(LaneCount, false);
  LaneInputs[1].resize(LaneCount, false);
  for (int i = 0; i < Size; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue;
    LaneInputs[M / Size][(M % Size) / LaneSize] = true;
  }

  // Each input is read from at most one lane: split.
  if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
    return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);

  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
}
/// \brief Lower a 256-bit lane-crossing shuffle using a 128-bit lane permute
/// followed by a blend.
///
/// If elements cross out of at most one of the two source lanes the shuffle
/// is split instead. For single-input masks, a VPERM2X128 node built from V1
/// supplies the lane-crossing elements, and a two-input shuffle of V1 with
/// that node picks each element from the operand where it stays in its lane.
/// Two-input masks are handed to the decomposed shuffle/blend lowering.
static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
                                                       SDValue V1, SDValue V2,
                                                       ArrayRef<int> Mask,
                                                       SelectionDAG &DAG) {
  assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");

  const int NumElts = Mask.size();
  int LaneSize = Mask.size() / 2;

  // LaneCrossing[L] is set when some element sourced from lane L of an input
  // is written to the other lane.
  bool LaneCrossing[2] = {false, false};
  for (int i = 0; i < NumElts; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue;
    const int SrcLane = (M % NumElts) / LaneSize;
    if (SrcLane != i / LaneSize)
      LaneCrossing[SrcLane] = true;
  }

  // Unless elements cross out of both source lanes, fall back to splitting.
  if (!(LaneCrossing[0] && LaneCrossing[1]))
    return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);

  if (isSingleInputShuffleMask(Mask)) {
    // In-lane elements keep reading V1; crossing elements read the same
    // in-lane offset from the second operand (hence the "+ NumElts").
    SmallVector<int, 32> FlippedBlendMask;
    for (int i = 0; i < NumElts; ++i) {
      const int M = Mask[i];
      int Entry;
      if (M < 0)
        Entry = -1;
      else if ((M % NumElts) / LaneSize == i / LaneSize)
        Entry = M;
      else
        Entry = M % LaneSize + (i / LaneSize) * LaneSize + NumElts;
      FlippedBlendMask.push_back(Entry);
    }

    // Immediate for the lane permute: low nibble 3, high nibble 2.
    unsigned PERMMask = 3 | (2 << 4);
    SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT),
                                  V1, DAG.getConstant(PERMMask, MVT::i8));
    return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask);
  }

  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
}
/// \brief Lower a 4-element 256-bit shuffle that moves whole 128-bit lanes.
///
/// Tries, in order: a blend, a subvector extract + concat for the patterns
/// {0,1,0,1}, {0,1,4,5} and {0,1,6,7}, and finally a generic VPERM2X128.
///
/// \p Mask is indexed directly at positions 0..3 and may contain undef
/// (negative) entries. The visible caller only reaches this function after
/// canWidenShuffleElements has accepted the mask, so each pair of entries
/// describes one whole source lane even if one of the two is undef.
static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
                                        SDValue V2, ArrayRef<int> Mask,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
                                                Subtarget, DAG))
    return Blend;

  MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
                               VT.getVectorNumElements() / 2);

  // Low lane of V1 in the low half, low lane of V1 or V2 in the high half.
  // Which input feeds the high half is decided by which pattern matched, not
  // by inspecting Mask[2]: Mask[2] may be undef while Mask[3] selects V2
  // (e.g. {0, 1, -1, 5}), and "Mask[2] < 4" would then wrongly choose V1.
  bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1);
  if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0));
    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
                              OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
  }

  // Low lane of V1 in the low half, high lane of V2 in the high half.
  if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0));
    SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
                              DAG.getIntPtrConstant(2));
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
  }

  // Otherwise form a generic 128-bit lane permutation. The source lane of
  // each result lane is taken from the first defined element of its pair;
  // using an undef first element directly (-1 / 2 == 0) would silently
  // select lane 0 even when the second element names a different lane
  // (e.g. {-1, 3, ...}). A fully undef pair keeps the previous choice of 0.
  int MaskLO = Mask[0];
  if (MaskLO < 0)
    MaskLO = Mask[1] < 0 ? 0 : Mask[1];

  int MaskHI = Mask[2];
  if (MaskHI < 0)
    MaskHI = Mask[3] < 0 ? 0 : Mask[3];

  unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
  return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
                     DAG.getConstant(PermMask, MVT::i8));
}
/// \brief Lower a multi-lane shuffle as a whole-lane shuffle followed by one
/// in-lane shuffle.
///
/// This applies only when (a) every result lane draws all of its defined
/// elements from a single source lane, and (b) all result lanes use the same
/// in-lane element pattern. If either condition fails an empty SDValue is
/// returned and nothing is emitted.
static SDValue lowerVectorShuffleByMerging128BitLanes(
    SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget *Subtarget, SelectionDAG &DAG) {
  assert(!isSingleInputShuffleMask(Mask) &&
         "This is only useful with multiple inputs.");

  int Size = Mask.size();
  int LaneSize = 128 / VT.getScalarSizeInBits();
  int NumLanes = Size / LaneSize;
  assert(NumLanes > 1 && "Only handles 256-bit and wider shuffles.");

  // Lanes[j]: the source lane (numbered across both inputs) feeding result
  // lane j. InLaneMask[k]: the in-lane source offset used for in-lane
  // position k; it is only used as a consistency check across lanes.
  SmallVector<int, 4> Lanes;
  Lanes.assign(NumLanes, -1);
  SmallVector<int, 4> InLaneMask;
  InLaneMask.assign(LaneSize, -1);

  for (int i = 0; i < Size; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue;

    int &SrcLane = Lanes[i / LaneSize];
    const int WantLane = M / LaneSize;
    if (SrcLane < 0)
      SrcLane = WantLane;
    else if (SrcLane != WantLane)
      return SDValue(); // This result lane mixes two source lanes.

    int &Slot = InLaneMask[i % LaneSize];
    const int WantSlot = M % LaneSize;
    if (Slot < 0)
      Slot = WantSlot;
    else if (Slot != WantSlot)
      return SDValue(); // Lanes disagree on the in-lane pattern.
  }

  // Perform the lane shuffle on 64-bit elements: each 128-bit lane is a pair
  // of adjacent 64-bit elements.
  MVT LaneVT = MVT::getVectorVT(VT.isFloatingPoint() ? MVT::f64 : MVT::i64,
                                VT.getSizeInBits() / 64);
  SmallVector<int, 8> LaneMask;
  LaneMask.assign(NumLanes * 2, -1);
  for (int Lane = 0; Lane < NumLanes; ++Lane) {
    if (Lanes[Lane] < 0)
      continue;
    LaneMask[2 * Lane + 0] = 2 * Lanes[Lane] + 0;
    LaneMask[2 * Lane + 1] = 2 * Lanes[Lane] + 1;
  }

  V1 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V1);
  V2 = DAG.getNode(ISD::BITCAST, DL, LaneVT, V2);
  SDValue LaneShuffle = DAG.getVectorShuffle(LaneVT, DL, V1, V2, LaneMask);

  // Back in the original type, finish with a shuffle that stays within each
  // lane of the lane-shuffled value.
  LaneShuffle = DAG.getNode(ISD::BITCAST, DL, VT, LaneShuffle);

  SmallVector<int, 8> NewMask;
  NewMask.assign(Size, -1);
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue;
    NewMask[i] = (i / LaneSize) * LaneSize + Mask[i] % LaneSize;
  }
  assert(!is128BitLaneCrossingShuffleMask(VT, NewMask) &&
         "Must not introduce lane crosses at this point!");

  return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
/// \brief Check whether every element taken from the given input stays at its
/// original position.
///
/// \p Input selects the shuffle operand: 0 covers mask values in
/// [0, Mask.size()), 1 covers [Mask.size(), 2 * Mask.size()). Negative mask
/// entries and entries reading the other input are ignored.
///
/// \returns false if some element of that input is moved to another index.
static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) {
  assert((Input == 0 || Input == 1) && "Only two inputs to shuffles.");
  for (int Pos = 0, NumElts = Mask.size(); Pos != NumElts; ++Pos) {
    const int M = Mask[Pos];
    if (M < 0 || M / NumElts != Input)
      continue;
    if (M % NumElts != Pos)
      return false;
  }
  return true;
}
/// \brief Lower a shuffle of two v4f64 vectors.
///
/// Walks through a fixed sequence of lowering strategies and returns the
/// result of the first one that applies: 128-bit-half shuffles, single-input
/// forms (broadcast, MOVDDUP, VPERMILPI, VPERMI, lane-permute-and-blend),
/// UNPCKL/UNPCKH, element insertion, blend, SHUFP in either operand order,
/// lane merging, and finally a decomposed shuffle+blend (AVX2) or
/// split-or-blend.
///
/// \param Op the VECTOR_SHUFFLE node being lowered (supplies mask and SDLoc).
/// \param V1,V2 the two v4f64 shuffle operands.
static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Masks that can be widened to pairs of elements are handled as shuffles of
  // 128-bit halves.
  SmallVector<int, 4> WidenedMask;
  if (canWidenShuffleElements(Mask, WidenedMask))
    return lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, Subtarget,
                                    DAG);

  if (isSingleInputShuffleMask(Mask)) {
    if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v4f64, DL, V1, Mask,
                                                      Subtarget, DAG))
      return Bcast;

    if (isShuffleEquivalent(Mask, 0, 0, 2, 2))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);

    if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
      // Lane-crossing single-input shuffle: one VPERMI with AVX2, otherwise a
      // lane permute followed by a blend.
      if (Subtarget->hasAVX2())
        return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
                           getV4X86ShuffleImm8ForMask(Mask, DAG));
      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2,
                                                     Mask, DAG);
    }

    // In-lane single-input shuffle: bit i of the immediate is set when result
    // element i takes the high element of its own 128-bit lane.
    static const int HighEltOfLane[4] = {1, 1, 3, 3};
    unsigned VPERMILPMask = 0;
    for (int i = 0; i < 4; ++i)
      if (Mask[i] == HighEltOfLane[i])
        VPERMILPMask |= 1u << i;
    return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
                       DAG.getConstant(VPERMILPMask, MVT::i8));
  }

  // Two-input shuffles from here on.
  if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
  if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);

  // Count the mask entries that read from V2 (indices 4..7).
  int NumV2Elements = 0;
  for (int M : Mask)
    if (M >= 4)
      ++NumV2Elements;

  // A single V2 element landing in slot 0 may be expressible as an insertion.
  if (NumV2Elements == 1 && Mask[0] >= 4) {
    if (SDValue Inserted = lowerVectorShuffleAsElementInsertion(
            MVT::v4f64, DL, V1, V2, Mask, Subtarget, DAG))
      return Inserted;
  }

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  // True when M is undef or lies in the half-open range [Lo, Hi).
  auto UndefOrWithin = [](int M, int Lo, int Hi) {
    return M == -1 || (M >= Lo && M < Hi);
  };

  // SHUFP with V1 feeding the even result slots and V2 the odd ones.
  const bool FitsShufpV1V2 = (Mask[0] == -1 || Mask[0] < 2) &&
                             UndefOrWithin(Mask[1], 4, 6) &&
                             UndefOrWithin(Mask[2], 2, 4) &&
                             (Mask[3] == -1 || Mask[3] >= 6);
  if (FitsShufpV1V2) {
    unsigned SHUFPDMask = 0;
    if (Mask[0] == 1)
      SHUFPDMask |= 1u << 0;
    if (Mask[1] == 5)
      SHUFPDMask |= 1u << 1;
    if (Mask[2] == 3)
      SHUFPDMask |= 1u << 2;
    if (Mask[3] == 7)
      SHUFPDMask |= 1u << 3;
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2,
                       DAG.getConstant(SHUFPDMask, MVT::i8));
  }

  // Same pattern with the operands swapped: V2 feeds the even slots.
  const bool FitsShufpV2V1 = UndefOrWithin(Mask[0], 4, 6) &&
                             (Mask[1] == -1 || Mask[1] < 2) &&
                             (Mask[2] == -1 || Mask[2] >= 6) &&
                             UndefOrWithin(Mask[3], 2, 4);
  if (FitsShufpV2V1) {
    unsigned SHUFPDMask = 0;
    if (Mask[0] == 5)
      SHUFPDMask |= 1u << 0;
    if (Mask[1] == 1)
      SHUFPDMask |= 1u << 1;
    if (Mask[2] == 7)
      SHUFPDMask |= 1u << 2;
    if (Mask[3] == 3)
      SHUFPDMask |= 1u << 3;
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1,
                       DAG.getConstant(SHUFPDMask, MVT::i8));
  }

  // Lane merging is skipped only when AVX2 is available and one of the inputs
  // is already in place.
  const bool HasAVX2 = Subtarget->hasAVX2();
  if (!HasAVX2 || (!isShuffleMaskInputInPlace(0, Mask) &&
                   !isShuffleMaskInputInPlace(1, Mask))) {
    if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
      return Merged;
  }

  if (!Subtarget->hasAVX2())
    return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, DAG);
  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2,
                                                    Mask, DAG);
}
/// \brief Lower a shuffle of two v4i64 vectors (asserts AVX2).
///
/// Tries, in order: 128-bit-half shuffle, blend, broadcast, lane-repeated
/// forms (PSHUFD on a v8i32 view for single-input masks, UNPCKL/UNPCKH),
/// VPERMI for remaining single-input masks, lane merging, and finally a
/// decomposed shuffle+blend.
static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
  assert(Subtarget->hasAVX2() && "We can only lower v4i64 with AVX2!");

  SmallVector<int, 4> WidenedMask;
  if (canWidenShuffleElements(Mask, WidenedMask))
    return lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, Subtarget,
                                    DAG);

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v4i64, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  const bool IsSingleInput = isSingleInputShuffleMask(Mask);

  SmallVector<int, 2> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
    if (IsSingleInput) {
      // Express the 2 x i64 per-lane pattern as a 4 x i32 PSHUFD mask: each
      // 64-bit element becomes a pair of adjacent 32-bit elements.
      int PSHUFDMask[] = {-1, -1, -1, -1};
      for (int i = 0; i < 2; ++i) {
        if (RepeatedMask[i] < 0)
          continue;
        PSHUFDMask[2 * i] = 2 * RepeatedMask[i];
        PSHUFDMask[2 * i + 1] = 2 * RepeatedMask[i] + 1;
      }
      SDValue Src = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1);
      SDValue Shuffled =
          DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, Src,
                      getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG));
      return DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, Shuffled);
    }

    if (isShuffleEquivalent(Mask, 0, 4, 2, 6))
      return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
    if (isShuffleEquivalent(Mask, 1, 5, 3, 7))
      return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
  }

  // Any other single-input mask is handled by one VPERMI.
  if (IsSingleInput)
    return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DAG));

  // Lane merging is skipped only when AVX2 is available and one of the inputs
  // is already in place.
  if (!Subtarget->hasAVX2() || (!isShuffleMaskInputInPlace(0, Mask) &&
                                !isShuffleMaskInputInPlace(1, Mask))) {
    if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
            DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
      return Merged;
  }

  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2,
                                                    Mask, DAG);
}
/// \brief Lower a shuffle of two v8f32 vectors.
///
/// Tries, in order: blend, broadcast, lane-repeated forms (MOVSLDUP,
/// MOVSHDUP, VPERMILPI, UNPCKL/UNPCKH, SHUFPS), single-input variable
/// permutes (VPERMILPV in-lane, VPERMV with AVX2, else lane-permute-and-
/// blend), lane merging, and finally decomposed shuffle+blend (AVX2) or
/// split-or-blend.
static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v8f32, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  // Both 128-bit lanes use the same 4-element pattern.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 &&
           "Repeated masks must be half the mask width!");

    if (isShuffleEquivalent(Mask, 0, 0, 2, 2, 4, 4, 6, 6))
      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
    if (isShuffleEquivalent(Mask, 1, 1, 3, 3, 5, 5, 7, 7))
      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);

    if (isSingleInputShuffleMask(Mask))
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));

    if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
      return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
    if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
      return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);

    // Shift entries that are >= 8 (V2 references) down by 4 before handing the
    // repeated mask to the SHUFPS lowering; presumably that routine expects
    // V2 elements numbered 4..7 -- confirm against its definition.
    for (int &M : RepeatedMask)
      if (M >= 8)
        M -= 4;
    return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2,
                                        DAG);
  }

  if (isSingleInputShuffleMask(Mask)) {
    // Materialize the mask as eight i32 constants (undef where the mask is
    // undef) for the variable-permute instructions.
    SDValue VPermMask[8];
    for (int i = 0; i < 8; ++i) {
      if (Mask[i] < 0)
        VPermMask[i] = DAG.getUNDEF(MVT::i32);
      else
        VPermMask[i] = DAG.getConstant(Mask[i], MVT::i32);
    }

    if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) {
      SDValue MaskVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask);
      return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, MaskVec);
    }

    if (Subtarget->hasAVX2()) {
      SDValue MaskVec =
          DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask);
      SDValue MaskAsFP = DAG.getNode(ISD::BITCAST, DL, MVT::v8f32, MaskVec);
      return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, MaskAsFP, V1);
    }

    return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
                                                   DAG);
  }

  if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
          DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
    return Merged;

  if (!Subtarget->hasAVX2())
    return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, DAG);
  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2,
                                                    Mask, DAG);
}
/// \brief Lower a shuffle of two v8i32 vectors (asserts AVX2).
///
/// Tries, in order: zero/any extension, blend, broadcast, lane-repeated
/// forms (PSHUFD for single-input, UNPCKL/UNPCKH), VPERMV for remaining
/// single-input masks, bit shift, lane merging, and finally a decomposed
/// shuffle+blend.
static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
  assert(Subtarget->hasAVX2() && "We can only lower v8i32 with AVX2!");

  if (SDValue Extended = lowerVectorShuffleAsZeroOrAnyExtend(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return Extended;

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v8i32, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  const bool IsSingleInput = isSingleInputShuffleMask(Mask);

  // Both 128-bit lanes use the same 4-element pattern.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
    if (IsSingleInput)
      return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DAG));

    if (isShuffleEquivalent(Mask, 0, 8, 1, 9, 4, 12, 5, 13))
      return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
    if (isShuffleEquivalent(Mask, 2, 10, 3, 11, 6, 14, 7, 15))
      return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
  }

  if (IsSingleInput) {
    // Materialize the mask as eight i32 constants (undef where the mask is
    // undef) and use it as the index vector of a VPERMV.
    SDValue VPermMask[8];
    for (int i = 0; i < 8; ++i) {
      if (Mask[i] < 0)
        VPermMask[i] = DAG.getUNDEF(MVT::i32);
      else
        VPermMask[i] = DAG.getConstant(Mask[i], MVT::i32);
    }
    SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask);
    return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, MaskVec, V1);
  }

  if (SDValue Shifted =
          lowerVectorShuffleAsBitShift(DL, MVT::v8i32, V1, V2, Mask, DAG))
    return Shifted;

  if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return Merged;

  return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2,
                                                    Mask, DAG);
}
/// \brief Lower a shuffle of two v16i16 vectors (asserts AVX2).
///
/// Tries, in order: zero/any extension, broadcast, blend, UNPCKL/UNPCKH,
/// single-input forms (lane-permute-and-blend when lane-crossing, otherwise a
/// byte-granular PSHUFB), bit shift, lane merging, and finally
/// split-or-blend.
static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
  assert(Subtarget->hasAVX2() && "We can only lower v16i16 with AVX2!");

  if (SDValue Extended = lowerVectorShuffleAsZeroOrAnyExtend(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return Extended;

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v16i16, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  if (isShuffleEquivalent(Mask,
                          // First 128-bit lane:
                          0, 16, 1, 17, 2, 18, 3, 19,
                          // Second 128-bit lane:
                          8, 24, 9, 25, 10, 26, 11, 27))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
  if (isShuffleEquivalent(Mask,
                          // First 128-bit lane:
                          4, 20, 5, 21, 6, 22, 7, 23,
                          // Second 128-bit lane:
                          12, 28, 13, 29, 14, 30, 15, 31))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);

  if (isSingleInputShuffleMask(Mask)) {
    if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask))
      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
                                                     Mask, DAG);

    // In-lane single-input shuffle: expand each 16-bit mask entry into the
    // two byte indices PSHUFB needs, relative to the start of its lane.
    SDValue PSHUFBMask[32];
    for (int i = 0; i < 16; ++i) {
      SDValue &LoByte = PSHUFBMask[2 * i];
      SDValue &HiByte = PSHUFBMask[2 * i + 1];
      if (Mask[i] == -1) {
        HiByte = DAG.getUNDEF(MVT::i8);
        LoByte = HiByte;
        continue;
      }

      // Entries for the upper lane are rebased to lane-local word indices.
      int M = Mask[i];
      if (i >= 8)
        M -= 8;
      assert(M >= 0 && M < 8 && "Invalid single-input mask!");
      LoByte = DAG.getConstant(2 * M, MVT::i8);
      HiByte = DAG.getConstant(2 * M + 1, MVT::i8);
    }

    SDValue Bytes = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, V1);
    SDValue ByteMask =
        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask);
    SDValue Shuffled =
        DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, Bytes, ByteMask);
    return DAG.getNode(ISD::BITCAST, DL, MVT::v16i16, Shuffled);
  }

  if (SDValue Shifted =
          lowerVectorShuffleAsBitShift(DL, MVT::v16i16, V1, V2, Mask, DAG))
    return Shifted;

  if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return Merged;

  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v32i8 vectors (asserts AVX2).
///
/// Tries, in order: zero/any extension, broadcast, blend, UNPCKL/UNPCKH,
/// single-input forms (lane-permute-and-blend when lane-crossing, otherwise
/// PSHUFB), bit shift, lane merging, and finally split-or-blend.
static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
  assert(Subtarget->hasAVX2() && "We can only lower v32i8 with AVX2!");

  if (SDValue Extended = lowerVectorShuffleAsZeroOrAnyExtend(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return Extended;

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(MVT::v32i8, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  if (SDValue Blended = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Blended;

  if (isShuffleEquivalent(
          Mask,
          // First 128-bit lane:
          0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
          // Second 128-bit lane:
          16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55))
    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
  if (isShuffleEquivalent(
          Mask,
          // First 128-bit lane:
          8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
          // Second 128-bit lane:
          24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63))
    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);

  if (isSingleInputShuffleMask(Mask)) {
    if (is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask))
      return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2,
                                                     Mask, DAG);

    // In-lane single-input shuffle: build the PSHUFB control vector, rebasing
    // indices >= 16 so each is relative to the start of a 128-bit lane.
    SDValue PSHUFBMask[32];
    for (int i = 0; i < 32; ++i) {
      const int M = Mask[i];
      if (M < 0) {
        PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
        continue;
      }
      const int LaneLocal = M < 16 ? M : M - 16;
      PSHUFBMask[i] = DAG.getConstant(LaneLocal, MVT::i8);
    }

    SDValue ByteMask =
        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, PSHUFBMask);
    return DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, V1, ByteMask);
  }

  if (SDValue Shifted =
          lowerVectorShuffleAsBitShift(DL, MVT::v32i8, V1, V2, Mask, DAG))
    return Shifted;

  if (SDValue Merged = lowerVectorShuffleByMerging128BitLanes(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return Merged;

  return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, DAG);
}
/// \brief Dispatch a 256-bit vector shuffle to the per-type lowering routine.
///
/// Integer vectors on subtargets without AVX2 are handled up front: element
/// types narrower than 32 bits are split and lowered, wider ones are bitcast
/// to the floating point type of the same layout, shuffled there, and bitcast
/// back.
static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        MVT VT, const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();

  const bool IntegerWithoutAVX2 = VT.isInteger() && !Subtarget->hasAVX2();
  if (IntegerWithoutAVX2) {
    int ElementBits = VT.getScalarSizeInBits();
    if (ElementBits < 32)
      return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);

    // Re-express the shuffle in the floating point domain.
    MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
                                VT.getVectorNumElements());
    V1 = DAG.getNode(ISD::BITCAST, DL, FpVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, DL, FpVT, V2);
    SDValue FpShuffle = DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask);
    return DAG.getNode(ISD::BITCAST, DL, VT, FpShuffle);
  }

  switch (VT.SimpleTy) {
  case MVT::v4f64:
    return lowerV4F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v4i64:
    return lowerV4I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v8f32:
    return lowerV8F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v8i32:
    return lowerV8I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v16i16:
    return lowerV16I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v32i8:
    return lowerV32I8VectorShuffle(Op, V1, V2, Subtarget, DAG);

  default:
    llvm_unreachable("Not a valid 256-bit x86 vector type!");
  }
}
/// \brief Lower a shuffle of two v8f64 vectors.
///
/// Recognizes the UNPCKL and UNPCKH interleave patterns; every other mask is
/// split and lowered.
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Pick the unpack opcode matching the mask, if any.
  bool IsUnpack = true;
  unsigned UnpackOpc = X86ISD::UNPCKL;
  if (!isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14)) {
    UnpackOpc = X86ISD::UNPCKH;
    IsUnpack = isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15);
  }
  if (IsUnpack)
    return DAG.getNode(UnpackOpc, DL, MVT::v8f64, V1, V2);

  return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v16f32 vectors.
///
/// Recognizes the UNPCKL and UNPCKH interleave patterns; every other mask is
/// split and lowered.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // Pick the unpack opcode matching the mask, if any.
  bool IsUnpack = true;
  unsigned UnpackOpc = X86ISD::UNPCKL;
  if (!isShuffleEquivalent(Mask,
                           0, 16, 1, 17, 4, 20, 5, 21,
                           8, 24, 9, 25, 12, 28, 13, 29)) {
    UnpackOpc = X86ISD::UNPCKH;
    IsUnpack = isShuffleEquivalent(Mask,
                                   2, 18, 3, 19, 6, 22, 7, 23,
                                   10, 26, 11, 27, 14, 30, 15, 31);
  }
  if (IsUnpack)
    return DAG.getNode(UnpackOpc, DL, MVT::v16f32, V1, V2);

  return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v8i64 vectors.
///
/// Recognizes the UNPCKL and UNPCKH interleave patterns; every other mask is
/// split and lowered.
static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Pick the unpack opcode matching the mask, if any.
  bool IsUnpack = true;
  unsigned UnpackOpc = X86ISD::UNPCKL;
  if (!isShuffleEquivalent(Mask, 0, 8, 2, 10, 4, 12, 6, 14)) {
    UnpackOpc = X86ISD::UNPCKH;
    IsUnpack = isShuffleEquivalent(Mask, 1, 9, 3, 11, 5, 13, 7, 15);
  }
  if (IsUnpack)
    return DAG.getNode(UnpackOpc, DL, MVT::v8i64, V1, V2);

  return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v16i32 vectors.
///
/// Recognizes the UNPCKL and UNPCKH interleave patterns; every other mask is
/// split and lowered.
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  SDLoc DL(Op);
  assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // Pick the unpack opcode matching the mask, if any.
  bool IsUnpack = true;
  unsigned UnpackOpc = X86ISD::UNPCKL;
  if (!isShuffleEquivalent(Mask,
                           0, 16, 1, 17, 4, 20, 5, 21,
                           8, 24, 9, 25, 12, 28, 13, 29)) {
    UnpackOpc = X86ISD::UNPCKH;
    IsUnpack = isShuffleEquivalent(Mask,
                                   2, 18, 3, 19, 6, 22, 7, 23,
                                   10, 26, 11, 27, 14, 30, 15, 31);
  }
  if (IsUnpack)
    return DAG.getNode(UnpackOpc, DL, MVT::v16i32, V1, V2);

  return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v32i16 vectors (asserts AVX-512 BWI).
///
/// No dedicated patterns are matched here; the shuffle is always split and
/// lowered.
static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
  assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");

  SDLoc DL(Op);
  return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
}
/// \brief Lower a shuffle of two v64i8 vectors (asserts AVX-512 BWI).
///
/// No dedicated patterns are matched here; the shuffle is always split and
/// lowered.
static SDValue lowerV64I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                       const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!");
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
  assert(Subtarget->hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");

  SDLoc DL(Op);
  return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
/// \brief Dispatch a 512-bit vector shuffle to the per-type lowering routine.
///
/// A broadcast lowering is attempted first for every type. The v32i16 and
/// v64i8 routines are only used when the subtarget has BWI; without it those
/// types fall through to the generic split-and-lower path.
static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        MVT VT, const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
  SDLoc DL(Op);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
  assert(Subtarget->hasAVX512() &&
         "Cannot lower 512-bit vectors w/ basic ISA!");

  if (SDValue Bcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1, Mask,
                                                    Subtarget, DAG))
    return Bcast;

  switch (VT.SimpleTy) {
  case MVT::v8f64:
    return lowerV8F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v16f32:
    return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v8i64:
    return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
  case MVT::v16i32:
    return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG);

  case MVT::v32i16:
    if (!Subtarget->hasBWI())
      break; // No BWI: use the generic split below.
    return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG);

  case MVT::v64i8:
    if (!Subtarget->hasBWI())
      break; // No BWI: use the generic split below.
    return lowerV64I8VectorShuffle(Op, V1, V2, Subtarget, DAG);

  default:
    llvm_unreachable("Not a valid 512-bit x86 vector type!");
  }

  // Reached only for byte/word element types without BWI.
  return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
}
/// \brief Top-level lowering entry point for x86 vector shuffles.
///
/// Canonicalizes the shuffle and then dispatches on the vector width. Each
/// canonicalization step returns a new shuffle node instead of continuing;
/// presumably the caller's legalization loop re-visits the returned node --
/// confirm against the caller.
///
/// Steps, in order:
///   1. Both operands undef -> undef; only V1 undef -> commuted shuffle.
///   2. V2 undef but referenced by the mask -> those entries become undef.
///   3. Elements narrower than 64 bits whose mask widens cleanly -> shuffle of
///      the twice-as-wide element type (if that vector type is legal).
///   4. Operand order is canonicalized through a chain of tie-breakers so that
///      V1 is the "dominant" input.
///   5. Dispatch to the 128/256/512-bit lowering.
///
/// \param Op the VECTOR_SHUFFLE node to lower.
static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
                                  SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> Mask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc dl(Op);

  assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");

  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // With only V1 undef, swap the operands so the defined input comes first.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // With V2 undef, any mask entry that still points into V2 can be replaced
  // by an undef entry.
  if (V2IsUndef) {
    bool ReferencesV2 = false;
    for (int M : Mask)
      if (M >= NumElements) {
        ReferencesV2 = true;
        break;
      }

    if (ReferencesV2) {
      SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
      for (int &NewM : NewMask)
        if (NewM >= NumElements)
          NewM = -1;
      return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
    }
  }

  // Try to express the shuffle on elements twice as wide. 64-bit elements are
  // excluded here.
  SmallVector<int, 16> WidenedMask;
  if (VT.getScalarSizeInBits() < 64 &&
      canWidenShuffleElements(Mask, WidenedMask)) {
    MVT NewEltVT = VT.isFloatingPoint()
                       ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
                       : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    // Only widen when the resulting vector type is legal for the target.
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
      V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
      return DAG.getNode(ISD::BITCAST, dl, VT,
                         DAG.getVectorShuffle(NewVT, dl, V1, V2, WidenedMask));
    }
  }

  // Count how many defined mask entries read from each input.
  int NumV1Elements = 0, NumV2Elements = 0;
  for (int M : Mask) {
    if (M < 0)
      continue;
    if (M < NumElements)
      ++NumV1Elements;
    else
      ++NumV2Elements;
  }

  // Commute when V2 contributes more elements than V1.
  if (NumV2Elements > NumV1Elements)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // On a tie, fall back to progressively finer tie-breakers.
  if (NumV1Elements == NumV2Elements) {
    // Tie-breaker 1: which input contributes more to the low half.
    int LowV1Elements = 0, LowV2Elements = 0;
    for (int M : Mask.slice(0, NumElements / 2))
      if (M >= NumElements)
        ++LowV2Elements;
      else if (M >= 0)
        ++LowV1Elements;

    if (LowV2Elements > LowV1Elements) {
      return DAG.getCommutedVectorShuffle(*SVOp);
    } else if (LowV2Elements == LowV1Elements) {
      // Tie-breaker 2: sum of the result positions each input occupies.
      int SumV1Indices = 0, SumV2Indices = 0;
      for (int i = 0, Size = Mask.size(); i < Size; ++i)
        if (Mask[i] >= NumElements)
          SumV2Indices += i;
        else if (Mask[i] >= 0)
          SumV1Indices += i;

      if (SumV2Indices < SumV1Indices) {
        return DAG.getCommutedVectorShuffle(*SVOp);
      } else if (SumV2Indices == SumV1Indices) {
        // Tie-breaker 3: number of odd result positions each input occupies.
        int NumV1OddIndices = 0, NumV2OddIndices = 0;
        for (int i = 0, Size = Mask.size(); i < Size; ++i)
          if (Mask[i] >= NumElements)
            NumV2OddIndices += i % 2;
          else if (Mask[i] >= 0)
            NumV1OddIndices += i % 2;

        if (NumV2OddIndices < NumV1OddIndices)
          return DAG.getCommutedVectorShuffle(*SVOp);
      }
    }
  }

  // Dispatch on the total vector width.
  if (VT.getSizeInBits() == 128)
    return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);

  if (VT.getSizeInBits() == 256)
    return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);

  if (VT.getSizeInBits() == 512)
    return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);

  llvm_unreachable("Unimplemented!");
}
/// \brief Decide whether a shuffle mask can be expressed as a blend.
///
/// Rejects 512-bit vectors, subtargets without SSE4.1, i8 element vectors,
/// and v16i16 when \p hasInt256 is false. For each position within a lane the
/// element must either stay in place from the first operand or come from the
/// same position of the second operand; when the vector has two lanes, the
/// second lane must make the same choice (or be undef).
///
/// \param MaskOut if non-null and the mask is a blend, receives the blend
/// immediate: bit i is set when position i is taken from the second operand.
/// \returns true when the mask is a valid blend.
static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
                        bool hasInt256, unsigned *MaskOut = nullptr) {
  if (VT.is512BitVector())
    return false;
  if (!hasSSE41)
    return false;
  if (VT.getVectorElementType() == MVT::i8)
    return false;
  if (VT == MVT::v16i16 && !hasInt256)
    return false;

  const unsigned NumElems = VT.getVectorNumElements();
  // One lane for up to 8 elements, two lanes for 9..16.
  const unsigned NumLanes = (NumElems - 1) / 8 + 1;
  const unsigned NumElemsInLane = NumElems / NumLanes;

  unsigned Imm = 0;
  for (unsigned Pos = 0; Pos < NumElemsInLane; ++Pos) {
    const int First = MaskVals[Pos];
    // Counterpart of this position in the second lane; -1 when there is none.
    int Second = -1;
    if (NumLanes == 2)
      Second = MaskVals[Pos + NumElemsInLane];

    // Position keeps the first operand's element in every lane.
    const bool FirstKeeps = First < 0 || First == (int)Pos;
    const bool SecondKeeps =
        Second < 0 || Second == (int)(Pos + NumElemsInLane);
    if (FirstKeeps && SecondKeeps)
      continue;

    // Position takes the second operand's element in every lane.
    const bool FirstTakes = (unsigned)First == (Pos + NumElems);
    const bool SecondTakes =
        Second < 0 || (unsigned)Second == Pos + NumElems + NumElemsInLane;
    if (!(FirstTakes && SecondTakes))
      return false;

    Imm |= (1 << Pos);
  }

  if (MaskOut)
    *MaskOut = Imm;
  return true;
}
/// \brief Emit an X86ISD::BLENDI node for a shuffle already known to be a
/// blend.
///
/// \param SVOp the shuffle node; its mask must satisfy isBlendMask().
/// \param MaskValue the blend immediate (as produced by isBlendMask()).
/// \returns the blend result, bitcast back to the shuffle's value type.
///
/// i64 element vectors, and i32 element vectors when the subtarget lacks
/// 256-bit integer support, are blended in the floating point vector type of
/// the same layout.
static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
                                          unsigned MaskValue,
                                          const X86Subtarget *Subtarget,
                                          SelectionDAG &DAG) {
  MVT VT = SVOp->getSimpleValueType(0);
  MVT EltVT = VT.getVectorElementType();
  // The message belongs to the assert condition, not to isBlendMask's last
  // argument.
  assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
                     Subtarget->hasInt256()) &&
         "Trying to lower a VECTOR_SHUFFLE to a Blend but with the wrong mask");

  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  SDLoc dl(SVOp);
  unsigned NumElems = VT.getVectorNumElements();

  MVT BlendVT = VT;
  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
    BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
                               NumElems);
    // The operands must have the type the BLENDI node is created with, so
    // cast them to BlendVT (casting to VT would leave them unchanged).
    V1 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V1);
    V2 = DAG.getNode(ISD::BITCAST, dl, BlendVT, V2);
  }

  SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
                            DAG.getConstant(MaskValue, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
/// \brief Return true when moving element \p InputIdx to position
/// \p OutputIdx of a vector of type \p VT changes its 128-bit lane.
static bool ShuffleCrosses128bitLane(MVT VT, unsigned InputIdx,
                                     unsigned OutputIdx) {
  const unsigned EltBits = VT.getVectorElementType().getSizeInBits();
  // Lane number = bit offset of the element divided by the lane width.
  const unsigned SrcLane = InputIdx * EltBits / 128;
  const unsigned DstLane = OutputIdx * EltBits / 128;
  return SrcLane != DstLane;
}
/// \brief Build an X86ISD::PSHUFB node that applies \p MaskVals to \p V1.
///
/// The element-level mask is expanded to byte granularity. Mask entries that
/// are negative or refer past V1's elements produce the control byte 0x80
/// (per the PSHUFB definition, a control byte with the top bit set zeroes the
/// destination byte). Byte indices are taken modulo 16, i.e. relative to the
/// 128-bit lane.
///
/// \returns the PSHUFB node, typed as a vector of i8 of the same total width
/// as V1 (the result is NOT cast back to V1's type), or a null SDValue when
/// some element would have to cross a 128-bit lane.
static SDValue getPSHUFB(ArrayRef<int> MaskVals, SDValue V1, SDLoc &dl,
                         SelectionDAG &DAG) {
  MVT VT = V1.getSimpleValueType();
  assert(VT.is128BitVector() || VT.is256BitVector());

  const unsigned NumElts = VT.getVectorNumElements();
  const unsigned BytesPerElt = VT.getVectorElementType().getSizeInBits() / 8;
  const unsigned ZeroingByte = 0x80;

  SmallVector<SDValue, 32> ByteMask;
  for (unsigned OutIdx = 0; OutIdx != NumElts; ++OutIdx) {
    const int InIdx = MaskVals[OutIdx];
    unsigned ByteIdx = ZeroingByte;

    const bool ReadsV1 = InIdx >= 0 && (unsigned)InIdx < NumElts;
    if (ReadsV1) {
      // PSHUFB cannot move bytes between 128-bit lanes; give up.
      if (ShuffleCrosses128bitLane(VT, InIdx, OutIdx))
        return SDValue();
      // First byte of the source element, relative to its lane.
      ByteIdx = (InIdx * BytesPerElt) & 0xf;
    }

    // Emit one control byte per byte of the element; real indices advance,
    // the zeroing marker is repeated unchanged.
    for (unsigned j = 0; j != BytesPerElt; ++j) {
      ByteMask.push_back(DAG.getConstant(ByteIdx, MVT::i8));
      if (ByteIdx != ZeroingByte)
        ++ByteIdx;
    }
  }

  MVT ShufVT = MVT::getVectorVT(MVT::i8, ByteMask.size());
  if (ShufVT != VT)
    V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1);
  SDValue Control = DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, ByteMask);
  return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1, Control);
}
// Lower a v8i16 VECTOR_SHUFFLE.
//
// The 16 input words (8 from V1, 8 from V2) are viewed as four "quads" of
// four words each (quad = mask index / 4; quads 0-1 are V1, quads 2-3 are V2).
// The routine:
//   1. Picks, for the low and the high half of the result, the input quad that
//      supplies the most words, and gathers those two quads with a v2i64
//      shuffle.
//   2. If that alone (plus a single PSHUFLW or PSHUFHW) produces the result,
//      returns it.
//   3. Otherwise handles splats via PromoteSplat, uses PSHUFB when SSSE3 is
//      available, and as a last resort fixes up the remaining words one at a
//      time with EXTRACT_VECTOR_ELT / INSERT_VECTOR_ELT.
static SDValue
LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
SmallVector<int, 8> MaskVals;
// LoQuad[q] / HiQuad[q]: number of words in the low / high half of the
// result that can be supplied by input quad q. InputQuads records which
// quads are referenced by at least one defined mask entry.
unsigned LoQuad[] = { 0, 0, 0, 0 };
unsigned HiQuad[] = { 0, 0, 0, 0 };
std::bitset<4> InputQuads;
for (unsigned i = 0; i < 8; ++i) {
unsigned *Quad = i < 4 ? LoQuad : HiQuad;
int EltIdx = SVOp->getMaskElt(i);
MaskVals.push_back(EltIdx);
// An undef entry is compatible with any quad, so it is counted for all four.
if (EltIdx < 0) {
++Quad[0];
++Quad[1];
++Quad[2];
++Quad[3];
continue;
}
++Quad[EltIdx / 4];
InputQuads.set(EltIdx / 4);
}
// Choose the best quad for the low half. MaxQuad starts at 1 and the
// comparison is strict, so a quad is only chosen if it supplies at least two
// words; otherwise BestLoQuad stays -1.
int BestLoQuad = -1;
unsigned MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (LoQuad[i] > MaxQuad) {
BestLoQuad = i;
MaxQuad = LoQuad[i];
}
}
// Same selection for the high half.
int BestHiQuad = -1;
MaxQuad = 1;
for (unsigned i = 0; i < 4; ++i) {
if (HiQuad[i] > MaxQuad) {
BestHiQuad = i;
MaxQuad = HiQuad[i];
}
}
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
// With SSSE3 the quad choice is overridden: if exactly one quad of each input
// is used, select exactly those two; if more than two quads are used, select
// none, which skips the v2i64 shuffle below and leaves the work to the PSHUFB
// path.
if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads[0] ? 0 : 1;
BestHiQuad = InputQuads[2] ? 2 : 3;
}
if (InputQuads.count() > 2) {
BestLoQuad = -1;
BestHiQuad = -1;
}
}
SDValue NewV;
if (BestLoQuad >= 0 || BestHiQuad >= 0) {
// Gather the chosen quads into one vector with a v2i64 shuffle of V1 and V2.
// A half without a best quad defaults to the corresponding half of V1
// (indices 0 and 1).
int MaskV[] = {
BestLoQuad < 0 ? 0 : BestLoQuad,
BestHiQuad < 0 ? 1 : BestHiQuad
};
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
NewV = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, NewV);
// Determine whether every defined word of the result now lives in NewV, and
// which halves of the original mask are the identity (InOrder[0] for words
// 0-3, InOrder[1] for words 4-7). Note the loop stops at the first word that
// is not in NewV, so InOrder is only fully computed when AllWordsInNewV ends
// up true.
bool AllWordsInNewV = true;
bool InOrder[2] = { true, true };
for (unsigned i = 0; i != 8; ++i) {
int idx = MaskVals[i];
if (idx != (int)i)
InOrder[i/4] = false;
if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
continue;
AllWordsInNewV = false;
break;
}
bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
if (AllWordsInNewV) {
// Rewrite MaskVals so it indexes into NewV: words from BestLoQuad map to
// 0..3, all others to 4..7. A single PSHUFHW is ruled out as soon as a
// displaced word comes from the low quad, and a single PSHUFLW as soon as a
// displaced word comes from the high quad.
for (int i = 0; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0)
continue;
idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
if ((idx != i) && idx < 4)
pshufhw = false;
if ((idx != i) && idx > 3)
pshuflw = false;
}
// From here on the shuffle is a single-input shuffle of NewV.
V1 = NewV;
V2Used = false;
BestLoQuad = 0;
BestHiQuad = 1;
}
// If one PSHUFHW (with the low half already in order) or one PSHUFLW (with
// the high half already in order) finishes the job, emit it and return.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
unsigned TargetMask = 0;
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
// NOTE: this local SVOp shadows the function-level SVOp; it refers to the
// shuffle node just created, which is used only to compute the immediate.
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
getShufflePSHUFLWImmediate(SVOp);
V1 = NewV.getOperand(0);
return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
}
// Splats are delegated to PromoteSplat.
if (SVOp->isSplat())
return PromoteSplat(SVOp, DAG);
// SSSE3: one PSHUFB per used input; with two inputs the second PSHUFB uses
// the commuted mask and the two results are OR'ed together.
if (Subtarget->hasSSSE3()) {
// NOTE(review): pshufbMask is declared but never used in this branch.
SmallVector<SDValue,16> pshufbMask;
bool TwoInputs = V1Used && V2Used;
V1 = getPSHUFB(MaskVals, V1, dl, DAG);
if (!TwoInputs)
return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
CommuteVectorShuffleMask(MaskVals, 8);
V2 = getPSHUFB(MaskVals, V2, dl, DAG);
V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
}
// No SSSE3. InOrder[i] is set once result word i is known to be correct (or
// undef) in NewV. This bitset is distinct from the bool InOrder[2] array
// scoped to the block above.
std::bitset<8> InOrder;
if (BestLoQuad >= 0) {
// Arrange the low four words of NewV: words that come from BestLoQuad are
// moved into place, the high four words are passed through unchanged.
int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
for (int i = 0; i != 4; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
InOrder.set(i);
} else if ((idx / 4) == BestLoQuad) {
MaskV[i] = idx & 3;
InOrder.set(i);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
// If a real shuffle node resulted, turn it into a PSHUFLW target node.
if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
getShufflePSHUFLWImmediate(SVOp), DAG);
}
}
if (BestHiQuad >= 0) {
// Same for the high four words, using PSHUFHW; the low four words are passed
// through unchanged.
int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
for (unsigned i = 4; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
InOrder.set(i);
} else if ((idx / 4) == BestHiQuad) {
MaskV[i] = (idx & 3) + 4;
InOrder.set(i);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
getShufflePSHUFHWImmediate(SVOp), DAG);
}
}
// No quad was selected at all: start from V1 and mark the words that are
// already in place (or undef).
if (BestLoQuad == -1 && BestHiQuad == -1) {
NewV = V1;
for (int i = 0; i != 8; ++i)
if (MaskVals[i] < 0 || MaskVals[i] == i)
InOrder.set(i);
}
// Patch every remaining word individually: extract it from V1 (index < 8) or
// V2 (index >= 8) and insert it at position i of NewV.
for (unsigned i = 0; i != 8; ++i) {
if (InOrder[i])
continue;
int EltIdx = MaskVals[i];
if (EltIdx < 0)
continue;
SDValue ExtOp = (EltIdx < 8) ?
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
DAG.getIntPtrConstant(EltIdx)) :
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
DAG.getIntPtrConstant(EltIdx - 8));
NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
DAG.getIntPtrConstant(i));
}
return NewV;
}
/// \brief Lower a single-input v16i16 VECTOR_SHUFFLE with PSHUFB.
///
/// \returns whatever getPSHUFB() produces for V1 and the shuffle mask, or a
/// null SDValue when the second operand is not undef.
static SDValue
LowerVECTOR_SHUFFLEv16i16(SDValue Op, SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  SDLoc dl(SVOp);

  // Only the unary case is handled here.
  SDValue V2 = SVOp->getOperand(1);
  if (V2.getOpcode() != ISD::UNDEF)
    return SDValue();

  SDValue V1 = SVOp->getOperand(0);
  ArrayRef<int> Mask = SVOp->getMask();
  SmallVector<int, 16> MaskVals(Mask.begin(), Mask.end());
  return getPSHUFB(MaskVals, V1, dl, DAG);
}
/// Lower a v16i8 vector shuffle.
///
/// Splat shuffles are handed to PromoteSplat. With SSSE3, each input is
/// permuted by a PSHUFB whose mask selects the bytes taken from that input and
/// holds 0x80 everywhere else; the two results are OR'ed together (the second
/// PSHUFB is skipped when V2 is undef or all zeros). Without SSSE3, both
/// inputs are bitcast to v8i16 and every result word that is not already in
/// place is assembled from extracted words using shifts, masks and ORs.
///
/// \param SVOp      the shuffle node to lower.
/// \param Subtarget queried only for SSSE3 availability.
/// \param DAG       the DAG in which the new nodes are created.
/// \returns the lowered value.
static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
                                        const X86Subtarget* Subtarget,
                                        SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  SDLoc dl(SVOp);
  ArrayRef<int> MaskVals = SVOp->getMask();

  if (SVOp->isSplat())
    return PromoteSplat(SVOp, DAG);

  if (Subtarget->hasSSSE3()) {
    // Mask for V1: keep bytes sourced from V1, put 0x80 in every other lane.
    SmallVector<SDValue,16> pshufbMask;
    for (unsigned i = 0; i != 16; ++i) {
      int EltIdx = MaskVals[i];
      if (EltIdx < 0 || EltIdx >= 16)
        EltIdx = 0x80;
      pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
    }
    V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, pshufbMask));

    // Nothing to merge in when V2 contributes no defined, non-zero bytes.
    if (V2.getOpcode() == ISD::UNDEF ||
        ISD::isBuildVectorAllZeros(V2.getNode()))
      return V1;

    // Mask for V2: keep bytes sourced from V2 (rebased to 0..15), 0x80
    // elsewhere, then OR the two permuted inputs together.
    pshufbMask.clear();
    for (unsigned i = 0; i != 16; ++i) {
      int EltIdx = MaskVals[i];
      EltIdx = (EltIdx < 16) ? 0x80 : EltIdx - 16;
      pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
    }
    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, pshufbMask));
    return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
  }

  // Mask values in [16, 32) name bytes of V2. The word index inside the chosen
  // source therefore has to be computed from the byte index modulo 16; using
  // the raw mask value would index past the end of the 8-word source vector.
  auto WordIndexOf = [](int ByteIdx) { return (ByteIdx % 16) / 2; };

  // Fallback: work on 16-bit words, starting from V1 and replacing each word
  // that is not already correct.
  V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  V2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue NewV = V1;
  for (int i = 0; i != 8; ++i) {
    int Elt0 = MaskVals[i*2];
    int Elt1 = MaskVals[i*2+1];

    // Both bytes of this word are undef: leave whatever is there.
    if (Elt0 < 0 && Elt1 < 0)
      continue;

    // The word is already in place in V1.
    if ((Elt0 == i*2) && (Elt1 == i*2+1))
      continue;

    SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
    SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
    SDValue InsElt;

    // Both bytes form one aligned source word: move it with a single
    // extract/insert pair.
    if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
                           DAG.getIntPtrConstant(WordIndexOf(Elt1)));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                         DAG.getIntPtrConstant(i));
      continue;
    }

    // High byte of the result word: extract the source word, then either
    // shift an even source byte up by 8 or, if a low byte will be OR'ed in,
    // clear the bottom 8 bits.
    if (Elt1 >= 0) {
      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
                           DAG.getIntPtrConstant(WordIndexOf(Elt1)));
      if ((Elt1 & 1) == 0)
        InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
                             DAG.getConstant(8,
                                  TLI.getShiftAmountTy(InsElt.getValueType())));
      else if (Elt0 >= 0)
        InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
                             DAG.getConstant(0xFF00, MVT::i16));
    }

    // Low byte of the result word: extract the source word, then either shift
    // an odd source byte down by 8 or, if a high byte is present, clear the
    // top 8 bits; finally merge with the high byte if there is one.
    if (Elt0 >= 0) {
      SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
                                    Elt0Src,
                                    DAG.getIntPtrConstant(WordIndexOf(Elt0)));
      if ((Elt0 & 1) != 0)
        InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
                              DAG.getConstant(8,
                                 TLI.getShiftAmountTy(InsElt0.getValueType())));
      else if (Elt1 >= 0)
        InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
                              DAG.getConstant(0x00FF, MVT::i16));
      InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
                         : InsElt0;
    }
    NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                       DAG.getIntPtrConstant(i));
  }
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, NewV);
}
/// Lower a v32i8 shuffle through getPSHUFB when only one input carries data.
///
/// Requires the value type to be v32i8 and the subtarget to report hasInt256.
/// At least one of "V2 is undef", "V2 is all zeros" or "V1 is all zeros" must
/// hold; otherwise an empty SDValue is returned. When V1 is the all-zero input
/// (and V2 is not), the mask is commuted and V2 is used as the source.
static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
                                 const X86Subtarget *Subtarget,
                                 SelectionDAG &DAG) {
  if (SVOp->getSimpleValueType(0) != MVT::v32i8 || !Subtarget->hasInt256())
    return SDValue();

  SDValue Src = SVOp->getOperand(0);
  SDValue Other = SVOp->getOperand(1);
  const bool OtherIsUndef = Other.getOpcode() == ISD::UNDEF;
  const bool SrcIsZero = ISD::isBuildVectorAllZeros(Src.getNode());
  const bool OtherIsZero = ISD::isBuildVectorAllZeros(Other.getNode());

  // Two live inputs cannot be expressed with a single PSHUFB.
  const bool SingleLiveInput = OtherIsUndef || OtherIsZero || SrcIsZero;
  if (!SingleLiveInput)
    return SDValue();

  ArrayRef<int> ShuffleMask = SVOp->getMask();
  SmallVector<int, 32> MaskVals(ShuffleMask.begin(), ShuffleMask.end());

  // Make the non-zero operand the one that gets permuted.
  if (SrcIsZero && !OtherIsZero) {
    CommuteVectorShuffleMask(MaskVals, 32);
    Src = Other;
  }

  SDLoc dl(SVOp);
  return getPSHUFB(MaskVals, Src, dl, DAG);
}
/// Try to re-express a shuffle as a shuffle with fewer, wider elements.
///
/// The mask is cut into groups of Ratio consecutive entries. A group is
/// acceptable when all of its defined entries address consecutive source
/// elements starting at a multiple of Ratio; the group then becomes one entry
/// of the new mask (-1 when every entry of the group is undef).
///
/// \returns the original node for v2i64/v2f64, a shuffle of the bitcast
/// operands in the wider type on success, or an empty SDValue when some group
/// does not qualify.
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
                                 SelectionDAG &DAG) {
  const MVT VT = SVOp->getSimpleValueType(0);

  MVT WideVT;
  unsigned Ratio;
  switch (VT.SimpleTy) {
  case MVT::v2i64:
  case MVT::v2f64:
    // Already at the widest element size handled here.
    return SDValue(SVOp, 0);
  case MVT::v4f32:  WideVT = MVT::v2f64; Ratio = 2; break;
  case MVT::v4i32:  WideVT = MVT::v2i64; Ratio = 2; break;
  case MVT::v8i16:  WideVT = MVT::v4i32; Ratio = 2; break;
  case MVT::v16i8:  WideVT = MVT::v4i32; Ratio = 4; break;
  case MVT::v16i16: WideVT = MVT::v8i32; Ratio = 2; break;
  case MVT::v32i8:  WideVT = MVT::v8i32; Ratio = 4; break;
  default:
    llvm_unreachable("Unexpected!");
  }

  const unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> WideMask;
  for (unsigned Base = 0; Base != NumElems; Base += Ratio) {
    int WideIdx = -1;
    for (unsigned Off = 0; Off != Ratio; ++Off) {
      const int EltIdx = SVOp->getMaskElt(Base + Off);
      if (EltIdx < 0)
        continue;
      // The first defined entry fixes which wide element the group maps to.
      if (WideIdx < 0)
        WideIdx = (EltIdx / Ratio);
      // Every defined entry must sit at its natural offset in that element.
      if (EltIdx != (int)(WideIdx*Ratio + Off))
        return SDValue();
    }
    WideMask.push_back(WideIdx);
  }

  SDLoc dl(SVOp);
  SDValue WideV1 = DAG.getNode(ISD::BITCAST, dl, WideVT, SVOp->getOperand(0));
  SDValue WideV2 = DAG.getNode(ISD::BITCAST, dl, WideVT, SVOp->getOperand(1));
  return DAG.getVectorShuffle(WideVT, dl, WideV1, WideV2, &WideMask[0]);
}
/// Build an X86ISD::VZEXT_MOVL of SrcOp in type OpVT and bitcast the result
/// to VT.
///
/// For v2f64/v4f32 results whose source is neither recognised by
/// isScalarLoadToVector nor a load itself, a source of the form
/// SCALAR_TO_VECTOR(BITCAST(x)) with x of the matching integer width (i64 only
/// on 64-bit subtargets) is rebuilt directly in the integer vector type so the
/// inner bitcast disappears.
static SDValue getVZextMovL(MVT VT, MVT OpVT,
                            SDValue SrcOp, SelectionDAG &DAG,
                            const X86Subtarget *Subtarget, SDLoc dl) {
  const bool IsFPResult = (VT == MVT::v2f64 || VT == MVT::v4f32);
  if (IsFPResult) {
    LoadSDNode *Ld = nullptr;
    if (!isScalarLoadToVector(SrcOp.getNode(), &Ld))
      Ld = dyn_cast<LoadSDNode>(SrcOp);

    if (!Ld) {
      const bool Is64 = (OpVT == MVT::v2f64);
      const MVT IntEltVT = Is64 ? MVT::i64 : MVT::i32;
      const bool IntEltUsable = (IntEltVT != MVT::i64 || Subtarget->is64Bit());

      if (IntEltUsable &&
          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          SrcOp.getOperand(0).getOpcode() == ISD::BITCAST &&
          SrcOp.getOperand(0).getOperand(0).getValueType() == IntEltVT) {
        // Skip the scalar bitcast and move the integer value directly.
        OpVT = Is64 ? MVT::v2i64 : MVT::v4i32;
        SDValue IntScalar = SrcOp.getOperand(0).getOperand(0);
        SDValue AsVector =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OpVT, IntScalar);
        SDValue ZextMov = DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, AsVector);
        return DAG.getNode(ISD::BITCAST, dl, VT, ZextMov);
      }
    }
  }

  // Generic form: bitcast in, VZEXT_MOVL, bitcast out.
  SDValue Casted = DAG.getNode(ISD::BITCAST, dl, OpVT, SrcOp);
  SDValue ZextMov = DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT, Casted);
  return DAG.getNode(ISD::BITCAST, dl, VT, ZextMov);
}
/// Lower a shuffle by producing each half of the result separately and
/// concatenating the two halves.
///
/// Compact8x32ShuffleNode is tried first. Otherwise, for each result half the
/// mask is scanned to find which input halves it reads (input half k is half
/// k%2 of operand k/2). If at most two input halves are needed, the half is a
/// shuffle of those two extracted halves; if more are needed it is built
/// element by element with a BUILD_VECTOR; if none are needed it is undef.
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
  SDValue Compacted = Compact8x32ShuffleNode(SVOp, DAG);
  if (Compacted.getNode())
    return Compacted;

  MVT VT = SVOp->getSimpleValueType(0);
  MVT EltVT = VT.getVectorElementType();
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned HalfElems = NumElems / 2;
  MVT HalfVT = MVT::getVectorVT(EltVT, HalfElems);
  SDLoc dl(SVOp);

  SDValue Halves[2];
  for (unsigned Half = 0; Half < 2; ++Half) {
    const unsigned FirstElt = Half * HalfElems;
    SmallVector<int, 16> HalfMask;
    int Sources[2] = { -1, -1 };
    bool NeedsBuildVector = false;

    for (unsigned i = 0; i != HalfElems; ++i) {
      int Idx = SVOp->getMaskElt(i+FirstElt);
      if (Idx < 0) {
        HalfMask.push_back(-1);
        continue;
      }

      // Split the index into (input half, offset inside that half).
      int Src = Idx / HalfElems;
      Idx -= Src * HalfElems;

      // Find the slot already holding this input half, or the first free one.
      unsigned Slot = 0;
      while (Slot < array_lengthof(Sources) && Sources[Slot] != Src &&
             Sources[Slot] >= 0)
        ++Slot;

      // A third distinct input half: a two-operand shuffle is not enough.
      if (Slot >= array_lengthof(Sources)) {
        NeedsBuildVector = true;
        break;
      }
      if (Sources[Slot] < 0)
        Sources[Slot] = Src;

      HalfMask.push_back(Idx + Slot * HalfElems);
    }

    if (NeedsBuildVector) {
      SmallVector<SDValue, 16> Elts;
      for (unsigned i = 0; i != HalfElems; ++i) {
        int Idx = SVOp->getMaskElt(i+FirstElt);
        if (Idx < 0) {
          Elts.push_back(DAG.getUNDEF(EltVT));
          continue;
        }
        // Here the index is split per full operand, not per half.
        int OperandNo = Idx / NumElems;
        Idx -= OperandNo * NumElems;
        Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
                                   SVOp->getOperand(OperandNo),
                                   DAG.getIntPtrConstant(Idx)));
      }
      Halves[Half] = DAG.getNode(ISD::BUILD_VECTOR, dl, HalfVT, Elts);
      continue;
    }

    // No defined element in this half at all.
    if (Sources[0] < 0) {
      Halves[Half] = DAG.getUNDEF(HalfVT);
      continue;
    }

    SDValue In0 = Extract128BitVector(SVOp->getOperand(Sources[0] / 2),
                                      (Sources[0] % 2) * HalfElems,
                                      DAG, dl);
    SDValue In1;
    if (Sources[1] < 0)
      In1 = DAG.getUNDEF(HalfVT);
    else
      In1 = Extract128BitVector(SVOp->getOperand(Sources[1] / 2),
                                (Sources[1] % 2) * HalfElems, DAG, dl);
    Halves[Half] = DAG.getVectorShuffle(HalfVT, dl, In0, In1, &HalfMask[0]);
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Halves[0], Halves[1]);
}
// Lower a 4-element, 128-bit vector shuffle into a short sequence of generic
// vector shuffles created with DAG.getVectorShuffle.
//
// Mask entries are counted by source: NumLo counts entries with index 0-3
// (first operand), NumHi counts entries with index 4-7 (second operand). One
// of three strategies is then chosen:
//   * NumLo <= 2 and NumHi <= 2: two shuffles (gather, then reorder);
//   * NumLo == 3 or NumHi == 3: two shuffles built around the odd element;
//   * otherwise: one shuffle per result half plus a combining shuffle.
static SDValue
LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
MVT VT = SVOp->getSimpleValueType(0);
assert(VT.is128BitVector() && "Unsupported vector size");
// Locs[i] = (group, slot): group 0 for an element from the first operand,
// group 1 for one from the second, (-1, -1) for undef; slot is the running
// count within that group.
std::pair<int, int> Locs[4];
int Mask1[] = { -1, -1, -1, -1 };
SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
unsigned NumHi = 0;
unsigned NumLo = 0;
for (unsigned i = 0; i != 4; ++i) {
int Idx = PermMask[i];
if (Idx < 0) {
Locs[i] = std::make_pair(-1, -1);
} else {
assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
if (Idx < 4) {
Locs[i] = std::make_pair(0, NumLo);
Mask1[NumLo] = Idx;
NumLo++;
} else {
Locs[i] = std::make_pair(1, NumHi);
// Mask1 has only four slots; the guard keeps the store in bounds when more
// than two elements come from the second operand.
if (2+NumHi < 4)
Mask1[2+NumHi] = Idx;
NumHi++;
}
}
}
// At most two elements from each operand: Mask1 gathers the first-operand
// elements into slots 0-1 and the second-operand elements into slots 2-3;
// Mask2 then moves them to their final positions.
if (NumLo <= 2 && NumHi <= 2) {
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
int Mask2[] = { -1, -1, -1, -1 };
for (unsigned i = 0; i != 4; ++i)
if (Locs[i].first != -1) {
// Result positions 2 and 3 read through the second shuffle operand
// (indices 4-7), which is the same gathered vector.
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = Idx;
}
return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
}
// Three elements from one operand.
if (NumLo == 3 || NumHi == 3) {
// Commute so that the operand contributing three elements is V1.
if (NumHi == 3) {
CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
// Find the first position among 0..2 whose element comes from V2
// (index >= 4); HiIndex ends up as 3 if none of them does.
unsigned HiIndex;
for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
int Val = PermMask[HiIndex];
if (Val < 0)
continue;
if (Val >= 4)
break;
}
// First shuffle: place the element at HiIndex in slot 0 and the element at
// the paired position (HiIndex^1) in slot 2.
Mask1[0] = PermMask[HiIndex];
Mask1[1] = -1;
Mask1[2] = PermMask[HiIndex^1];
Mask1[3] = -1;
V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
// The pair lives in the upper half of the result: take positions 0-1 from
// V1 and positions 2-3 from slots 0/2 of the intermediate shuffle.
if (HiIndex >= 2) {
Mask1[0] = PermMask[0];
Mask1[1] = PermMask[1];
Mask1[2] = HiIndex & 1 ? 6 : 4;
Mask1[3] = HiIndex & 1 ? 4 : 6;
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
}
// The pair lives in the lower half: the intermediate shuffle becomes the
// first operand, and the V1 elements for positions 2-3 are rebased by +4
// because V1 is now the second operand.
Mask1[0] = HiIndex & 1 ? 2 : 0;
Mask1[1] = HiIndex & 1 ? 0 : 2;
Mask1[2] = PermMask[2];
Mask1[3] = PermMask[3];
if (Mask1[2] >= 0)
Mask1[2] += 4;
if (Mask1[3] >= 0)
Mask1[3] += 4;
return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
}
// Remaining case: LoMask collects the elements for result positions 0-1,
// HiMask those for positions 2-3. Within each mask, first-operand elements
// fill slots from 0 and second-operand elements fill slots from 2.
int LoMask[] = { -1, -1, -1, -1 };
int HiMask[] = { -1, -1, -1, -1 };
int *MaskPtr = LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
// Switch to the high-half mask and restart the slot counters.
if (i == 2) {
MaskPtr = HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
}
int Idx = PermMask[i];
if (Idx < 0) {
Locs[i] = std::make_pair(-1, -1);
} else if (Idx < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
MaskPtr[LoIdx] = Idx;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
MaskPtr[HiIdx] = Idx;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
// Final shuffle: Locs[i].first selects LoShuffle (0) or HiShuffle (1) and
// Locs[i].second is the slot inside it.
int MaskOps[] = { -1, -1, -1, -1 };
for (unsigned i = 0; i != 4; ++i)
if (Locs[i].first != -1)
MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
/// Return the result of MayFoldLoad on V after looking through single-use
/// wrappers: any chain of BITCASTs, then one SCALAR_TO_VECTOR, then one
/// two-operand BUILD_VECTOR whose second operand is undef.
static bool MayFoldVectorLoad(SDValue V) {
  // Strip every single-use bitcast.
  for (;;) {
    if (!V.hasOneUse() || V.getOpcode() != ISD::BITCAST)
      break;
    V = V.getOperand(0);
  }

  // Strip one single-use scalar_to_vector.
  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
    V = V.getOperand(0);

  // Strip one single-use (x, undef) build_vector.
  if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR) {
    if (V.getNumOperands() == 2 &&
        V.getOperand(1).getOpcode() == ISD::UNDEF)
      V = V.getOperand(0);
  }

  return MayFoldLoad(V);
}
/// Emit X86ISD::MOVDDUP on V1 viewed as v2f64, and bitcast the result back to
/// the value type of Op.
static
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
  const MVT ResultVT = Op.getSimpleValueType();
  SDValue AsV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
  SDValue Dup =
      getTargetShuffleNode(X86ISD::MOVDDUP, dl, MVT::v2f64, AsV2F64, DAG);
  return DAG.getNode(ISD::BITCAST, dl, ResultVT, Dup);
}
/// Emit a "move low to high" target shuffle for the two operands of Op.
///
/// v2f64 with SSE2 uses X86ISD::MOVLHPD directly; every other type is routed
/// through X86ISD::MOVLHPS on v4f32 with bitcasts on both sides. v2i64 is not
/// supported (asserted).
static
SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG,
                        bool HasSSE2) {
  const MVT VT = Op.getSimpleValueType();
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  assert(VT != MVT::v2i64 && "unsupported shuffle type");

  const bool UseMOVLHPD = HasSSE2 && VT == MVT::v2f64;
  if (UseMOVLHPD)
    return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, Lo, Hi, DAG);

  return DAG.getNode(ISD::BITCAST, dl, VT,
                     getTargetShuffleNode(
                         X86ISD::MOVLHPS, dl, MVT::v4f32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Lo),
                         DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Hi), DAG));
}
/// Emit X86ISD::MOVHLPS for the operands of Op (v4i32 or v4f32 only). When the
/// second operand is undef, the first operand is used for both inputs.
static
SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
  const MVT VT = Op.getSimpleValueType();
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);

  assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
         "unsupported shuffle type");

  const bool SecondIsUndef = Second.getOpcode() == ISD::UNDEF;
  return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, First,
                              SecondIsUndef ? First : Second, DAG);
}
/// Pick the target node for a shuffle that replaces the low part of V1 with
/// the low part of V2.
///
/// If a load can be folded (V2 is a foldable load, or V1 is and Op may fold
/// into a store), MOVLPD is used for 2-element vectors with SSE2 and MOVLPS
/// for 4-element vectors whose mask element 1 is defined. Otherwise, with
/// SSE2, MOVSD or MOVSS is used; without SSE2 a SHUFP with swapped operands is
/// emitted (v4i32 is not supported on that path).
static
SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  const MVT VT = Op.getSimpleValueType();
  const unsigned NumElems = VT.getVectorNumElements();

  const bool FoldableLoad =
      MayFoldVectorLoad(V2) || (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op));

  ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(Op);

  if (FoldableLoad) {
    if (HasSSE2 && NumElems == 2)
      return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
    if (NumElems == 4 && Shuffle->getMaskElt(1) != -1)
      return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
  }

  if (HasSSE2) {
    const bool UseMOVSD =
        NumElems == 2 || !isMOVLMask(Shuffle->getMask(), VT);
    const unsigned Opcode = UseMOVSD ? X86ISD::MOVSD : X86ISD::MOVSS;
    return getTargetShuffleNode(Opcode, dl, VT, V1, V2, DAG);
  }

  assert(VT != MVT::v4i32 && "unsupported shuffle type");

  // SSE1 fallback: SHUFP with the operands exchanged.
  return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
                              getShuffleSHUFImmediate(Shuffle), DAG);
}
/// Replace a vector load by a scalar load of a single element.
///
/// The new load reads one element of the vector's element type from the
/// original address plus Index * (element store size), on the original load's
/// chain. Its memory operand is derived from the original one with the same
/// byte offset and the element's store size, so that it describes the bytes
/// actually read.
///
/// \param Load  the vector load to narrow.
/// \param Index index of the element to load.
/// \param DAG   the DAG in which the new nodes are created.
/// \returns the new scalar load.
static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
                                         SelectionDAG &DAG) {
  SDLoc dl(Load);
  MVT VT = Load->getSimpleValueType(0);
  MVT EVT = VT.getVectorElementType();
  const unsigned EltBytes = EVT.getStoreSize();
  const unsigned ByteOffset = Index * EltBytes;

  SDValue Addr = Load->getOperand(1);
  SDValue NewAddr = DAG.getNode(
      ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
      DAG.getConstant(ByteOffset, Addr.getSimpleValueType()));

  // The memory operand must be offset by the same amount as the address;
  // leaving it at offset 0 would describe the first element instead.
  SDValue NewLoad =
      DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
                  DAG.getMachineFunction().getMachineMemOperand(
                      Load->getMemOperand(), ByteOffset, EltBytes));
  return NewLoad;
}
/// Lower a v4f32/v4i32 shuffle that inserts a single element of one operand
/// into the other operand.
///
/// The operand that contributes exactly one element is the source ("From"),
/// the other is the destination ("To"). When both operands contribute exactly
/// one element and V1's element is already in its own position, V2 is chosen
/// as the source instead. If the source is a foldable load, only the needed
/// element is loaded and inserted (INSERTPS for f32 elements,
/// INSERT_VECTOR_ELT otherwise); otherwise an X86ISD::INSERTPS with immediate
/// (DestIndex << 4) | (SrcIndex << 6) is emitted.
static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
                           SelectionDAG &DAG) {
  const MVT VT = SVOp->getSimpleValueType(0);
  const MVT EltVT = VT.getVectorElementType();
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  auto Mask = SVOp->getMask();
  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
         "unsupported vector type for insertps/pinsrd");

  auto IsFromV1 = [](const int &i) { return i < 4 && i > -1; };
  auto IsFromV2 = [](const int &i) { return i >= 4; };

  const int NumFromV1 = std::count_if(Mask.begin(), Mask.end(), IsFromV1);

  // Start from the assumption that V2 supplies the inserted element.
  SDValue From = V2;
  SDValue To = V1;
  unsigned DestIndex;
  if (NumFromV1 != 1) {
    assert(std::count_if(Mask.begin(), Mask.end(), IsFromV2) == 1 &&
           "More than one element from V1 and from V2, or no elements from one "
           "of the vectors. This case should not have returned true from "
           "isINSERTPSMask");
    DestIndex =
        std::find_if(Mask.begin(), Mask.end(), IsFromV2) - Mask.begin();
  } else {
    DestIndex = std::find_if(Mask.begin(), Mask.end(), IsFromV1) -
                Mask.begin();
    const int NumFromV2 = std::count_if(Mask.begin(), Mask.end(), IsFromV2);
    assert(DestIndex <= INT32_MAX && "truncated destination index");

    // One element from each side and V1's element does not move: keep V2 as
    // the source. Otherwise V1 is the source.
    const bool V1EltInPlace =
        static_cast<int>(DestIndex) == Mask[DestIndex] % 4;
    if (NumFromV1 == NumFromV2 && V1EltInPlace) {
      DestIndex =
          std::find_if(Mask.begin(), Mask.end(), IsFromV2) - Mask.begin();
    } else {
      From = V1;
      To = V2;
    }
  }

  const unsigned SrcIndex = Mask[DestIndex] % 4;

  if (MayFoldLoad(From)) {
    // Load just the element that is being inserted.
    SDValue EltLoad =
        NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG);
    if (!EltLoad.getNode())
      return SDValue();

    if (EltVT != MVT::f32)
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, To, EltLoad,
                         DAG.getConstant(DestIndex, MVT::i32));

    SDValue EltAsVector =
        DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, EltLoad);
    SDValue LoadImm = DAG.getIntPtrConstant(DestIndex << 4);
    return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, EltAsVector, LoadImm);
  }

  SDValue Imm = DAG.getIntPtrConstant((DestIndex << 4) | (SrcIndex << 6));
  return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, Imm);
}
/// Recognise a one-input integer shuffle that spreads the leading source
/// elements out with undef elements in between, and lower it to X86ISD::VZEXT.
///
/// With Shift chosen as described below, element i of the mask must be
/// i >> Shift when i is a multiple of (1 << Shift) and undef (-1) otherwise.
/// Requires SSE4.1 (and hasInt256 for 256-bit vectors), an undef second
/// operand, integer elements other than i64, and a legal widened vector type.
///
/// \returns the VZEXT bitcast back to the original type, or an empty SDValue.
static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
                                    SelectionDAG &DAG) {
  if (!Subtarget->hasSSE41())
    return SDValue();

  const MVT VT = Op.getSimpleValueType();
  if (VT.is256BitVector() && !Subtarget->hasInt256())
    return SDValue();

  ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(Op);
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  SDValue Other = Op.getOperand(1);
  const unsigned NumElems = VT.getVectorNumElements();

  if (Other.getOpcode() != ISD::UNDEF)
    return SDValue();
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Find the stride: the smallest power of two position (>= 2) whose mask
  // entry is 1. Give up once the shift would exceed 3.
  unsigned Shift = 1;
  for (; (1U << Shift) < NumElems; ++Shift) {
    if (Shuffle->getMaskElt(1U << Shift) == 1)
      break;
    if (Shift + 1 > 3)
      return SDValue();
  }

  // Verify the whole mask against the expected pattern.
  const unsigned StrideMask = (1U << Shift) - 1;
  for (unsigned i = 0; i != NumElems; ++i) {
    const int EltIdx = Shuffle->getMaskElt(i);
    const bool OnStride = (i & StrideMask) == 0;
    if (!OnStride && EltIdx != -1)
      return SDValue();
    if (OnStride && (unsigned)EltIdx != (i >> Shift))
      return SDValue();
  }

  const unsigned WideBits = VT.getVectorElementType().getSizeInBits() << Shift;
  const MVT WideEltVT = MVT::getIntegerVT(WideBits);
  const MVT WideVT = MVT::getVectorVT(WideEltVT, NumElems >> Shift);

  if (!DAG.getTargetLoweringInfo().isTypeLegal(WideVT))
    return SDValue();

  SDValue Zext = DAG.getNode(X86ISD::VZEXT, DL, WideVT, Src);
  return DAG.getNode(ISD::BITCAST, DL, VT, Zext);
}
/// Try a handful of canonicalising lowerings before the main shuffle
/// matching runs.
///
/// In order: zero shuffles become a zero vector; splats are offered to
/// LowerVectorBroadcast; LowerVectorIntExtend is tried; v8i16/v16i8/v16i16/
/// v32i8 shuffles are rewritten with wider elements when possible; and, for
/// other 128-bit types with SSE2, a shuffle against an all-zero operand whose
/// widened form is a (commuted) MOVL mask becomes a VZEXT_MOVL.
///
/// \returns the replacement value, or an empty SDValue if nothing applied.
static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
                                      SelectionDAG &DAG) {
  ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(Op);
  const MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  if (isZeroShuffle(Shuffle))
    return getZeroVector(VT, Subtarget, DAG, dl);

  if (Shuffle->isSplat()) {
    SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
    if (Broadcast.getNode())
      return Broadcast;
  }

  SDValue Extended = LowerVectorIntExtend(Op, Subtarget, DAG);
  if (Extended.getNode())
    return Extended;

  const bool HasSmallElts = VT == MVT::v8i16 || VT == MVT::v16i8 ||
                            VT == MVT::v16i16 || VT == MVT::v32i8;
  if (HasSmallElts) {
    SDValue Widened = RewriteAsNarrowerShuffle(Shuffle, DAG);
    if (Widened.getNode())
      return DAG.getNode(ISD::BITCAST, dl, VT, Widened);
    return SDValue();
  }

  if (!VT.is128BitVector() || !Subtarget->hasSSE2())
    return SDValue();

  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);

  if (ISD::isBuildVectorAllZeros(V2.getNode())) {
    // Zero second operand: look for a commuted MOVL in the widened shuffle.
    SDValue Widened = RewriteAsNarrowerShuffle(Shuffle, DAG);
    if (Widened.getNode()) {
      MVT WideVT = Widened.getSimpleValueType();
      if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(Widened)->getMask(),
                             WideVT, true, false))
        return getVZextMovL(VT, WideVT, Widened.getOperand(0), DAG, Subtarget,
                            dl);
    }
    return SDValue();
  }

  if (ISD::isBuildVectorAllZeros(V1.getNode())) {
    // Zero first operand: look for a plain MOVL in the widened shuffle.
    SDValue Widened = RewriteAsNarrowerShuffle(Shuffle, DAG);
    if (Widened.getNode()) {
      MVT WideVT = Widened.getSimpleValueType();
      if (isMOVLMask(cast<ShuffleVectorSDNode>(Widened)->getMask(), WideVT))
        return getVZextMovL(VT, WideVT, Widened.getOperand(1), DAG, Subtarget,
                            dl);
    }
  }

  return SDValue();
}
// Custom lowering entry point for a vector shuffle node.
//
// When the ExperimentalVectorShuffleLowering option is set, the work is
// delegated to lowerVectorShuffle. Otherwise the shuffle mask is tested
// against an ordered series of mask predicates and the first one that matches
// decides which target node (or helper lowering) is used. Returns an empty
// SDValue when nothing matched.
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasSSE2 = Subtarget->hasSSE2();
bool HasFp256 = Subtarget->hasFp256();
bool HasInt256 = Subtarget->hasInt256();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize =
MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
// The experimental lowering path, when enabled, handles everything.
if (ExperimentalVectorShuffleLowering)
return lowerVectorShuffle(Op, Subtarget, DAG);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
// Trivial operand cases: both undef gives undef; only V1 undef is commuted
// so that the defined operand comes first.
if (V1IsUndef && V2IsUndef)
return DAG.getUNDEF(VT);
if (V1IsUndef)
return DAG.getCommutedVectorShuffle(*SVOp);
// Canonicalising lowerings (zero vector, broadcast, extend, widened shuffle).
SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
// Local, mutable copy of the mask; it may be commuted further down.
SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
// When optimizing for size, try UNPCKL/UNPCKH of V1 with itself first.
if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
// MOVDDUP is only used here when V1 looks like a foldable load.
if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
V2IsUndef && MayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
// Single-input permutes expressible with one immediate: MOVLHPS, PSHUFD,
// VPERMILPI or SHUFP depending on type and subtarget features.
if (isPSHUFDMask(M, VT)) {
if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1, TargetMask,
DAG);
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGNR, dl, VT, V1, V2,
getShufflePALIGNRImmediate(SVOp),
DAG);
if (isVALIGNMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::VALIGN, dl, VT, V1, V2,
getShuffleVALIGNImmediate(SVOp),
DAG);
// Whole-vector shift. It is emitted right away only when the shifted value
// has a single use; otherwise the decision is deferred until after the
// MOVL/MOVLP family of checks below.
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
MVT EltVT = VT.getVectorElementType();
// Scale the shift amount by the element width in bits.
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
if (isMOVLMask(M, VT)) {
// A MOVL into an all-zero vector is a zero-extending move of V2.
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!isMOVLPMask(M, VT)) {
if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
}
}
if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
if (isMOVHLPSMask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
if (isMOVLPMask(M, VT))
return getMOVLP(Op, dl, DAG, HasSSE2);
// Commute the shuffle when the helpers report that the commuted form can be
// matched as MOVHLPS or MOVLP.
if (ShouldXformToMOVHLPS(M, VT) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
return DAG.getCommutedVectorShuffle(*SVOp);
// Deferred shift (the shifted value has more than one use).
if (isShift) {
MVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
// Determine whether either operand is a constant splat build_vector with
// no undef elements.
bool Commuted = false;
BitVector UndefElements;
if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode()))
if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
V1IsSplat = true;
if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode()))
if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
V2IsSplat = true;
// Prefer to have the splat as V2: commute the local mask and operands. The
// commute is undone below if none of the splat-aware matches fire.
if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
CommuteVectorShuffleMask(M, NumElems);
std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
Commuted = true;
}
if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
if (V2IsUndef)
return V1;
return getMOVL(DAG, dl, VT, V2, V1);
}
if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
// With a splat V2, retry the unpack checks on a normalized copy of the mask.
if (V2IsSplat) {
SmallVector<int, 8> NewMask(M.begin(), M.end());
NormalizeMask(NewMask, NumElems);
if (isUNPCKLMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(NewMask, VT, HasInt256, true))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Undo the earlier commute and retry the unpack checks in original order.
if (Commuted) {
CommuteVectorShuffleMask(M, NumElems);
std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
if (isUNPCKLMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
if (isUNPCKHMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// If the mask matches SHUFP in its commuted form, commute the node itself.
if (!V2IsUndef && (isSHUFPMask(M, VT, true)))
return DAG.getCommutedVectorShuffle(*SVOp);
// Splat of element 0 of a two-element vector: UNPCKL of V1 with itself.
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
if (VT == MVT::v2f64 || VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
getShufflePSHUFHWImmediate(SVOp),
DAG);
if (isPSHUFLWMask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
getShufflePSHUFLWImmediate(SVOp),
DAG);
unsigned MaskValue;
if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
&MaskValue))
return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
// Same self-unpack checks as at the top, now regardless of OptForSize.
if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
if (isVPERMILPMask(M, VT)) {
if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILPI, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
}
// 512-bit: insert the low 256 bits of V2 into one half of V1.
unsigned Idx;
if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
Idx*(NumElems/2), DAG, dl);
if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
return getINSERTPS(SVOp, dl, DAG);
unsigned Imm8;
if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
// Variable permute: materialize the mask as a constant vector (undef
// entries become 0) and emit VPERMV, or VPERMV3 when V2 is live.
if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
VT.is512BitVector()) {
MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
SmallVector<SDValue, 16> permclMask;
for (unsigned i = 0; i != NumElems; ++i) {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, permclMask);
if (V2IsUndef)
return DAG.getNode(X86ISD::VPERMV, dl, VT,
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
return DAG.getNode(X86ISD::VPERMV3, dl, VT, V1,
DAG.getNode(ISD::BITCAST, dl, VT, Mask), V2);
}
// Type-specific fallbacks; each may decline by returning an empty value.
if (VT == MVT::v8i16) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v16i16 && Subtarget->hasInt256()) {
SDValue NewOp = LowerVECTOR_SHUFFLEv16i16(Op, DAG);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v16i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
if (VT == MVT::v32i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv32i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
// Generic last resorts for 4-element 128-bit and for 256-bit vectors.
if (NumElems == 4 && VT.is128BitVector())
return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG);
if (VT.is256BitVector())
return LowerVECTOR_SHUFFLE_256(SVOp, DAG);
return SDValue();
}
/// Convert a build_vector select condition into a blend immediate.
///
/// The operands are treated as one lane of up to 8 elements, or two lanes when
/// there are more than 8. Bit i of MaskValue is set when the condition for
/// element i is a zero constant. With two lanes, the conditions at position i
/// of both lanes must agree unless one of them is not a constant, in which
/// case the other one decides.
///
/// \param BuildVector the condition vector.
/// \param MaskValue   receives the immediate; always reset to 0 first.
/// \returns false if the two lanes disagree at some position, true otherwise.
static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
                                    unsigned &MaskValue) {
  MaskValue = 0;
  const unsigned NumElems = BuildVector->getNumOperands();
  const unsigned NumLanes = (NumElems - 1) / 8 + 1;
  const unsigned LaneSize = NumElems / NumLanes;

  for (unsigned i = 0; i < LaneSize; ++i) {
    SDValue FirstCond = BuildVector->getOperand(i);
    SDValue SecondCond = FirstCond;
    if (NumLanes == 2)
      SecondCond = BuildVector->getOperand(i + LaneSize);

    // -1 means "not a constant"; otherwise 1 for non-zero and 0 for zero.
    int First = -1;
    if (isa<ConstantSDNode>(FirstCond))
      First = !isZero(FirstCond);
    int Second = -1;
    if (isa<ConstantSDNode>(SecondCond))
      Second = !isZero(SecondCond);

    int Decider;
    if (First == Second || Second < 0)
      Decider = First;
    else if (First < 0)
      Decider = Second;
    else
      return false;

    MaskValue |= !Decider << i;
  }
  return true;
}
/// Try to lower a VSELECT with a constant build_vector condition to
/// X86ISD::BLENDI.
///
/// Declines (returns an empty SDValue) for 512-bit vectors, without SSE4.1,
/// for i8 elements, for v16i16 without hasInt256, when the condition is not a
/// build_vector of constants, or when BUILD_VECTORtoBlendMask fails.
///
/// For i64 elements, and for i32 elements without hasInt256, the blend is
/// performed in the floating-point vector type of the same shape: both value
/// operands are bitcast to that type before the BLENDI and the result is
/// bitcast back to the original type.
///
/// \param Op        the VSELECT node (condition, true value, false value).
/// \param Subtarget used for feature queries.
/// \param DAG       the DAG in which the new nodes are created.
/// \returns the blend, in the type of Op, or an empty SDValue.
static SDValue lowerVSELECTtoBLENDI(SDValue Op, const X86Subtarget *Subtarget,
                                    SelectionDAG &DAG) {
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();

  if (VT.is512BitVector())
    return SDValue();
  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
    return SDValue();
  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
    return SDValue();
  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  unsigned MaskValue = 0;
  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
    return SDValue();

  MVT BlendVT = VT;
  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
    BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
                               NumElems);
    // The operands must have the type of the blend node itself; casting them
    // to VT (their existing type) would leave them as integer vectors.
    LHS = DAG.getNode(ISD::BITCAST, dl, BlendVT, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, BlendVT, RHS);
  }

  SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
                            DAG.getConstant(MaskValue, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
}
/// Custom lowering hook for VSELECT.
///
/// Returns an empty SDValue when all three operands are build_vectors of
/// constants. Otherwise tries lowerVSELECTtoBLENDI; if that declines, v8i16
/// and v16i16 selects are rejected (empty SDValue) unless the subtarget has
/// both BWI and VLX, and every other type is returned unchanged.
SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  const bool AllOperandsConstant =
      ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(0).getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(1).getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode());
  if (AllOperandsConstant)
    return SDValue();

  SDValue Blend = lowerVSELECTtoBLENDI(Op, Subtarget, DAG);
  if (Blend.getNode())
    return Blend;

  const MVT VT = Op.getSimpleValueType();
  const bool Is16BitEltVector = (VT == MVT::v8i16 || VT == MVT::v16i16);
  if (Is16BitEltVector && !(Subtarget->hasBWI() && Subtarget->hasVLX()))
    return SDValue();

  return Op;
}
/// Lower an element extract from a 128-bit vector.
///
///  - 8-bit result: X86ISD::PEXTRB to i32, AssertZext, truncate.
///  - 16-bit result: index 0 is extracted as an i32 from the v4i32 view and
///    truncated; other indices use X86ISD::PEXTRW like the 8-bit case.
///  - f32 result: only when the single user is a store of a non-zero-index
///    element, or a bitcast to i32; extracted as i32 from the v4i32 view and
///    bitcast back to f32.
///  - i32/i64 result with a constant index: returned unchanged.
///
/// \returns the lowered value, or an empty SDValue if none of the cases apply.
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  const MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);

  if (!Vec.getSimpleValueType().is128BitVector())
    return SDValue();

  const unsigned ResultBits = VT.getSizeInBits();

  if (ResultBits == 8) {
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, Idx);
    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
  }

  if (ResultBits == 16) {
    const unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
    if (IdxVal == 0) {
      // Element 0 is the low half of i32 element 0.
      SDValue AsV4I32 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Vec);
      SDValue Dword =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, AsV4I32, Idx);
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Dword);
    }
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec, Idx);
    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
  }

  if (VT == MVT::f32) {
    if (!Op.hasOneUse())
      return SDValue();

    SDNode *User = *Op.getNode()->use_begin();
    const bool IdxIsZero = isa<ConstantSDNode>(Idx) &&
                           cast<ConstantSDNode>(Idx)->isNullValue();
    const bool StoresNonZeroElt =
        User->getOpcode() == ISD::STORE && !IdxIsZero;
    const bool BitcastsToI32 = User->getOpcode() == ISD::BITCAST &&
                               User->getValueType(0) == MVT::i32;
    if (!StoresNonZeroElt && !BitcastsToI32)
      return SDValue();

    SDValue AsV4I32 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Vec);
    SDValue Extract =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, AsV4I32, Idx);
    return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
  }

  const bool IsI32OrI64 = (VT == MVT::i32 || VT == MVT::i64);
  if (IsI32OrI64 && isa<ConstantSDNode>(Idx))
    return Op;

  return SDValue();
}
/// Extract a single i1 element from a mask vector.
///
/// A non-constant index is handled by zero-extending the mask to v8i64 (for
/// v8i1) or v16i32 (otherwise), extracting the element and truncating it to
/// i1. A constant index is handled by shifting the requested bit to the top
/// with X86ISD::VSHLI, shifting it back down to bit 0 with X86ISD::VSRLI, and
/// extracting element 0 with X86ISD::VEXTRACT. The shift distance is derived
/// from the size of the register class for the vector type (the v16i1 class
/// is used for vectors of at most 8 elements when the subtarget lacks DQI).
SDValue
X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc dl(Vec);
  const MVT VecVT = Vec.getSimpleValueType();
  const MVT EltVT = Op.getSimpleValueType();

  assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector");
  assert((VecVT.getVectorNumElements() <= 16 || Subtarget->hasBWI()) &&
         "Unexpected vector type in ExtractBitFromMaskVector");

  ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx);
  if (!ConstIdx) {
    // Variable index: go through a wide integer vector.
    MVT WideVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
    SDValue Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Vec);
    SDValue WideElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                                  WideVT.getVectorElementType(), Wide, Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, EltVT, WideElt);
  }

  const unsigned IdxVal = ConstIdx->getZExtValue();

  const TargetRegisterClass *RC = getRegClassFor(VecVT);
  if (!Subtarget->hasDQI() && (VecVT.getVectorNumElements() <= 8))
    RC = getRegClassFor(MVT::v16i1);
  const unsigned MaxShift = RC->getSize()*8 - 1;

  // Move the wanted bit to the most significant position, then down to bit 0.
  SDValue Shifted = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
                                DAG.getConstant(MaxShift - IdxVal, MVT::i8));
  Shifted = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Shifted,
                        DAG.getConstant(MaxShift, MVT::i8));
  return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Shifted,
                     DAG.getIntPtrConstant(0));
}
/// Custom-lower EXTRACT_VECTOR_ELT.
///
/// - i1 results are delegated to ExtractBitFromMaskVector.
/// - A variable index is only handled for 512-bit vectors, or 256-bit vectors
///   of 32-bit elements when the subtarget has Int256: the index is placed in
///   element 0 of a zero mask vector, a VPERMV moves the selected element to
///   position 0, and element 0 is extracted. Otherwise SDValue() is returned.
/// - For 256/512-bit vectors with a constant index, the containing 128-bit
///   chunk is extracted first and the index is reduced to that chunk.
/// - 128-bit vectors first try the SSE4.1 path (when available) and then fall
///   back to per-element-size handling for 16, 32 and 64-bit elements.
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue SrcVec = Op.getOperand(0);
  SDValue IdxOp = Op.getOperand(1);
  MVT VecVT = SrcVec.getSimpleValueType();

  if (Op.getSimpleValueType() == MVT::i1)
    return ExtractBitFromMaskVector(Op, DAG);

  auto *IdxC = dyn_cast<ConstantSDNode>(IdxOp);
  if (!IdxC) {
    const bool CanUseVPERMV =
        VecVT.is512BitVector() ||
        (VecVT.is256BitVector() && Subtarget->hasInt256() &&
         VecVT.getVectorElementType().getSizeInBits() == 32);
    if (!CanUseVPERMV)
      return SDValue();

    // Build an integer vector type with the same layout as the source.
    MVT MaskEltVT =
        MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
    MVT MaskVT = MVT::getVectorVT(
        MaskEltVT, VecVT.getSizeInBits() / MaskEltVT.getSizeInBits());

    // Put the (resized) index into element 0 of a zero vector and permute.
    SDValue ResizedIdx = DAG.getZExtOrTrunc(IdxOp, DL, MaskEltVT);
    SDValue ZeroMask = getZeroVector(MaskVT, Subtarget, DAG, DL);
    SDValue InsertPos = DAG.getConstant(0, getPointerTy());
    SDValue PermMask = DAG.getNode(X86ISD::VINSERT, DL, MaskVT, ZeroMask,
                                   ResizedIdx, InsertPos);
    SDValue Permuted =
        DAG.getNode(X86ISD::VPERMV, DL, VecVT, PermMask, SrcVec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                       Permuted, DAG.getConstant(0, getPointerTy()));
  }

  const unsigned IdxVal = IdxC->getZExtValue();

  if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
    // Narrow to the 128-bit chunk holding the element, then extract from it.
    SDValue Chunk = Extract128BitVector(SrcVec, IdxVal, DAG, DL);
    const unsigned EltsPerChunk =
        128 / VecVT.getVectorElementType().getSizeInBits();
    const unsigned IdxInChunk = IdxVal % EltsPerChunk;
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), Chunk,
                       DAG.getConstant(IdxInChunk, MVT::i32));
  }

  assert(VecVT.is128BitVector() && "Unexpected vector length");

  if (Subtarget->hasSSE41()) {
    SDValue SSE4Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
    if (SSE4Res.getNode())
      return SSE4Res;
  }

  MVT ResVT = Op.getSimpleValueType();
  switch (ResVT.getSizeInBits()) {
  case 16: {
    if (IdxVal == 0) {
      // Element 0: extract the low i32 of the v4i32 view and truncate.
      SDValue AsV4I32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, SrcVec);
      SDValue Low32 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                  AsV4I32, Op.getOperand(1));
      return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Low32);
    }
    // Other elements: PEXTRW produces an i32 whose upper bits are asserted
    // zero, which is then truncated to the result type.
    SDValue Word = DAG.getNode(X86ISD::PEXTRW, DL, MVT::i32, Op.getOperand(0),
                               Op.getOperand(1));
    SDValue ZextWord = DAG.getNode(ISD::AssertZext, DL, MVT::i32, Word,
                                   DAG.getValueType(ResVT));
    return DAG.getNode(ISD::TRUNCATE, DL, ResVT, ZextWord);
  }
  case 32: {
    if (IdxVal == 0)
      return Op;
    // Shuffle the requested element into lane 0 and extract lane 0.
    int ShufMask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
    MVT ShufVT = Op.getOperand(0).getSimpleValueType();
    SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, Op.getOperand(0),
                                        DAG.getUNDEF(ShufVT), ShufMask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Shuf,
                       DAG.getIntPtrConstant(0));
  }
  case 64: {
    if (IdxVal == 0)
      return Op;
    // Move the high element into lane 0 and extract lane 0.
    int ShufMask[2] = { 1, -1 };
    MVT ShufVT = Op.getOperand(0).getSimpleValueType();
    SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, Op.getOperand(0),
                                        DAG.getUNDEF(ShufVT), ShufMask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Shuf,
                       DAG.getIntPtrConstant(0));
  }
  default:
    break;
  }
  return SDValue();
}
/// Insert an i1 element into a mask vector (a vector of i1).
///
/// With a non-constant index, both the vector and the element are
/// zero-extended (v8i64/i64 for v8i1, v16i32/i32 otherwise), the insertion is
/// done on the wide vector, and the result is truncated back.
///
/// With a constant index, the element is turned into a vector, shifted into
/// the requested position, and OR-ed into the existing vector. If the
/// existing vector is UNDEF, the shifted element vector is returned directly.
SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue MaskVec = Op.getOperand(0);
  SDValue Bit = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT MaskVT = MaskVec.getSimpleValueType();

  auto *IdxC = dyn_cast<ConstantSDNode>(IdxOp);
  if (!IdxC) {
    const bool IsV8 = MaskVT == MVT::v8i1;
    MVT WideVecVT = IsV8 ? MVT::v8i64 : MVT::v16i32;
    MVT WideEltVT = IsV8 ? MVT::i64 : MVT::i32;
    SDValue WideVec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVecVT, MaskVec);
    SDValue WideBit = DAG.getNode(ISD::ZERO_EXTEND, DL, WideEltVT, Bit);
    SDValue WideIns = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVecVT,
                                  WideVec, WideBit, IdxOp);
    return DAG.getNode(ISD::TRUNCATE, DL, MaskVT, WideIns);
  }

  const unsigned BitIdx = IdxC->getZExtValue();
  SDValue BitVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MaskVT, Bit);

  // Nothing to merge with: just move the bit into place.
  if (MaskVec.getOpcode() == ISD::UNDEF)
    return DAG.getNode(X86ISD::VSHLI, DL, MaskVT, BitVec,
                       DAG.getConstant(BitIdx, MVT::i8));

  const TargetRegisterClass *RegClass = getRegClassFor(MaskVT);
  const unsigned TopBit = RegClass->getSize() * 8 - 1;

  // Shift the bit to the top of the register and back down to BitIdx, then
  // OR it into the original vector.
  SDValue UpAmt = DAG.getConstant(TopBit, MVT::i8);
  BitVec = DAG.getNode(X86ISD::VSHLI, DL, MaskVT, BitVec, UpAmt);
  SDValue DownAmt = DAG.getConstant(TopBit - BitIdx, MVT::i8);
  BitVec = DAG.getNode(X86ISD::VSRLI, DL, MaskVT, BitVec, DownAmt);
  return DAG.getNode(ISD::OR, DL, MaskVT, MaskVec, BitVec);
}
/// Custom-lower INSERT_VECTOR_ELT.
///
/// - i1 element vectors are delegated to InsertBitToMaskVector.
/// - A non-constant index is not handled (returns SDValue()).
/// - 256/512-bit vectors: extract the 128-bit chunk containing the index,
///   insert into that chunk, and insert the chunk back.
/// - 128-bit vectors with SSE4.1: PINSRB/PINSRW for 8/16-bit elements,
///   INSERTPS for f32, and i32/i64 are returned unchanged.
/// - 128-bit vectors otherwise: PINSRW for 16-bit elements; i8 and all
///   remaining cases return SDValue().
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT ScalarVT = VT.getVectorElementType();

  if (ScalarVT == MVT::i1)
    return InsertBitToMaskVector(Op, DAG);

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);

  auto *IdxC = dyn_cast<ConstantSDNode>(IdxOp);
  if (!IdxC)
    return SDValue();
  const unsigned IdxVal = IdxC->getZExtValue();

  if (VT.is256BitVector() || VT.is512BitVector()) {
    // Work on the 128-bit chunk that contains the target element.
    SDValue Chunk = Extract128BitVector(Vec, IdxVal, DAG, DL);
    const unsigned EltsPerChunk = 128 / ScalarVT.getSizeInBits();
    const unsigned IdxInChunk = IdxVal % EltsPerChunk;
    SDValue ChunkIdx = DAG.getConstant(IdxInChunk, MVT::i32);
    Chunk = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Chunk.getValueType(),
                        Chunk, Elt, ChunkIdx);
    return Insert128BitVector(Vec, Chunk, IdxVal, DAG, DL);
  }

  assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");

  // Emit a PINSR-style node: the inserted value is any-extended to i32 and
  // the index is rebuilt as a pointer-sized constant unless it is already i32.
  auto EmitPInsr = [&](unsigned Opcode) -> SDValue {
    SDValue Val = Elt;
    if (Val.getValueType() != MVT::i32)
      Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    SDValue Pos = IdxOp;
    if (Pos.getValueType() != MVT::i32)
      Pos = DAG.getIntPtrConstant(IdxVal);
    return DAG.getNode(Opcode, DL, VT, Vec, Val, Pos);
  };

  const unsigned ScalarBits = ScalarVT.getSizeInBits();

  if (Subtarget->hasSSE41()) {
    if (ScalarBits == 8 || ScalarBits == 16) {
      unsigned Opcode;
      if (VT == MVT::v8i16) {
        Opcode = X86ISD::PINSRW;
      } else {
        assert(VT == MVT::v16i8);
        Opcode = X86ISD::PINSRB;
      }
      return EmitPInsr(Opcode);
    }

    if (ScalarVT == MVT::f32) {
      // The destination element index is encoded starting at bit 4 of the
      // INSERTPS immediate.
      SDValue Imm = DAG.getIntPtrConstant(IdxVal << 4);
      SDValue EltVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, Elt);
      return DAG.getNode(X86ISD::INSERTPS, DL, VT, Vec, EltVec, Imm);
    }

    if (ScalarVT == MVT::i32 || ScalarVT == MVT::i64)
      return Op;
  }

  if (ScalarVT == MVT::i8)
    return SDValue();

  if (ScalarBits == 16)
    return EmitPInsr(X86ISD::PINSRW);

  return SDValue();
}
/// Custom-lower SCALAR_TO_VECTOR.
///
/// - v1i64 built from an i64 scalar is re-emitted as the same
///   SCALAR_TO_VECTOR node.
/// - Vectors wider than 128 bits are built by creating the 128-bit
///   SCALAR_TO_VECTOR of the same element type and inserting it at position 0
///   of an UNDEF vector of the full width.
/// - 128-bit vectors any-extend the scalar to i32, build a v4i32
///   SCALAR_TO_VECTOR and bitcast it to the requested type.
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT OpVT = Op.getSimpleValueType();

  // This check must precede the "not 128-bit" branch below: v1i64 is only 64
  // bits wide, so it would otherwise be captured by that branch, where
  // SizeFactor would be 0 and the element count division would divide by zero.
  if (OpVT == MVT::v1i64 &&
      Op.getOperand(0).getValueType() == MVT::i64)
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));

  if (!OpVT.is128BitVector()) {
    // Number of 128-bit chunks in the result type.
    unsigned SizeFactor = OpVT.getSizeInBits()/128;
    assert(SizeFactor != 0 && "Expected a vector wider than 128 bits!");
    MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
                                 OpVT.getVectorNumElements() / SizeFactor);

    // Build the low 128-bit chunk, then place it in an UNDEF wide vector.
    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
    return Insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
  }

  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
  assert(OpVT.is128BitVector() && "Expected an SSE type!");
  return DAG.getNode(ISD::BITCAST, dl, OpVT,
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt));
}
/// Custom-lower EXTRACT_SUBVECTOR.
///
/// Handled only when the subtarget reports hasFp256() and the index is a
/// constant:
///  - 128-bit result from a 256-bit or 512-bit input -> Extract128BitVector
///  - 256-bit result from a 512-bit input            -> Extract256BitVector
/// Every other combination returns SDValue().
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
                                      SelectionDAG &DAG) {
  SDLoc dl(Op);
  SDValue In = Op.getOperand(0);

  // Check that the index is a constant *before* reading its value. The
  // previous code performed an unconditional cast<ConstantSDNode> first and
  // only tested isa<ConstantSDNode> afterwards, which made the tests dead and
  // left a non-constant index unguarded.
  auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!IdxC)
    return SDValue();

  if (!Subtarget->hasFp256())
    return SDValue();

  unsigned IdxVal = IdxC->getZExtValue();
  MVT ResVT = Op.getSimpleValueType();
  MVT InVT = In.getSimpleValueType();

  if (ResVT.is128BitVector() &&
      (InVT.is256BitVector() || InVT.is512BitVector()))
    return Extract128BitVector(In, IdxVal, DAG, dl);

  if (ResVT.is256BitVector() && InVT.is512BitVector())
    return Extract256BitVector(In, IdxVal, DAG, dl);

  return SDValue();
}
/// Custom-lower INSERT_SUBVECTOR (requires AVX and a constant index).
///
/// When a 128-bit subvector is inserted into the upper half of a 256-bit
/// vector that is itself an INSERT_SUBVECTOR at index 0, and the subtarget
/// does not report slow unaligned 32-byte memory access, the two halves are
/// first offered to EltsFromConsecutiveLoads. If that does not produce a
/// node, the generic Insert128BitVector / Insert256BitVector helpers are used.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
                                     SelectionDAG &DAG) {
  if (!Subtarget->hasAVX())
    return SDValue();

  SDLoc DL(Op);
  SDValue Base = Op.getOperand(0);
  SDValue Sub = Op.getOperand(1);

  auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  if (!IdxC)
    return SDValue();
  const unsigned IdxVal = IdxC->getZExtValue();

  MVT ResVT = Op.getSimpleValueType();
  MVT SubVT = Sub.getSimpleValueType();
  const bool Res256 = ResVT.is256BitVector();
  const bool Res512 = ResVT.is512BitVector();
  const bool Sub128 = SubVT.is128BitVector();

  const bool FillsUpperHalf =
      (IdxVal == ResVT.getVectorNumElements() / 2) &&
      Base.getOpcode() == ISD::INSERT_SUBVECTOR && Res256 && Sub128 &&
      !Subtarget->isUnalignedMem32Slow();
  if (FillsUpperHalf) {
    auto *LowIdxC = dyn_cast<ConstantSDNode>(Base.getOperand(2));
    if (LowIdxC && LowIdxC->getZExtValue() == 0) {
      // Low half first, then the half being inserted now.
      SDValue Halves[] = { Base.getOperand(1), Sub };
      SDValue Merged = EltsFromConsecutiveLoads(ResVT, Halves, DL, DAG, false);
      if (Merged.getNode())
        return Merged;
    }
  }

  if ((Res256 || Res512) && Sub128)
    return Insert128BitVector(Base, Sub, IdxVal, DAG, DL);

  if (Res512 && SubVT.is256BitVector())
    return Insert256BitVector(Base, Sub, IdxVal, DAG, DL);

  return SDValue();
}
/// Lower a ConstantPool node to a wrapped target constant-pool address.
///
/// With RIP-relative PIC and the Small or Kernel code model the address is
/// wrapped in WrapperRIP with no operand flag. Otherwise a plain Wrapper is
/// used, with MO_GOTOFF for GOT-style PIC or MO_PIC_BASE_OFFSET for stub-style
/// PIC; when such a flag is set, the global base register is added to the
/// wrapped address.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  ConstantPoolSDNode *CPNode = cast<ConstantPoolSDNode>(Op);
  const CodeModel::Model Model = DAG.getTarget().getCodeModel();

  unsigned char Flags = 0;
  unsigned WrapperOpc = X86ISD::Wrapper;
  const bool UseRIP = Subtarget->isPICStyleRIPRel() &&
                      (Model == CodeModel::Small || Model == CodeModel::Kernel);
  if (UseRIP) {
    WrapperOpc = X86ISD::WrapperRIP;
  } else if (Subtarget->isPICStyleGOT()) {
    Flags = X86II::MO_GOTOFF;
  } else if (Subtarget->isPICStyleStubPIC()) {
    Flags = X86II::MO_PIC_BASE_OFFSET;
  }

  SDValue Addr = DAG.getTargetConstantPool(CPNode->getConstVal(),
                                           getPointerTy(),
                                           CPNode->getAlignment(),
                                           CPNode->getOffset(), Flags);
  SDLoc DL(CPNode);
  Addr = DAG.getNode(WrapperOpc, DL, getPointerTy(), Addr);

  if (!Flags)
    return Addr;

  // The flagged address is relative to the PIC base: add the base register.
  SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
  return DAG.getNode(ISD::ADD, DL, getPointerTy(), PICBase, Addr);
}
/// Lower a JumpTable node to a wrapped target jump-table address.
///
/// Wrapper kind and operand flag are chosen exactly as for constant pools:
/// WrapperRIP for RIP-relative PIC with the Small or Kernel code model,
/// otherwise Wrapper with MO_GOTOFF (GOT-style PIC) or MO_PIC_BASE_OFFSET
/// (stub-style PIC). A non-zero flag causes the global base register to be
/// added to the result.
SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  JumpTableSDNode *JTNode = cast<JumpTableSDNode>(Op);
  const CodeModel::Model Model = DAG.getTarget().getCodeModel();

  unsigned char Flags = 0;
  unsigned WrapperOpc = X86ISD::Wrapper;
  const bool UseRIP = Subtarget->isPICStyleRIPRel() &&
                      (Model == CodeModel::Small || Model == CodeModel::Kernel);
  if (UseRIP) {
    WrapperOpc = X86ISD::WrapperRIP;
  } else if (Subtarget->isPICStyleGOT()) {
    Flags = X86II::MO_GOTOFF;
  } else if (Subtarget->isPICStyleStubPIC()) {
    Flags = X86II::MO_PIC_BASE_OFFSET;
  }

  SDValue Addr = DAG.getTargetJumpTable(JTNode->getIndex(), getPointerTy(),
                                        Flags);
  SDLoc DL(JTNode);
  Addr = DAG.getNode(WrapperOpc, DL, getPointerTy(), Addr);

  if (!Flags)
    return Addr;

  // The flagged address is relative to the PIC base: add the base register.
  SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
  return DAG.getNode(ISD::ADD, DL, getPointerTy(), PICBase, Addr);
}
/// Lower an ExternalSymbol node to a wrapped target external-symbol address.
///
/// The operand flag and wrapper depend on the PIC style and code model. In
/// 32-bit PIC mode the global base register is added to the wrapped address,
/// and when the chosen flag denotes a stub reference (isGlobalStubReference)
/// the final address is loaded through the GOT pointer info.
SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
  const char *SymName = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  const CodeModel::Model Model = DAG.getTarget().getCodeModel();
  const bool SmallOrKernel =
      Model == CodeModel::Small || Model == CodeModel::Kernel;

  unsigned char Flags = 0;
  unsigned WrapperOpc = X86ISD::Wrapper;
  if (Subtarget->isPICStyleRIPRel() && SmallOrKernel) {
    WrapperOpc = X86ISD::WrapperRIP;
    if (Subtarget->isTargetDarwin() || Subtarget->isTargetELF())
      Flags = X86II::MO_GOTPCREL;
  } else if (Subtarget->isPICStyleGOT()) {
    Flags = X86II::MO_GOT;
  } else if (Subtarget->isPICStyleStubPIC()) {
    Flags = X86II::MO_DARWIN_NONLAZY_PIC_BASE;
  } else if (Subtarget->isPICStyleStubNoDynamic()) {
    Flags = X86II::MO_DARWIN_NONLAZY;
  }

  SDValue Addr = DAG.getTargetExternalSymbol(SymName, getPointerTy(), Flags);
  SDLoc DL(Op);
  Addr = DAG.getNode(WrapperOpc, DL, getPointerTy(), Addr);

  // 32-bit PIC: the wrapped address is relative to the PIC base register.
  const bool IsPIC32 =
      DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->is64Bit();
  if (IsPIC32) {
    SDValue PICBase =
        DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
    Addr = DAG.getNode(ISD::ADD, DL, getPointerTy(), PICBase, Addr);
  }

  // Stub references hold the address of the symbol; load through them.
  if (!isGlobalStubReference(Flags))
    return Addr;
  return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Addr,
                     MachinePointerInfo::getGOT(), false, false, false, 0);
}
/// Lower a BlockAddress node to a wrapped target block address.
///
/// The operand flags come from the subtarget's block-address classification.
/// WrapperRIP is used for RIP-relative PIC with the Small or Kernel code
/// model, Wrapper otherwise. If the flags are PIC-base relative, the global
/// base register is added.
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const unsigned char Flags = Subtarget->ClassifyBlockAddressReference();
  const CodeModel::Model Model = DAG.getTarget().getCodeModel();
  BlockAddressSDNode *BANode = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BlockAddr = BANode->getBlockAddress();
  const int64_t Off = BANode->getOffset();
  SDLoc DL(Op);

  SDValue Addr = DAG.getTargetBlockAddress(BlockAddr, getPointerTy(), Off,
                                           Flags);

  const bool UseRIP = Subtarget->isPICStyleRIPRel() &&
                      (Model == CodeModel::Small || Model == CodeModel::Kernel);
  const unsigned WrapperOpc = UseRIP ? X86ISD::WrapperRIP : X86ISD::Wrapper;
  Addr = DAG.getNode(WrapperOpc, DL, getPointerTy(), Addr);

  if (!isGlobalRelativeToPICBase(Flags))
    return Addr;

  SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, DL, getPointerTy());
  return DAG.getNode(ISD::ADD, DL, getPointerTy(), PICBase, Addr);
}
/// Build the address of global \p GV plus \p Offset.
///
/// The offset is folded into the target global address only when the
/// reference needs no operand flag and the offset is suitable for the code
/// model; otherwise it is added explicitly at the end. The address is wrapped
/// (WrapperRIP for RIP-relative PIC with the Small or Kernel code model,
/// Wrapper otherwise), made PIC-base relative if the flags require it, and
/// loaded through the GOT pointer info if the flags denote a stub reference.
SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
                                      int64_t Offset, SelectionDAG &DAG) const {
  const unsigned char Flags =
      Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
  const CodeModel::Model Model = DAG.getTarget().getCodeModel();

  // Offset that still has to be added explicitly after the address is formed.
  int64_t PendingOffset = Offset;
  SDValue Addr;
  const bool FoldOffset = Flags == X86II::MO_NO_FLAG &&
                          X86::isOffsetSuitableForCodeModel(Offset, Model);
  if (FoldOffset) {
    Addr = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
    PendingOffset = 0;
  } else {
    Addr = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, Flags);
  }

  const bool UseRIP = Subtarget->isPICStyleRIPRel() &&
                      (Model == CodeModel::Small || Model == CodeModel::Kernel);
  const unsigned WrapperOpc = UseRIP ? X86ISD::WrapperRIP : X86ISD::Wrapper;
  Addr = DAG.getNode(WrapperOpc, dl, getPointerTy(), Addr);

  if (isGlobalRelativeToPICBase(Flags)) {
    SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy());
    Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), PICBase, Addr);
  }

  // Stub references hold the address of the global; load through them.
  if (isGlobalStubReference(Flags))
    Addr = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Addr,
                       MachinePointerInfo::getGOT(), false, false, false, 0);

  if (PendingOffset == 0)
    return Addr;
  return DAG.getNode(ISD::ADD, dl, getPointerTy(), Addr,
                     DAG.getConstant(PendingOffset, getPointerTy()));
}
/// Lower a GlobalAddress node by forwarding its global and offset to the
/// (GlobalValue, SDLoc, offset) overload.
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  GlobalAddressSDNode *GANode = cast<GlobalAddressSDNode>(Op);
  return LowerGlobalAddress(GANode->getGlobal(), SDLoc(Op),
                            GANode->getOffset(), DAG);
}
/// Emit a TLSADDR (or TLSBASEADDR when \p LocalDynamic is set) node for
/// \p GA and return a copy of \p ReturnReg taken after it.
///
/// \p InFlag, if non-null, supplies an incoming glue value that is appended
/// to the node's operands. The frame info is marked as adjusting the stack
/// and as having calls.
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
           SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
           unsigned char OperandFlags, bool LocalDynamic = false) {
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  SDVTList ResultTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc DL(GA);

  SDValue TargetGA = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                                GA->getValueType(0),
                                                GA->getOffset(),
                                                OperandFlags);

  X86ISD::NodeType NodeKind = X86ISD::TLSADDR;
  if (LocalDynamic)
    NodeKind = X86ISD::TLSBASEADDR;

  // Operands are { chain, address [, incoming glue] }.
  SmallVector<SDValue, 3> Operands;
  Operands.push_back(Chain);
  Operands.push_back(TargetGA);
  if (InFlag)
    Operands.push_back(*InFlag);
  Chain = DAG.getNode(NodeKind, DL, ResultTys, Operands);

  FrameInfo->setAdjustsStack(true);
  FrameInfo->setHasCalls(true);

  SDValue OutGlue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, DL, ReturnReg, PtrVT, OutGlue);
}
/// General-dynamic TLS lowering for 32-bit targets: copy the global base
/// register into EBX, then emit the TLSADDR sequence (MO_TLSGD) glued to that
/// copy, with the result read from EAX.
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const EVT PtrVT) {
  SDLoc DL(GA);
  SDValue Glue;
  SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT);
  SDValue Chain =
      DAG.getCopyToReg(DAG.getEntryNode(), DL, X86::EBX, PICBase, Glue);
  Glue = Chain.getValue(1);

  return GetTLSADDR(DAG, Chain, GA, &Glue, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
/// General-dynamic TLS lowering for 64-bit targets: emit the TLSADDR sequence
/// (MO_TLSGD) on the entry chain with no incoming glue; the result is read
/// from RAX.
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const EVT PtrVT) {
  SDValue EntryChain = DAG.getEntryNode();
  return GetTLSADDR(DAG, EntryChain, GA, /*InFlag=*/nullptr, PtrVT, X86::RAX,
                    X86II::MO_TLSGD);
}
/// Local-dynamic TLS lowering.
///
/// Records one more local-dynamic access in the function info, obtains the
/// base via a TLSBASEADDR sequence (MO_TLSLD with RAX on 64-bit; MO_TLSLDM
/// with EAX on 32-bit, after copying the global base register into EBX), and
/// adds the variable's MO_DTPOFF offset to that base.
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG,
                                           const EVT PtrVT,
                                           bool is64Bit) {
  SDLoc DL(GA);

  X86MachineFunctionInfo *FuncInfo =
      DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
  FuncInfo->incNumLocalDynamicTLSAccesses();

  SDValue ModuleBase;
  if (!is64Bit) {
    SDValue Glue;
    SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT);
    SDValue Chain =
        DAG.getCopyToReg(DAG.getEntryNode(), DL, X86::EBX, PICBase, Glue);
    Glue = Chain.getValue(1);
    ModuleBase = GetTLSADDR(DAG, Chain, GA, &Glue, PtrVT, X86::EAX,
                            X86II::MO_TLSLDM, true);
  } else {
    ModuleBase = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
                            X86::RAX, X86II::MO_TLSLD, true);
  }

  // Offset of the variable, expressed with the DTPOFF operand flag.
  SDValue TargetGA = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
                                                GA->getValueType(0),
                                                GA->getOffset(),
                                                X86II::MO_DTPOFF);
  SDValue VarOffset = DAG.getNode(X86ISD::Wrapper, DL, PtrVT, TargetGA);

  return DAG.getNode(ISD::ADD, DL, PtrVT, VarOffset, ModuleBase);
}
/// Initial-exec / local-exec TLS lowering.
///
/// The thread pointer is loaded from address 0 through a null i8 pointer in
/// address space 257 (64-bit) or 256 (32-bit). The variable's offset is a
/// wrapped target global address whose operand flag depends on the model; for
/// initial-exec the offset is additionally loaded through the GOT pointer
/// info (after adding the global base register for 32-bit PIC). The result is
/// thread pointer + offset. Any other model is unreachable.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                   const EVT PtrVT, TLSModel::Model model,
                                   bool is64Bit, bool isPIC) {
  SDLoc DL(GA);

  // Null pointer in the address space used for thread-pointer accesses.
  // NOTE(review): 256/257 are presumably the segment-register address spaces;
  // confirm against the X86 backend documentation.
  const unsigned TPAddrSpace = is64Bit ? 257 : 256;
  Value *TPBasePtr = Constant::getNullValue(
      Type::getInt8PtrTy(*DAG.getContext(), TPAddrSpace));

  SDValue ThreadPtr =
      DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
                  MachinePointerInfo(TPBasePtr), false, false, false, 0);

  const bool IsInitialExec = model == TLSModel::InitialExec;

  unsigned char Flags = 0;
  unsigned WrapperOpc = X86ISD::Wrapper;
  switch (model) {
  case TLSModel::LocalExec:
    Flags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
    break;
  case TLSModel::InitialExec:
    if (is64Bit) {
      Flags = X86II::MO_GOTTPOFF;
      WrapperOpc = X86ISD::WrapperRIP;
    } else if (isPIC) {
      Flags = X86II::MO_GOTNTPOFF;
    } else {
      Flags = X86II::MO_INDNTPOFF;
    }
    break;
  default:
    llvm_unreachable("Unexpected model");
  }

  SDValue TargetGA =
      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0),
                                 GA->getOffset(), Flags);
  SDValue VarOffset = DAG.getNode(WrapperOpc, DL, PtrVT, TargetGA);

  if (IsInitialExec) {
    // 32-bit PIC: the offset slot is addressed relative to the PIC base.
    if (isPIC && !is64Bit) {
      SDValue PICBase = DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT);
      VarOffset = DAG.getNode(ISD::ADD, DL, PtrVT, PICBase, VarOffset);
    }
    VarOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), VarOffset,
                            MachinePointerInfo::getGOT(), false, false, false,
                            0);
  }

  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPtr, VarOffset);
}
/// Lower a thread-local GlobalAddress node. Three target families are
/// handled; anything else is unreachable:
///  - ELF: dispatch on the TLS model reported by the target machine.
///  - Darwin: emit a TLSCALL on the wrapped descriptor address and read the
///    result from RAX/EAX.
///  - Windows (MSVC or GNU environment): load the thread's TLS array pointer,
///    index it with the value loaded from _tls_index, load the resulting
///    slot, and add the variable's section-relative (MO_SECREL) offset.
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// ELF: each model has its own helper; the exec models share one.
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(
GA, DAG, getPointerTy(), model, Subtarget->is64Bit(),
DAG.getTarget().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
if (Subtarget->isTargetDarwin()) {
// Darwin: the wrapped address (MO_TLVP, or MO_TLVP_PIC_BASE for 32-bit PIC)
// is passed to a TLSCALL node.
unsigned char OpFlag = 0;
unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
X86ISD::WrapperRIP : X86ISD::Wrapper;
bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) &&
!Subtarget->is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
OpFlag = X86II::MO_TLVP;
SDLoc DL(Op);
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// In 32-bit PIC mode the address is relative to the global base register.
if (PIC32)
Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
SDLoc(), getPointerTy()),
Offset);
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
// The TLSCALL node is recorded as adjusting the stack.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setAdjustsStack(true);
// The resulting address is read back from RAX (64-bit) or EAX (32-bit).
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
Chain.getValue(1));
}
if (Subtarget->isTargetKnownWindowsMSVC() ||
Subtarget->isTargetWindowsGNU()) {
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
// Null base pointer for the TLS-array load: address space 256 on 64-bit,
// 257 on 32-bit.
// NOTE(review): 256/257 are presumably the segment-register address spaces;
// confirm against the X86 backend documentation.
Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
? Type::getInt8PtrTy(*DAG.getContext(),
256)
: Type::getInt32PtrTy(*DAG.getContext(),
257));
// Location of the TLS array pointer: constant 0x58 on 64-bit, constant 0x2C
// for 32-bit Windows GNU, and the external symbol _tls_array otherwise.
SDValue TlsArray =
Subtarget->is64Bit()
? DAG.getIntPtrConstant(0x58)
: (Subtarget->isTargetWindowsGNU()
? DAG.getIntPtrConstant(0x2C)
: DAG.getExternalSymbol("_tls_array", getPointerTy()));
SDValue ThreadPointer =
DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
MachinePointerInfo(Ptr), false, false, false, 0);
// Load _tls_index; on 64-bit it is read as an i32 and zero-extended to
// pointer width.
SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
if (Subtarget->is64Bit())
IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
IDX, MachinePointerInfo(), MVT::i32,
false, false, false, 0);
else
IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
false, false, false, 0);
// Scale the index by the pointer size (shift by log2 of the pointer size)
// to obtain a byte offset into the TLS array.
SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
getPointerTy());
IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
// Load the selected TLS array slot.
SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
false, false, false, 0);
// Add the variable's MO_SECREL offset to the loaded slot value.
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
}
/// Lower SHL_PARTS / SRA_PARTS / SRL_PARTS (a shift of a value split into a
/// low and a high part) to SHLD/SHRD plus a single-part shift, selecting
/// between "small shift" and "large shift" results with CMOV.
///
/// Operands are { low part, high part, shift amount }. The condition tests
/// the bit of the shift amount equal to the part width: when it is set
/// (COND_NE), the large-shift values are selected. The result is the merged
/// pair { Lo, Hi }.
static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  MVT PartVT = Op.getSimpleValueType();
  const unsigned PartBits = PartVT.getSizeInBits();
  SDLoc DL(Op);

  const unsigned Opcode = Op.getOpcode();
  const bool IsArith = Opcode == ISD::SRA_PARTS;
  const bool IsLeft = Opcode == ISD::SHL_PARTS;

  SDValue LoIn = Op.getOperand(0);
  SDValue HiIn = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  // Shift amount masked to the part width, for the generic shift nodes.
  SDValue MaskedAmt = DAG.getNode(ISD::AND, DL, MVT::i8, Amt,
                                  DAG.getConstant(PartBits - 1, MVT::i8));

  // Value of the vacated part for large shifts: sign fill for SRA, else 0.
  SDValue Fill;
  if (IsArith)
    Fill = DAG.getNode(ISD::SRA, DL, PartVT, HiIn,
                       DAG.getConstant(PartBits - 1, MVT::i8));
  else
    Fill = DAG.getConstant(0, PartVT);

  // Double shift across both parts, and single-part shift.
  SDValue Funnel, Single;
  if (IsLeft) {
    Funnel = DAG.getNode(X86ISD::SHLD, DL, PartVT, HiIn, LoIn, Amt);
    Single = DAG.getNode(ISD::SHL, DL, PartVT, LoIn, MaskedAmt);
  } else {
    Funnel = DAG.getNode(X86ISD::SHRD, DL, PartVT, LoIn, HiIn, Amt);
    Single = DAG.getNode(IsArith ? ISD::SRA : ISD::SRL, DL, PartVT, HiIn,
                         MaskedAmt);
  }

  // Test the shift-amount bit equal to the part width.
  SDValue WidthBit = DAG.getNode(ISD::AND, DL, MVT::i8, Amt,
                                 DAG.getConstant(PartBits, MVT::i8));
  SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, WidthBit,
                              DAG.getConstant(0, MVT::i8));
  SDValue CondCode = DAG.getConstant(X86::COND_NE, MVT::i8);

  SDValue NearOps[4] = { Funnel, Single, CondCode, Flags };
  SDValue FarOps[4] = { Single, Fill, CondCode, Flags };
  SDValue NearPart = DAG.getNode(X86ISD::CMOV, DL, PartVT, NearOps);
  SDValue FarPart = DAG.getNode(X86ISD::CMOV, DL, PartVT, FarOps);

  // For a left shift the funnel result is the high part; for a right shift
  // it is the low part.
  SDValue Parts[2] = { IsLeft ? FarPart : NearPart,
                       IsLeft ? NearPart : FarPart };
  return DAG.getMergeValues(Parts, DL);
}
/// Custom-lower SINT_TO_FP.
///
/// - Vector sources: only i1 element vectors are handled, by sign-extending
///   to an i32 vector and converting that; other vectors return SDValue().
/// - Scalar i32 (and i64 on 64-bit subtargets) with a result type kept in an
///   SSE register is returned unchanged.
/// - Everything else is stored to a fresh stack slot and converted with
///   BuildFILD.
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  SDLoc DL(Op);

  if (SrcVT.isVector()) {
    if (SrcVT.getVectorElementType() != MVT::i1)
      return SDValue();
    MVT WideVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
    SDValue Widened = DAG.getNode(ISD::SIGN_EXTEND, DL, WideVT, Src);
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), Widened);
  }

  assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  // These combinations need no custom lowering.
  if (isScalarFPTypeInSSEReg(Op.getValueType())) {
    if (SrcVT == MVT::i32)
      return Op;
    if (SrcVT == MVT::i64 && Subtarget->is64Bit())
      return Op;
  }

  // Spill the integer to a stack slot of its own size and load it with FILD.
  const unsigned NumBytes = SrcVT.getSizeInBits() / 8;
  MachineFunction &MF = DAG.getMachineFunction();
  const int SlotFI =
      MF.getFrameInfo()->CreateStackObject(NumBytes, NumBytes, false);
  SDValue Slot = DAG.getFrameIndex(SlotFI, getPointerTy());
  SDValue StoreChain =
      DAG.getStore(DAG.getEntryNode(), DL, Src, Slot,
                   MachinePointerInfo::getFixedStack(SlotFI), false, false, 0);
  return BuildFILD(Op, SrcVT, StoreChain, Slot, DAG);
}
/// Build an integer-to-floating-point load (FILD) of an integer of type
/// \p SrcVT from \p StackSlot, chained after \p Chain, producing a value of
/// Op's result type.
///
/// \p StackSlot is either a frame index or a load node; in the latter case
/// the load's memory operand is reused and its address operand becomes the
/// slot. When the result type is kept in an SSE register, FILD_FLAG is used
/// with an f64 result, which is then stored with FST to a fresh stack slot
/// and reloaded as the result type.
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue StackSlot,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDVTList Tys;
bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
// The SSE variant produces f64 plus a glue result that ties it to the
// following FST; the plain variant produces the result type directly.
if (useSSE)
Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Glue);
else
Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
unsigned ByteSize = SrcVT.getSizeInBits()/8;
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(StackSlot);
MachineMemOperand *MMO;
if (FI) {
// Frame index: describe a load of ByteSize bytes from that fixed stack slot.
int SSFI = FI->getIndex();
MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
// Otherwise StackSlot must be a load node: reuse its memory operand and
// use its operand 1 as the slot address.
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
}
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
Tys, Ops, SrcVT, MMO);
if (useSSE) {
// Route the value through memory: FST it to a new stack slot sized for the
// result type, then load it back as the result type.
Chain = Result.getValue(1);
SDValue InFlag = Result.getValue(2);
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
MachineMemOperand *MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, Op.getValueType(), MMO);
Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, false, 0);
}
return Result;
}
/// Lower an unsigned i64 -> f64 conversion using SSE vector operations.
///
/// The two 32-bit halves of the input are interleaved with the constants
/// 0x43300000 and 0x45300000, so that, viewed as v2f64, lane 0 has the bit
/// pattern of 2^52 with the low half in its mantissa and lane 1 the bit
/// pattern of 2^84 with the high half in its mantissa. Subtracting
/// { 2^52, 2^84 } removes the biases, and the two lanes are then summed
/// (FHADD with SSE3, otherwise a PSHUFD swap followed by FADD). Element 0 of
/// the sum is the result.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
LLVMContext *Context = DAG.getContext();
// Constant 0: the high words that get interleaved with the input halves.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
// Constant 1: the doubles 2^52 (0x4330000000000000) and 2^84
// (0x4530000000000000) that are subtracted afterwards.
SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
// Place the i64 input in a vector and interleave its v4i32 view with CV0.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
CLod0);
// The second constant-pool load is chained after the first one.
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
if (Subtarget->hasSSE3()) {
// Horizontal add sums the two lanes directly.
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
// Without SSE3: PSHUFD with immediate 0x4E on the v4i32 view, then add the
// shuffled vector to the original.
SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
S2F, 0x4E, DAG);
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
Sub);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0));
}
/// Lower an unsigned i32 -> FP conversion using SSE operations.
///
/// The input is placed in the low lane of a v4i32 (zeroing the other lanes
/// via getShuffleVectorZeroOrUndef), reinterpreted as an f64, and OR-ed with
/// the bit pattern of the bias 2^52 (0x4330000000000000). Subtracting the
/// bias then yields the integer's value as an f64, which is rounded or
/// extended to the destination type if that type is not f64.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue BiasF64 =
      DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), MVT::f64);

  // Input in lane 0 of a vector, remaining lanes zeroed, viewed as f64.
  SDValue InVec =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32, Op.getOperand(0));
  InVec = getShuffleVectorZeroOrUndef(InVec, 0, true, Subtarget, DAG);
  SDValue InAsV2F64 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, InVec);
  SDValue InF64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                              InAsV2F64, DAG.getIntPtrConstant(0));

  // OR the input bits with the bias bits in the integer domain.
  SDValue InBits =
      DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
                  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, InF64));
  SDValue BiasBits =
      DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
                  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, BiasF64));
  SDValue Combined = DAG.getNode(ISD::OR, DL, MVT::v2i64, InBits, BiasBits);

  SDValue CombinedAsV2F64 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, Combined);
  SDValue BiasedF64 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                                  CombinedAsV2F64, DAG.getIntPtrConstant(0));

  // Remove the bias to obtain the converted value as f64.
  SDValue Converted = DAG.getNode(ISD::FSUB, DL, MVT::f64, BiasedF64, BiasF64);

  // Adjust to the requested result type.
  EVT ResVT = Op.getValueType();
  if (ResVT.bitsLT(MVT::f64))
    return DAG.getNode(ISD::FP_ROUND, DL, ResVT, Converted,
                       DAG.getIntPtrConstant(0));
  if (ResVT.bitsGT(MVT::f64))
    return DAG.getNode(ISD::FP_EXTEND, DL, ResVT, Converted);
  return Converted;
}
/// Lower an unsigned v4i32 -> v4f32 or v8i32 -> v8f32 conversion.
///
/// Each 32-bit element is split into its low and high 16 bits. The low half
/// is combined with 0x4b000000 (the float bit pattern of 2^23) and the high
/// half with 0x53000000 (the float bit pattern of 2^39), so that as floats
/// they represent 2^23 + lo and 2^39 + hi * 2^16. Adding the constant
/// 0xD3000080, i.e. -(2^39 + 2^23), to the high float and then adding the low
/// float yields the converted value.
///
/// With SSE4.1 the halves are combined with word blends (immediate 0xaa on
/// the i16 view); otherwise with AND/OR.
///
/// Returns SDValue() if the result type is not the float vector matching the
/// input type.
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  SDValue V = Op->getOperand(0);
  EVT VecIntVT = V.getValueType();
  bool Is128 = VecIntVT == MVT::v4i32;
  EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
  if (VecFloatVT != Op->getValueType(0))
    return SDValue();

  unsigned NumElts = VecIntVT.getVectorNumElements();
  assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) &&
         "Unsupported custom type");

  // Build a BUILD_VECTOR of type VT with NumElts copies of Elt. Every splat
  // in this function goes through here so that all of them have the element
  // count of the input type.
  auto BuildSplat = [&](SDValue Elt, EVT VT) -> SDValue {
    SmallVector<SDValue, 8> Elts(NumElts, Elt);
    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
  };

  // Float bit pattern of 2^23, paired with the low 16 bits.
  SDValue CstLow = DAG.getConstant(0x4b000000, MVT::i32);
  SDValue VecCstLow = BuildSplat(CstLow, VecIntVT);

  // Float bit pattern of 2^39, paired with the high 16 bits.
  SDValue CstHigh = DAG.getConstant(0x53000000, MVT::i32);
  SDValue VecCstHigh = BuildSplat(CstHigh, VecIntVT);

  // High 16 bits of every element, moved to the low half.
  SDValue CstShift = DAG.getConstant(16, MVT::i32);
  SDValue VecCstShift = BuildSplat(CstShift, VecIntVT);
  SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift);

  SDValue Low, High;
  if (Subtarget.hasSSE41()) {
    // Blend on the i16 view with immediate 0xaa to merge each 16-bit data
    // half with the upper 16 bits of the corresponding constant.
    EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
    SDValue VecCstLowBitcast =
        DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstLow);
    SDValue VecBitcast = DAG.getNode(ISD::BITCAST, DL, VecI16VT, V);
    Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
                      VecCstLowBitcast, DAG.getConstant(0xaa, MVT::i32));
    SDValue VecCstHighBitcast =
        DAG.getNode(ISD::BITCAST, DL, VecI16VT, VecCstHigh);
    SDValue VecShiftBitcast =
        DAG.getNode(ISD::BITCAST, DL, VecI16VT, HighShift);
    High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
                       VecCstHighBitcast, DAG.getConstant(0xaa, MVT::i32));
  } else {
    // Mask out the low 16 bits and OR in the constants. The mask vector is
    // built with NumElts elements like every other splat here; it was
    // previously hard-coded to four operands regardless of VecIntVT.
    SDValue CstMask = DAG.getConstant(0xffff, MVT::i32);
    SDValue VecCstMask = BuildSplat(CstMask, VecIntVT);
    SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask);
    Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow);
    High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh);
  }

  // 0xD3000080 is the float -(2^39 + 2^23): it cancels both biases at once.
  SDValue CstFAdd = DAG.getConstantFP(
      APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), MVT::f32);
  SDValue VecCstFAdd = BuildSplat(CstFAdd, VecFloatVT);

  SDValue HighBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, High);
  SDValue FHigh =
      DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
  SDValue LowBitcast = DAG.getNode(ISD::BITCAST, DL, VecFloatVT, Low);
  return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}
/// Lower a vector UINT_TO_FP.
///
/// Sources with 8-bit or 16-bit elements (4 or 8 lanes) are zero-extended to
/// i32 lanes and converted with SINT_TO_FP. v4i32 and v8i32 sources are
/// handled by lowerUINT_TO_FP_vXi32. Any other source type is unreachable.
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  SDLoc DL(Op);

  switch (SrcVT.SimpleTy) {
  case MVT::v4i32:
  case MVT::v8i32:
    return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v8i8:
  case MVT::v8i16: {
    // After zero-extension the value is non-negative, so the signed
    // conversion is used on the widened vector.
    MVT WideVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
    SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Src);
    return DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), Widened);
  }
  default:
    llvm_unreachable("Custom UINT_TO_FP is not supported!");
  }
  llvm_unreachable(nullptr);
}
/// Custom-lower UINT_TO_FP.
///
/// In order:
///  - vector results go to lowerUINT_TO_FP_vec;
///  - inputs whose sign bit is known zero become SINT_TO_FP;
///  - i64 -> f64 and i32 sources use the SSE helpers when X86ScalarSSEf64;
///  - i64 -> f32 on 64-bit subtargets returns SDValue();
///  - otherwise the value is converted through a 64-bit stack slot with FILD:
///    an i32 is stored with a zero upper half and loaded as an i64; an i64 is
///    loaded as a signed value into f80 and then corrected by adding 2^64
///    when the input was negative as a signed number.
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
if (Op.getValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
// With a zero sign bit, signed and unsigned interpretations agree.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
if (Subtarget->is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();
// Remaining cases go through an 8-byte stack temporary and FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
// Store the i32 at offset 0 and a zero i32 at offset 4, then load the
// 8 bytes as an i64 via BuildFILD.
SDValue WordOff = DAG.getConstant(4, getPointerTy());
SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
getPointerTy(), StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
OffsetSlot, MachinePointerInfo(),
false, false, 0);
SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
return Fild;
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
// Build the FILD by hand so the result is f80 regardless of DstVT.
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
MachineMemOperand *MMO =
DAG.getMachineFunction()
.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
MVT::i64, MMO);
// 0x5F800000 is the f32 bit pattern of 2^64. It is emitted zero-extended to
// 64 bits in the constant pool, so one 4-byte half holds 2^64 and the other
// holds 0.0f.
APInt FF(32, 0x5F800000ULL);
// True when the input, read as a signed i64, is negative.
SDValue SignSet = DAG.getSetCC(dl,
getSetCCResultType(*DAG.getContext(), MVT::i64),
Op.getOperand(0), DAG.getConstant(0, MVT::i64),
ISD::SETLT);
SDValue FudgePtr = DAG.getConstantPool(
ConstantInt::get(*DAG.getContext(), FF.zext(64)),
getPointerTy());
// Select byte offset 0 when the sign bit is set and 4 otherwise.
// NOTE(review): with little-endian layout offset 0 holds the 2^64 pattern
// and offset 4 holds zero.
SDValue Zero = DAG.getIntPtrConstant(0);
SDValue Four = DAG.getIntPtrConstant(4);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
// Load the selected f32 correction, extended to f80, and add it.
SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, false, 4);
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
// Round the f80 sum to the requested destination type.
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
// Shared helper for lowering FP_TO_SINT / FP_TO_UINT.
//
// Returns a pair (Node, StackSlot):
//  - (empty, empty) when no custom lowering is produced here;
//  - (store-to-memory node, stack slot) when the integer result has been
//    written to a stack object that the caller must load from;
//  - (value, empty) on the WIN_FTOL path, where the value is assembled from
//    EAX/EDX as a BUILD_PAIR (IsReplace) or as merged values (!IsReplace).
std::pair<SDValue,SDValue>
X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
// An unsigned conversion that is not handled by FTOL is only expected for i32
// here; it is performed as a conversion to i64 instead.
if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");
// No custom lowering for i32 results (or i64 results on 64-bit subtargets)
// when the source is a scalar FP type held in an SSE register.
if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
if (Subtarget->is64Bit() &&
DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
// Allocate a stack object sized and aligned to the destination integer type.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
// Pick the opcode: WIN_FTOL for unsigned FTOL types, otherwise the
// FP_TO_INT*_IN_MEM node matching the destination width.
unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
Opc = X86ISD::WIN_FTOL;
else
switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
// If the source lives in an SSE register, spill it to the stack slot and
// reload it with X86ISD::FLD, then allocate a second stack object to receive
// the integer result.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
// Memory operand describing the store of the integer result.
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
if (Opc != X86ISD::WIN_FTOL) {
// Emit the store-to-memory conversion; the caller loads from StackSlot.
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
} else {
// WIN_FTOL produces a chain and glue; the two i32 halves of the result are
// then copied out of EAX and EDX.
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Value);
SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
MVT::i32, ftol.getValue(1));
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
: DAG.getMergeValues(Ops, DL);
return std::make_pair(pair, SDValue());
}
}
/// Lower an integer vector extension from a 128-bit source to a 256-bit
/// result for the pairs v16i8->v16i16, v8i16->v8i32 and v4i32->v4i64.
/// Returns an empty SDValue for every other type pair.
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
                              const X86Subtarget *Subtarget) {
  const MVT DstVT = Op->getSimpleValueType(0);
  SDValue Src = Op->getOperand(0);
  const MVT SrcVT = Src.getSimpleValueType();
  SDLoc DL(Op);

  const bool IsHandledPair =
      (DstVT == MVT::v16i16 && SrcVT == MVT::v16i8) ||
      (DstVT == MVT::v8i32 && SrcVT == MVT::v8i16) ||
      (DstVT == MVT::v4i64 && SrcVT == MVT::v4i32);
  if (!IsHandledPair)
    return SDValue();

  // With 256-bit integer support a single VZEXT node is emitted, whatever
  // the opcode of Op.
  if (Subtarget->hasInt256())
    return DAG.getNode(X86ISD::VZEXT, DL, DstVT, Src);

  // Otherwise interleave the source with a filler vector: zeros for
  // ZERO_EXTEND, undef for anything else.
  SDValue Zeros = getZeroVector(SrcVT, Subtarget, DAG, DL);
  SDValue Undef = DAG.getUNDEF(SrcVT);
  SDValue Filler = (Op.getOpcode() == ISD::ZERO_EXTEND) ? Zeros : Undef;

  SDValue Lo = getUnpackl(DAG, DL, SrcVT, Src, Filler);
  SDValue Hi = getUnpackh(DAG, DL, SrcVT, Src, Filler);

  // Reinterpret each half with the destination element type and glue the two
  // halves together.
  const MVT HalfVT = MVT::getVectorVT(DstVT.getVectorElementType(),
                                      DstVT.getVectorNumElements() / 2);
  Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Lo);
  Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Hi);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Lo, Hi);
}
/// Lower ZERO_EXTEND for 8- or 16-element vectors on the AVX-512 path.
///
/// A 512-bit result whose source elements are not i1 becomes a single VZEXT.
/// Otherwise the source must be an i1 (mask) vector: the constant 1 is loaded
/// from the constant pool and broadcast under the mask (VBROADCASTM) into
/// v8i64 / v16i32, then truncated with VTRUNC when the requested result is
/// narrower than 512 bits. Returns an empty SDValue for other element counts.
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
                                       SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  SDLoc DL(Op);

  unsigned int NumElts = VT.getVectorNumElements();
  if (NumElts != 8 && NumElts != 16)
    return SDValue();

  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);

  EVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Only mask (i1 element) extension is left at this point.
  assert(InVT.getVectorElementType() == MVT::i1);

  SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
  // getTargetConstant always yields a ConstantSDNode, so use cast<> (which
  // asserts on mismatch) instead of dereferencing an unchecked dyn_cast<>.
  const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
  SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
  unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
  SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
                           MachinePointerInfo::getConstantPool(),
                           false, false, false, Alignment);

  SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
  if (VT.is512BitVector())
    return Brcst;
  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
}
/// Lower ANY_EXTEND. Only attempted when the subtarget reports hasFp256(),
/// in which case the work is done by LowerAVXExtend; otherwise, or when that
/// helper declines, an empty SDValue is returned.
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                               SelectionDAG &DAG) {
  if (!Subtarget->hasFp256())
    return SDValue();

  SDValue Extended = LowerAVXExtend(Op, DAG, Subtarget);
  return Extended.getNode() ? Extended : SDValue();
}
/// Lower ZERO_EXTEND. 512-bit results and i1-element sources are routed to
/// LowerZERO_EXTEND_AVX512; otherwise LowerAVXExtend is tried when the
/// subtarget reports hasFp256(). Returns an empty SDValue when neither path
/// produces a node.
static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                                SelectionDAG &DAG) {
  const MVT DstVT = Op.getSimpleValueType();
  const MVT SrcVT = Op.getOperand(0).getSimpleValueType();

  if (DstVT.is512BitVector() || SrcVT.getVectorElementType() == MVT::i1)
    return LowerZERO_EXTEND_AVX512(Op, DAG);

  if (Subtarget->hasFp256()) {
    SDValue Widened = LowerAVXExtend(Op, DAG, Subtarget);
    if (Widened.getNode())
      return Widened;
  }

  // A 128-bit -> 256-bit extension with matching element counts is not
  // expected to reach this point.
  assert(!(DstVT.is256BitVector() && SrcVT.is128BitVector() &&
           DstVT.getVectorNumElements() == SrcVT.getVectorNumElements()));
  return SDValue();
}
/// Custom-lower TRUNCATE.
///
/// Handled cases, in order:
///  - scalar -> i1: sources narrower than 32 bits are any-extended to i32
///    first; wider sources return an empty SDValue;
///  - 512-bit sources or i1-element results: VTRUNC for results with elements
///    of at least 8 bits, otherwise an AND with a broadcast 1 followed by
///    TESTM;
///  - v4i64 -> v4i32 and v8i32 -> v8i16 via shuffles;
///  - any other 256-bit -> 128-bit truncation via one even-element shuffle.
/// All remaining cases return an empty SDValue.
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT InVT = In.getSimpleValueType();

  if (VT == MVT::i1) {
    assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
           "Invalid scalar TRUNCATE operation");
    if (InVT.getSizeInBits() >= 32)
      return SDValue();
    In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
    return DAG.getNode(ISD::TRUNCATE, DL, VT, In);
  }
  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Invalid TRUNCATE operation");

  if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
    if (VT.getVectorElementType().getSizeInBits() >= 8)
      return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);

    assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
    unsigned NumElts = InVT.getVectorNumElements();
    assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
    // Widen sources narrower than 512 bits so the test below always operates
    // on a 512-bit vector.
    if (InVT.getSizeInBits() < 512) {
      MVT ExtVT = (NumElts == 16) ? MVT::v16i32 : MVT::v8i64;
      In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
      InVT = ExtVT;
    }

    // Build a splat of 1 and test the low bit of every element.
    SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
    // getTargetConstant always yields a ConstantSDNode, so use cast<> (which
    // asserts on mismatch) instead of dereferencing an unchecked dyn_cast<>.
    const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
    SDValue CP = DAG.getConstantPool(C, getPointerTy());
    unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
    SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, Alignment);
    SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
    SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
    return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
  }

  if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
    if (Subtarget->hasInt256()) {
      // Pick the even i32 elements (the low half of each i64) with one
      // 8-element shuffle and keep the low 128 bits.
      static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
      In = DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, In);
      In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
                                ShufMask);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
                         DAG.getIntPtrConstant(0));
    }

    // Split into two 128-bit halves and shuffle their even i32 elements
    // together.
    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                               DAG.getIntPtrConstant(0));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                               DAG.getIntPtrConstant(2));
    OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
    OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);
    static const int ShufMask[] = {0, 2, 4, 6};
    return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
  }

  if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
    if (Subtarget->hasInt256()) {
      In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);

      // Per 16-byte half: select the two low bytes of each i32 into the
      // first 8 bytes and use 0x80 as the selector for the remaining 8.
      SmallVector<SDValue,32> pshufbMask;
      for (unsigned i = 0; i < 2; ++i) {
        pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8));
        for (unsigned j = 0; j < 8; ++j)
          pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
      }
      SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
      In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
      In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);

      // Move the two populated i64 lanes (0 and 2) next to each other and
      // keep the low 128 bits.
      static const int ShufMask[] = {0, 2, -1, -1};
      In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
                                &ShufMask[0]);
      In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
                       DAG.getIntPtrConstant(0));
      return DAG.getNode(ISD::BITCAST, DL, VT, In);
    }

    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(0));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(4));

    OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo);
    OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi);

    // Byte shuffle gathering the low 16 bits of each i32 into the low 8 bytes.
    static const int ShufMask1[] = {0,  1,  4,  5,  8,  9, 12, 13,
                                   -1, -1, -1, -1, -1, -1, -1, -1};

    SDValue Undef = DAG.getUNDEF(MVT::v16i8);
    OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
    OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);

    OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo);
    OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi);

    // Combine the low 64 bits of both halves.
    static const int ShufMask2[] = {0, 1, 4, 5};

    SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
    return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, res);
  }

  // Generic 256-bit -> 128-bit truncation through a shuffle.
  if (!VT.is128BitVector() || !InVT.is256BitVector())
    return SDValue();

  assert(Subtarget->hasFp256() && "256-bit vector without AVX!");

  unsigned NumElems = VT.getVectorNumElements();
  MVT NVT = MVT::getVectorVT(VT.getVectorElementType(), NumElems * 2);

  // Select the even narrow elements into the low half; the rest is undef.
  SmallVector<int, 16> MaskVec(NumElems * 2, -1);
  for (unsigned i = 0; i != NumElems; ++i)
    MaskVec[i] = i * 2;
  SDValue V = DAG.getVectorShuffle(NVT, DL,
                                   DAG.getNode(ISD::BITCAST, DL, NVT, In),
                                   DAG.getUNDEF(NVT), &MaskVec[0]);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
                     DAG.getIntPtrConstant(0));
}
/// Custom-lower a scalar FP_TO_SINT through FP_TO_INTHelper. Returns Op
/// itself when the helper produces no node, a load from the helper's stack
/// slot when one is provided, and the helper's node otherwise.
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
                                           SelectionDAG &DAG) const {
  assert(!Op.getSimpleValueType().isVector());

  std::pair<SDValue, SDValue> Lowered =
      FP_TO_INTHelper(Op, DAG, /*IsSigned=*/true, /*IsReplace=*/false);
  SDValue Result = Lowered.first;
  SDValue Slot = Lowered.second;

  // The helper declined: hand the operation back unchanged.
  if (!Result.getNode())
    return Op;

  // No stack slot: the helper's node already is the value.
  if (!Slot.getNode())
    return Result;

  // Otherwise the helper's node is the chain of a load from the stack slot.
  return DAG.getLoad(Op.getValueType(), SDLoc(Op), Result, Slot,
                     MachinePointerInfo(), false, false, false, 0);
}
/// Custom-lower FP_TO_UINT through FP_TO_INTHelper. The helper is required
/// to produce a node; the result is loaded from the helper's stack slot when
/// one is provided, otherwise the helper's node is returned directly.
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
                                           SelectionDAG &DAG) const {
  std::pair<SDValue, SDValue> Lowered =
      FP_TO_INTHelper(Op, DAG, /*IsSigned=*/false, /*IsReplace=*/false);
  SDValue Result = Lowered.first;
  SDValue Slot = Lowered.second;
  assert(Result.getNode() && "Unexpected failure");

  // No stack slot: the helper's node already is the value.
  if (!Slot.getNode())
    return Result;

  // Otherwise the helper's node is the chain of a load from the stack slot.
  return DAG.getLoad(Op.getValueType(), SDLoc(Op), Result, Slot,
                     MachinePointerInfo(), false, false, false, 0);
}
/// Lower FP_EXTEND of a v2f32 source: the source is padded with an undef
/// v2f32 to form a v4f32, which is then extended with VFPEXT.
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue Src = Op.getOperand(0);
  const MVT SrcVT = Src.getSimpleValueType();
  assert(SrcVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");

  SDValue Padded = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, Src,
                               DAG.getUNDEF(SrcVT));
  return DAG.getNode(X86ISD::VFPEXT, DL, Op.getSimpleValueType(), Padded);
}
/// Lower FABS and FNEG to a bitwise operation against a constant-pool mask.
///
/// FABS ANDs with the all-but-sign-bit mask, FNEG XORs with the sign-bit
/// mask, and FNEG(FABS(x)) ORs x with the sign-bit mask. A FABS that has an
/// FNEG user is returned unchanged so the pair can be handled when the FNEG
/// is lowered.
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
         "Wrong opcode for lowering FABS or FNEG.");

  const bool IsAbs = Op.getOpcode() == ISD::FABS;
  if (IsAbs) {
    for (SDNode *Use : Op->uses()) {
      if (Use->getOpcode() == ISD::FNEG)
        return Op;
    }
  }

  SDValue Src = Op.getOperand(0);
  const bool IsNegAbs = !IsAbs && Src.getOpcode() == ISD::FABS;

  SDLoc DL(Op);
  const MVT VT = Op.getSimpleValueType();
  const bool IsVector = VT.isVector();

  // Scalars also use a vector constant: two elements for f64, four otherwise.
  const MVT EltVT = IsVector ? VT.getVectorElementType() : VT;
  const unsigned NumElts =
      IsVector ? VT.getVectorNumElements() : (VT == MVT::f64 ? 2 : 4);
  const unsigned EltBits = EltVT.getSizeInBits();

  // 0x7f... for FABS, 0x80... for FNEG / FNABS.
  APInt MaskElt =
      IsAbs ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
  Constant *Splat = ConstantVector::getSplat(
      NumElts, ConstantInt::get(*DAG.getContext(), MaskElt));

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue PoolIdx = DAG.getConstantPool(Splat, TLI.getPointerTy());
  const unsigned Alignment = cast<ConstantPoolSDNode>(PoolIdx)->getAlignment();
  SDValue Mask = DAG.getLoad(VT, DL, DAG.getEntryNode(), PoolIdx,
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, Alignment);

  // For FNEG(FABS(x)) operate directly on x.
  SDValue Input = IsNegAbs ? Src.getOperand(0) : Src;

  if (!IsVector) {
    const unsigned LogicOpc =
        IsAbs ? X86ISD::FAND : (IsNegAbs ? X86ISD::FOR : X86ISD::FXOR);
    return DAG.getNode(LogicOpc, DL, VT, Input, Mask);
  }

  // Vectors: perform the logic op on an i64-element vector of the same size
  // and cast the result back.
  const MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
  SDValue IntMask = DAG.getNode(ISD::BITCAST, DL, IntVT, Mask);
  SDValue IntInput = DAG.getNode(ISD::BITCAST, DL, IntVT, Input);
  const unsigned LogicOpc =
      IsAbs ? ISD::AND : (IsNegAbs ? ISD::OR : ISD::XOR);
  SDValue Logic = DAG.getNode(LogicOpc, DL, IntVT, IntInput, IntMask);
  return DAG.getNode(ISD::BITCAST, DL, VT, Logic);
}
// Lower FCOPYSIGN(Op0 = magnitude, Op1 = sign) to bitwise operations:
// (Op0 & ~signmask) | (Op1 & signmask), with both masks loaded from the
// constant pool. Only f64 and (otherwise) single-precision semantics are
// distinguished below.
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Context = DAG.getContext();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op1.getSimpleValueType();
// Bring the sign operand to the result type: extend it if it is narrower...
if (SrcVT.bitsLT(VT)) {
Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
SrcVT = VT;
}
// ...or round it if it is wider.
if (SrcVT.bitsGT(VT)) {
Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
SrcVT = VT;
}
// From here on SrcVT == VT.
const fltSemantics &Sem =
VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
const unsigned SizeInBits = VT.getSizeInBits();
// Constant vector of 2 (f64) or 4 elements, initially all zero bits; only
// element 0 is given a mask value below.
SmallVector<Constant *, 4> CV(
VT == MVT::f64 ? 2 : 4,
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
// First mask: only the sign bit set. AND it with the sign operand.
CV[0] = ConstantFP::get(*Context,
APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1)));
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
// Second constant: for a constant magnitude, the magnitude itself with its
// sign cleared; otherwise a mask with every bit except the sign bit set.
if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Op0)) {
APFloat APF = Op0CN->getValueAPF();
// A +0.0 magnitude contributes no bits, so the isolated sign bit is the
// whole result.
if (APF.isPosZero())
return SignBit;
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
CV[0] = ConstantFP::get(
*Context,
APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1)));
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
// A non-constant magnitude still needs its sign bit masked off.
if (!isa<ConstantFPSDNode>(Op0))
Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Val);
// Combine the sign-free magnitude with the isolated sign bit.
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
/// Lower FGETSIGN: emit an FGETSIGNx86 node on the operand (with a constant
/// 1 as its second operand) and AND the result with 1.
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  const MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);

  SDValue One = DAG.getConstant(1, VT);
  SDValue SignBits = DAG.getNode(X86ISD::FGETSIGNx86, DL, VT, Src, One);
  return DAG.getNode(ISD::AND, DL, VT, SignBits, One);
}
/// Try to turn an OR tree whose leaves are EXTRACT_VECTOR_ELT nodes covering
/// every element of one or more 128-/256-bit vectors into a single PTEST.
///
/// \param Op        root of the OR tree (must be an ISD::OR with one use).
/// \param Subtarget used to require SSE4.1.
/// \returns the PTEST node (type i32), or an empty SDValue when the pattern
///          does not match.
static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
                                      SelectionDAG &DAG) {
  assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");

  if (!Subtarget->hasSSE41())
    return SDValue();

  if (!Op->hasOneUse())
    return SDValue();

  SDNode *N = Op.getNode();
  SDLoc DL(N);

  SmallVector<SDValue, 8> Opnds;
  // Per source vector: bit i is set once element i has been seen extracted.
  // A 64-bit mask is used so that 32-element vectors (e.g. v32i8) do not
  // require an out-of-range shift of a 32-bit value.
  DenseMap<SDValue, uint64_t> VecInMap;
  SmallVector<SDValue, 8> VecIns;
  EVT VT = MVT::Other;

  // Breadth-first walk over the OR tree.
  Opnds.push_back(N->getOperand(0));
  Opnds.push_back(N->getOperand(1));

  for (unsigned Slot = 0; Slot < Opnds.size(); ++Slot) {
    // Take a copy: the push_back calls below may reallocate Opnds, which
    // would invalidate any iterator or reference into it.
    SDValue Cur = Opnds[Slot];

    if (Cur.getOpcode() == ISD::OR) {
      Opnds.push_back(Cur.getOperand(0));
      Opnds.push_back(Cur.getOperand(1));
      continue;
    }

    // Every leaf must be an extract with a constant index.
    if (Cur.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    SDValue Idx = Cur.getOperand(1);
    if (!isa<ConstantSDNode>(Idx))
      return SDValue();

    SDValue ExtractedFromVec = Cur.getOperand(0);
    DenseMap<SDValue, uint64_t>::iterator M = VecInMap.find(ExtractedFromVec);
    if (M == VecInMap.end()) {
      VT = ExtractedFromVec.getValueType();
      // Only 128-bit and 256-bit source vectors are supported.
      if (!VT.is128BitVector() && !VT.is256BitVector())
        return SDValue();
      // The element mask has 64 bits; give up on anything wider.
      if (VT.getVectorNumElements() > 64)
        return SDValue();
      // All source vectors must share one type.
      if (VecInMap.begin() != VecInMap.end() &&
          VT != VecInMap.begin()->first.getValueType())
        return SDValue();
      M = VecInMap.insert(std::make_pair(ExtractedFromVec, uint64_t(0))).first;
      VecIns.push_back(ExtractedFromVec);
    }

    // Reject out-of-range indices instead of shifting by them.
    const uint64_t EltNo = cast<ConstantSDNode>(Idx)->getZExtValue();
    if (EltNo >= ExtractedFromVec.getValueType().getVectorNumElements())
      return SDValue();
    M->second |= uint64_t(1) << EltNo;
  }

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Not extracted from 128-/256-bit vector.");

  // Every element of every source vector must have been extracted.
  const unsigned NumElts = VT.getVectorNumElements();
  const uint64_t FullMask =
      NumElts >= 64 ? ~uint64_t(0) : (uint64_t(1) << NumElts) - 1;

  for (DenseMap<SDValue, uint64_t>::const_iterator
         I = VecInMap.begin(), E = VecInMap.end(); I != E; ++I) {
    if (I->second != FullMask)
      return SDValue();
  }

  EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;

  // Cast all sources to the test type.
  for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
    VecIns[i] = DAG.getNode(ISD::BITCAST, DL, TestVT, VecIns[i]);

  // OR the sources together pairwise; each new OR is appended and consumed
  // by a later iteration until a single value remains at the back.
  for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
    SDValue LHS = VecIns[Slot];
    SDValue RHS = VecIns[Slot + 1];
    VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
  }

  return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
                     VecIns.back(), VecIns.back());
}
/// Return true if Op has at least one use that is not a BRCOND, a SETCC, or
/// the condition operand (operand 0) of a SELECT. A single-use TRUNCATE user
/// is looked through to its own user.
static bool hasNonFlagsUse(SDValue Op) {
  for (SDNode::use_iterator It = Op->use_begin(), End = Op->use_end();
       It != End; ++It) {
    SDNode *Consumer = *It;
    unsigned OperandNo = It.getOperandNo();

    // Look through a truncate that has exactly one user.
    if (Consumer->getOpcode() == ISD::TRUNCATE && Consumer->hasOneUse()) {
      OperandNo = Consumer->use_begin().getOperandNo();
      Consumer = *Consumer->use_begin();
    }

    const unsigned Opc = Consumer->getOpcode();
    const bool IsFlagsOnlyUse = Opc == ISD::BRCOND || Opc == ISD::SETCC ||
                                (Opc == ISD::SELECT && OperandNo == 0);
    if (!IsFlagsOnlyUse)
      return true;
  }
  return false;
}
// Emit nodes that compare Op against zero and return the EFLAGS-typed result.
//
// Where possible, the arithmetic node that produced Op is replaced by the
// flag-setting X86ISD equivalent and its flags result (value #1) is returned;
// otherwise an X86ISD::CMP of Op against 0 is emitted. X86CC is the condition
// the flags will be tested with; it determines whether CF/OF are required.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
SelectionDAG &DAG) const {
// i1 values are zero-extended to i8 and compared against zero.
if (Op.getValueType() == MVT::i1) {
SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
DAG.getConstant(0, MVT::i8));
}
// Work out whether the requested condition reads the carry or overflow flag.
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO: {
// OF is needed unless Op is an ADD/SUB/MUL/SHL flagged no-signed-wrap.
switch (Op->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SHL: {
const BinaryWithFlagsSDNode *BinNode =
cast<BinaryWithFlagsSDNode>(Op.getNode());
if (BinNode->hasNoSignedWrap())
break;
// Intentional fall through into 'default' when the node may wrap.
}
default:
NeedOF = true;
break;
}
break;
}
}
// If Op is not result #0 of its node, or CF/OF are required, do not try to
// reuse flags from the defining node: emit an explicit compare with zero.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
}
// Opcode == 0 at the end means "no flag-setting replacement was chosen".
unsigned Opcode = 0;
unsigned NumOperands = 0;
// Look through a single-use TRUNCATE of a single-use ADD/SUB/AND/OR/XOR.
// ArithOp is then the wide arithmetic node, while Op stays the truncate and
// is the node whose users are inspected below.
bool NeedTruncation = false;
SDValue ArithOp = Op;
if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
SDValue Arith = Op->getOperand(0);
if (Arith->hasOneUse())
switch (Arith.getOpcode()) {
default: break;
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
NeedTruncation = true;
ArithOp = Arith;
}
}
}
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Give up on reusing the ADD if any user is something other than
// CopyToReg, SETCC or STORE.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
UI->getOpcode() != ISD::SETCC &&
UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// Add of 1 becomes INC unless the subtarget reports slow INC/DEC.
if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// Add of all-ones (i.e. -1) becomes DEC under the same condition.
if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
}
}
// Otherwise use the flag-setting ADD.
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
case ISD::SHL:
case ISD::SRL:
// For an (in)equality test of a constant shift whose value is only used for
// flags, replace the shift by an AND with the mask of bits that survive the
// shift. Opcode stays 0, so the CMP-with-zero at the end tests the AND.
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
unsigned ShAmt = Op->getConstantOperandVal(1);
// Shift amounts >= the bit width are left alone.
if (ShAmt >= BitWidth) break;
APInt Mask = ArithOp.getOpcode() == ISD::SRL
? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
: APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
// Skip masks that do not fit in a signed 32-bit value.
if (!Mask.isSignedIntN(32)) break;
SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
DAG.getConstant(Mask, VT));
DAG.ReplaceAllUsesWith(Op, New);
Op = New;
}
break;
case ISD::AND:
// If the AND result is only used for flags, leave Opcode at 0 so that the
// plain compare-with-zero is emitted.
if (!hasNonFlagsUse(Op))
break;
// Intentional fall through to the generic binary-op handling.
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
// Give up on reusing the node if any user is a STORE.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() == ISD::STORE)
goto default_case;
// Map to the flag-setting X86ISD opcode.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
// For an (in)equality test of an un-truncated OR, first try the vector
// all-zero test lowering.
if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG);
if (EFLAGS.getNode())
return EFLAGS;
}
Opcode = X86ISD::OR;
break;
}
}
NumOperands = 2;
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
// Already a flag-producing node: return its second result.
return SDValue(Op.getNode(), 1);
default:
default_case:
break;
}
// When looking through a truncate, rebuild the wide operation at the narrow
// type from truncated operands, using the X86ISD opcode. This is only done if
// the wide operation is legal at its own type.
if (NeedTruncation) {
EVT VT = Op.getValueType();
SDValue WideVal = Op->getOperand(0);
EVT WideVT = WideVal.getValueType();
unsigned ConvertedOp = 0;
switch (WideVal.getOpcode()) {
default: break;
case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
case ISD::AND: ConvertedOp = X86ISD::AND; break;
case ISD::OR: ConvertedOp = X86ISD::OR; break;
case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
}
if (ConvertedOp) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
}
}
}
// No replacement opcode chosen: emit the compare with zero.
if (Opcode == 0)
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
// Build the flag-setting node (value result + i32 flags result) from the
// first NumOperands operands of Op, redirect Op's users to it, and return the
// flags result.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
Ops.push_back(Op.getOperand(i));
SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesWith(Op, New);
return SDValue(New.getNode(), 1);
}
/// Emit nodes that compare Op0 with Op1 and return the flags-typed result.
///
/// A comparison against constant zero is delegated to EmitTest. Scalar
/// i8/i16/i32/i64 comparisons are emitted as the flags result of an
/// X86ISD::SUB; i8/i16 operands are first extended to i32 unless the
/// function has the MinSize attribute or the subtarget is Atom. Everything
/// else becomes an X86ISD::CMP.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                                   SDLoc dl, SelectionDAG &DAG) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op1)) {
    if (RHSC->getAPIntValue() == 0)
      return EmitTest(Op0, X86CC, dl, DAG);

    if (Op0.getValueType() == MVT::i1)
      llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
  }

  const EVT OpVT = Op0.getValueType();
  const bool IsNarrowInt = OpVT == MVT::i8 || OpVT == MVT::i16;
  const bool IsWideInt = OpVT == MVT::i32 || OpVT == MVT::i64;

  if (!IsNarrowInt && !IsWideInt)
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);

  // Widen sub-32-bit compares to i32, extending according to the signedness
  // of the condition code.
  if (IsNarrowInt &&
      !DAG.getMachineFunction().getFunction()->hasFnAttribute(
          Attribute::MinSize) &&
      !Subtarget->isAtom()) {
    const unsigned ExtendOpc =
        isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    Op0 = DAG.getNode(ExtendOpc, dl, MVT::i32, Op0);
    Op1 = DAG.getNode(ExtendOpc, dl, MVT::i32, Op1);
  }

  // Emit a SUB and hand back its flags result (value #1).
  SDVTList ResultTys = DAG.getVTList(Op0.getValueType(), MVT::i32);
  SDValue Difference = DAG.getNode(X86ISD::SUB, dl, ResultTys, Op0, Op1);
  return SDValue(Difference.getNode(), 1);
}
/// On subtargets without CMOV, rewrite a floating-point X86ISD::CMP as the
/// sequence  SAHF(trunc_i8(srl(FNSTSW16r(trunc_i16(Cmp)), 8))).
/// Any other node, and any node on a subtarget with CMOV, is returned as is.
SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
                                                 SelectionDAG &DAG) const {
  if (Subtarget->hasCMov())
    return Cmp;
  if (Cmp.getOpcode() != X86ISD::CMP)
    return Cmp;
  if (!Cmp.getOperand(0).getValueType().isFloatingPoint())
    return Cmp;
  if (!Cmp.getOperand(1).getValueType().isFloatingPoint())
    return Cmp;

  SDLoc DL(Cmp);
  SDValue StatusIn = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Cmp);
  SDValue StatusWord = DAG.getNode(X86ISD::FNSTSW16r, DL, MVT::i16, StatusIn);
  // Bring the upper byte of the 16-bit status word down into the low byte.
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, MVT::i16, StatusWord,
                                DAG.getConstant(8, MVT::i8));
  SDValue HighByte = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shifted);
  return DAG.getNode(X86ISD::SAHF, DL, MVT::i32, HighByte);
}
/// Produce a reciprocal-square-root estimate node (X86ISD::FRSQRT) for Op.
///
/// Only f32/v4f32 (with SSE1) and v8f32 (with AVX) are supported, and only
/// when the subtarget enables useSqrtEst(). On success RefinementSteps is set
/// to 1 and UseOneConstNR to false; on failure an empty SDValue is returned
/// and the output parameters are left untouched.
SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps,
                                            bool &UseOneConstNR) const {
  if (!Subtarget->useSqrtEst())
    return SDValue();

  const EVT VT = Op.getValueType();
  const bool HasEstimate =
      (Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
      (Subtarget->hasAVX() && VT == MVT::v8f32);
  if (!HasEstimate)
    return SDValue();

  RefinementSteps = 1;
  UseOneConstNR = false;
  return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
}
/// Produce a reciprocal estimate node (X86ISD::FRCP) for Op.
///
/// Only f32/v4f32 (with SSE1) and v8f32 (with AVX) are supported, and only
/// when the subtarget enables useReciprocalEst(). On success RefinementSteps
/// is set from ReciprocalEstimateRefinementSteps; on failure an empty SDValue
/// is returned and RefinementSteps is left untouched.
SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps) const {
  if (!Subtarget->useReciprocalEst())
    return SDValue();

  const EVT VT = Op.getValueType();
  const bool HasEstimate =
      (Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
      (Subtarget->hasAVX() && VT == MVT::v8f32);
  if (!HasEstimate)
    return SDValue();

  RefinementSteps = ReciprocalEstimateRefinementSteps;
  return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
}
/// Return true if V is a ConstantSDNode whose value has every bit set.
static bool isAllOnes(SDValue V) {
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(V))
    return CN->isAllOnesValue();
  return false;
}
// Try to lower a single-bit test expressed as an AND into X86ISD::BT followed
// by X86ISD::SETCC. Recognized forms of the AND:
//   (and (shl 1, N), X)   or its commuted form  -> BT X, N
//   (and (srl X, N), 1)                         -> BT X, N
//   (and X, C) with C a power of two that does not fit in 32 unsigned bits
//                                               -> BT X, log2(C)
// Returns an empty SDValue when no form matches.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
SDLoc dl, SelectionDAG &DAG) const {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
// Look through truncates on either operand.
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
if (Op1.getOpcode() == ISD::TRUNCATE)
Op1 = Op1.getOperand(0);
// LHS is the value to test, RHS the bit index; both stay empty on no match.
SDValue LHS, RHS;
// Canonicalize so that a SHL, if present, is in Op0.
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
if (And00C->getZExtValue() == 1) {
// If the shift is wider than the AND (a truncate was skipped), require that
// all bits dropped by the truncate are known to be zero.
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
APInt Zeros, Ones;
DAG.computeKnownBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
LHS = Op1;
RHS = Op0.getOperand(1);
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
// (srl X, N) & 1 tests bit N of X.
if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
// A power-of-two mask wider than 32 unsigned bits: test that single bit.
if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
LHS = AndLHS;
RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
}
}
if (LHS.getNode()) {
// i8 and i16 values are any-extended to i32 before the bit test.
if (LHS.getValueType() == MVT::i8 ||
LHS.getValueType() == MVT::i16)
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
// Make the bit-index type match the tested value's type.
if (LHS.getValueType() != RHS.getValueType())
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
// SETEQ maps to COND_AE, any other CC to COND_B.
// NOTE(review): this relies on BT reporting the tested bit in the carry
// flag - confirm against the X86ISD::BT definition.
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, MVT::i8), BT);
}
return SDValue();
}
/// Map an ISD floating-point condition code to the numeric predicate used by
/// the SSE compare nodes, swapping Op0 and Op1 in place when the predicate is
/// only available with reversed operands.
///
/// \returns the predicate value 0..7, or 8 for SETUEQ / SETONE, which the
///          caller has to expand into two compares.
static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
                              SDValue &Op1) {
  unsigned Predicate;
  bool SwapOperands = false;

  switch (SetCCOpcode) {
  default:
    llvm_unreachable("Unexpected SETCC condition");

  case ISD::SETOEQ:
  case ISD::SETEQ:
    Predicate = 0;
    break;

  case ISD::SETOGT:
  case ISD::SETGT:
    SwapOperands = true;
    Predicate = 1;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    Predicate = 1;
    break;

  case ISD::SETOGE:
  case ISD::SETGE:
    SwapOperands = true;
    Predicate = 2;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    Predicate = 2;
    break;

  case ISD::SETUO:
    Predicate = 3;
    break;

  case ISD::SETUNE:
  case ISD::SETNE:
    Predicate = 4;
    break;

  case ISD::SETULE:
    SwapOperands = true;
    Predicate = 5;
    break;
  case ISD::SETUGE:
    Predicate = 5;
    break;

  case ISD::SETULT:
    SwapOperands = true;
    Predicate = 6;
    break;
  case ISD::SETUGT:
    Predicate = 6;
    break;

  case ISD::SETO:
    Predicate = 7;
    break;

  case ISD::SETUEQ:
  case ISD::SETONE:
    Predicate = 8;
    break;
  }

  if (SwapOperands)
    std::swap(Op0, Op1);
  return Predicate;
}
/// Lower a 256-bit vector SETCC by splitting both operands into 128-bit
/// halves, comparing the halves separately and concatenating the results.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
  const MVT VT = Op.getSimpleValueType();
  assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
         "Unsupported value type for operation");

  SDLoc DL(Op);
  const unsigned HalfElts = VT.getVectorNumElements() / 2;
  SDValue Cond = Op.getOperand(2);

  // Split each operand into its low and high 128-bit halves.
  SDValue LHS = Op.getOperand(0);
  SDValue LHSLo = Extract128BitVector(LHS, 0, DAG, DL);
  SDValue LHSHi = Extract128BitVector(LHS, HalfElts, DAG, DL);

  SDValue RHS = Op.getOperand(1);
  SDValue RHSLo = Extract128BitVector(RHS, 0, DAG, DL);
  SDValue RHSHi = Extract128BitVector(RHS, HalfElts, DAG, DL);

  // Compare the halves using the same opcode and condition, then rejoin.
  const MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfElts);
  SDValue Lo = DAG.getNode(Op.getOpcode(), DL, HalfVT, LHSLo, RHSLo, Cond);
  SDValue Hi = DAG.getNode(Op.getOpcode(), DL, HalfVT, LHSHi, RHSHi, Cond);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
/// Lower an integer vector SETCC that produces an i1-element mask.
///
/// SETEQ and SETGT/SETLT map to the dedicated PCMPEQM / PCMPGTM nodes (SETLT
/// with swapped operands). Every other condition becomes CMPM, or CMPMU for
/// unsigned conditions, with an immediate predicate operand.
static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget *Subtarget) {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CondNode = Op.getOperand(2);
  const MVT VT = Op.getSimpleValueType();
  SDLoc DL(Op);

  assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
         Op.getValueType().getScalarType() == MVT::i1 &&
         "Cannot set masked compare for this operation");

  const ISD::CondCode Cond = cast<CondCodeSDNode>(CondNode)->get();

  unsigned DedicatedOpc = 0; // Non-zero when a PCMP*M node is used.
  unsigned Predicate = 0;    // Immediate for CMPM / CMPMU.
  bool IsUnsigned = false;
  bool SwapOperands = false;

  switch (Cond) {
  default:
    llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETEQ:
    DedicatedOpc = X86ISD::PCMPEQM;
    break;
  case ISD::SETNE:
    Predicate = 4;
    break;
  case ISD::SETLT:
    SwapOperands = true;
    DedicatedOpc = X86ISD::PCMPGTM;
    break;
  case ISD::SETGT:
    DedicatedOpc = X86ISD::PCMPGTM;
    break;
  case ISD::SETGE:
    SwapOperands = true;
    Predicate = 2;
    break;
  case ISD::SETLE:
    Predicate = 2;
    break;
  case ISD::SETULT:
    Predicate = 1;
    IsUnsigned = true;
    break;
  case ISD::SETULE:
    Predicate = 2;
    IsUnsigned = true;
    break;
  case ISD::SETUGE:
    Predicate = 5;
    IsUnsigned = true;
    break;
  case ISD::SETUGT:
    Predicate = 6;
    IsUnsigned = true;
    break;
  }

  if (SwapOperands)
    std::swap(LHS, RHS);

  if (DedicatedOpc)
    return DAG.getNode(DedicatedOpc, DL, VT, LHS, RHS);

  const unsigned CmpOpc = IsUnsigned ? X86ISD::CMPMU : X86ISD::CMPM;
  return DAG.getNode(CmpOpc, DL, VT, LHS, RHS,
                     DAG.getConstant(Predicate, MVT::i8));
}
/// Given the right-hand operand of a vector "u<" comparison, try to produce
/// the operand for the equivalent "u<=" comparison by subtracting one from
/// every element.
///
/// This only succeeds when \p Op1 is a BUILD_VECTOR whose operands are all
/// non-opaque constants of the vector's element type and none of them is
/// zero (zero would wrap around when decremented).
///
/// \returns the new BUILD_VECTOR, or an empty SDValue if the rewrite is not
/// possible.
static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
{
  BuildVectorSDNode *BuildVec = dyn_cast<BuildVectorSDNode>(Op1.getNode());
  if (!BuildVec)
    return SDValue();

  MVT VecVT = Op1.getSimpleValueType();
  MVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Decremented;

  for (unsigned Idx = 0, NumElts = VecVT.getVectorNumElements();
       Idx < NumElts; ++Idx) {
    ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BuildVec->getOperand(Idx));
    const bool Usable =
        Elt && !Elt->isOpaque() && Elt->getValueType(0) == EltVT;
    if (!Usable)
      return SDValue();

    // C - 1 would underflow for C == 0, so give up in that case.
    APInt Val = Elt->getAPIntValue();
    if (Val == 0)
      return SDValue();

    Decremented.push_back(DAG.getConstant(Val - 1, EltVT));
  }

  return DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Decremented);
}
// Lower a vector ISD::SETCC. Operands 0 and 1 are the values compared and
// operand 2 is the condition code. Floating-point compares become CMPP (or
// CMPM when AVX-512 produces an i1 mask); integer compares are dispatched to
// the 256-bit splitter, to the AVX-512 helper, or are built here out of
// PCMPEQ / PCMPGT / UMIN / UMAX / SUBUS plus optional operand swap, sign-bit
// flip and result inversion.
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
// ---- Floating-point compares ----
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
// translateX86FSETCC receives Op0/Op1 as well as the condition code.
// NOTE(review): it presumably may swap them; its definition is outside this
// chunk -- confirm.
unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
unsigned Opc = X86ISD::CMPP;
// With AVX-512 and an i1-element result, use the mask-producing compare.
if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16);
Opc = X86ISD::CMPM;
}
// SSECC == 8 is handled as "needs two compares": only SETUEQ and SETONE
// (asserted) are expected here. They are built as (cmp CC0) OR/AND (cmp CC1).
// NOTE(review): 3/0 and 7/4 presumably mean UNORD/EQ and ORD/NEQ in the
// CMPPS/CMPPD predicate encoding -- confirm against the Intel SDM.
if (SSECC == 8) {
unsigned CC0, CC1;
unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
} else {
assert(SetCCOpcode == ISD::SETONE);
CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, MVT::i8));
return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// All remaining FP predicates need a single compare.
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
}
// ---- Integer compares ----
// Without 256-bit integer support, split the compare into two halves.
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
EVT OpVT = Op1.getValueType();
if (Subtarget->hasAVX512()) {
// 512-bit operands, BWI+VLX targets, or a mask result with elements of at
// least 32 bits go to the AVX-512 helper.
if (Op1.getValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
// Mask result with 8- or 16-bit elements: do the compare in the operand
// type and truncate the result to the mask type.
if (MaskResult &&
(OpVT.getVectorElementType().getSizeInBits() < 32 &&
OpVT.getVectorElementType().getSizeInBits() >= 8))
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
// Generic path: express every predicate through PCMPEQ / PCMPGT, recording
// which fix-ups are needed:
//   Swap      - exchange the operands first
//   Invert    - bitwise-NOT the compare result
//   FlipSigns - XOR both operands with the sign bit (unsigned via signed GT)
unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
bool Subus = false;
// NOTE: the cases below deliberately fall through in pairs (e.g. SETNE sets
// Invert and then shares SETEQ's opcode).
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
case ISD::SETLE: Opc = X86ISD::PCMPGT;
Invert = true; break;
case ISD::SETULT: Swap = true;
case ISD::SETUGT: Opc = X86ISD::PCMPGT;
FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
case ISD::SETULE: Opc = X86ISD::PCMPGT;
FlipSigns = true; Invert = true; break;
}
// Special case: SETULE / SETUGE via unsigned min / max. The result is later
// compared for equality with Op0 (see the MinMax handling at the end).
MVT VET = VT.getVectorElementType();
bool hasMinMax =
(Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
|| (Subtarget->hasSSE2() && (VET == MVT::i8));
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
}
// The min/max form needs none of the generic fix-ups.
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
}
// Special case for i8/i16 elements: use SUBUS and compare the result with
// zero (see the Subus handling at the end).
bool hasSubus = Subtarget->hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
if (!MinMax && hasSubus) {
switch (SetCCOpcode) {
default: break;
case ISD::SETULT: {
// Only attempted when AVX is not available. If Op1 is a suitable constant
// vector, "x u< C" is rewritten as "x u<= C-1", which needs no swap.
if (Subtarget->hasAVX())
break;
SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
if (ULEOp1.getNode()) {
Op1 = ULEOp1;
Subus = true; Invert = false; Swap = false;
}
break;
}
case ISD::SETUGE: Subus = true; Invert = false; Swap = true; break;
case ISD::SETULE: Subus = true; Invert = false; Swap = false; break;
}
if (Subus) {
Opc = X86ISD::SUBUS;
FlipSigns = false;
}
}
if (Swap)
std::swap(Op0, Op1);
// v2i64 needs extra care: 64-bit PCMPGT is only used directly with SSE4.2
// and 64-bit PCMPEQ only with SSE4.1; otherwise emulate on v4i32.
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
assert(Subtarget->hasSSE2() && "Don't know how to lower!");
// Work on the operands as v4i32.
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
// Sign-bit mask to XOR into both operands: all four lanes when FlipSigns is
// set, otherwise only lanes 0 and 2 (per the BUILD_VECTOR below).
SDValue SB;
if (FlipSigns) {
SB = DAG.getConstant(0x80000000U, MVT::v4i32);
} else {
SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Sign, Zero, Sign, Zero);
}
Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
// 64-bit greater-than from 32-bit pieces:
//   result = GT[odd lane] | (EQ[odd lane] & GT[even lane])
SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
// Shuffle masks that replicate the odd (1,3) or even (0,2) lane of each
// 64-bit element into both of its 32-bit halves.
static const int MaskHi[] = { 1, 1, 3, 3 };
static const int MaskLo[] = { 0, 0, 2, 2 };
SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
// 64-bit equality from 32-bit equality: compare as v4i32, then AND each
// lane with its neighbour so both halves must match.
assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
static const int Mask[] = { 1, 0, 3, 2 };
SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);
if (Invert)
Result = DAG.getNOT(dl, Result, MVT::v4i32);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
// Unsigned compare through signed PCMPGT: flip the sign bit of every element
// of both operands first.
if (FlipSigns) {
EVT EltVT = VT.getVectorElementType();
SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
}
SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
// Apply the logical NOT if the chosen predicate requires it.
if (Invert)
Result = DAG.getNOT(dl, Result, VT);
// MinMax: the predicate holds where Op0 equals the min/max result.
if (MinMax)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);
// Subus: the predicate holds where the SUBUS result is zero.
if (Subus)
Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
getZeroVector(VT, Subtarget, DAG, dl));
return Result;
}
/// Lower an ISD::SETCC node.
///
/// Vector compares are forwarded to LowerVSETCC.  For scalars, a few
/// patterns comparing against the constants 0 / 1 with SETEQ / SETNE are
/// simplified first (bit test, reuse or inversion of an existing
/// X86ISD::SETCC, i1 compare against 1); everything else becomes an
/// X86ISD::SETCC of the flags produced by EmitCmp.
///
/// \returns the lowered value, or an empty SDValue when TranslateX86CC
/// reports X86::COND_INVALID.
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();

  if (VT.isVector())
    return LowerVSETCC(Op, Subtarget, DAG);

  assert(((!Subtarget->hasAVX512() && VT == MVT::i8) || (VT == MVT::i1))
         && "SetCC type must be 8-bit or 1-bit integer");

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDLoc dl(Op);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // The X86 nodes built here are i8; narrow them when the result type is i1.
  auto NarrowToResultVT = [&](SDValue SetCC) -> SDValue {
    if (VT == MVT::i1)
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
    return SetCC;
  };

  const bool IsEqOrNe = (CC == ISD::SETEQ || CC == ISD::SETNE);

  // All of the special cases need an (in)equality against a constant.
  if (IsEqOrNe && RHS.getOpcode() == ISD::Constant) {
    ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

    // (and ...) ==/!= 0 with a single use: try to form a bit test.
    if (LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
        RHSC->isNullValue()) {
      SDValue BitTest = LowerToBT(LHS, CC, dl, DAG);
      if (BitTest.getNode())
        return NarrowToResultVT(BitTest);
    }

    const bool RHSIsOne = (RHSC->getZExtValue() == 1);

    // X86ISD::SETCC compared with 0 or 1: either reuse it as is, or rebuild
    // it with the opposite condition.
    if ((RHSIsOne || RHSC->isNullValue()) &&
        LHS.getOpcode() == X86ISD::SETCC) {
      bool Invert = (CC == ISD::SETNE) ^ RHSC->isNullValue();
      if (!Invert)
        return LHS;

      X86::CondCode Opposite = X86::GetOppositeBranchCondition(
          (X86::CondCode)LHS.getConstantOperandVal(0));
      SDValue Flipped = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                                    DAG.getConstant(Opposite, MVT::i8),
                                    LHS.getOperand(1));
      return NarrowToResultVT(Flipped);
    }

    // i1 value compared with 1: compare with 0 using the inverse condition.
    if (RHSIsOne && LHS.getValueType() == MVT::i1) {
      ISD::CondCode InverseCC = ISD::getSetCCInverse(CC, true);
      return DAG.getSetCC(dl, VT, LHS, DAG.getConstant(0, MVT::i1),
                          InverseCC);
    }
  }

  // General case: emit the compare and read the flags with SETCC.
  bool isFP = RHS.getSimpleValueType().isFloatingPoint();
  unsigned X86CC = TranslateX86CC(CC, isFP, LHS, RHS, DAG);
  if (X86CC == X86::COND_INVALID)
    return SDValue();

  SDValue Flags = ConvertCmpIfNecessary(EmitCmp(LHS, RHS, X86CC, dl, DAG),
                                        DAG);
  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                              DAG.getConstant(X86CC, MVT::i8), Flags);
  return NarrowToResultVT(SetCC);
}
/// Return true if \p Op refers to a result that this file accepts directly
/// as a flag-producing value: any result of CMP / COMI / UCOMI / SAHF,
/// result #1 of the listed X86 arithmetic/logic nodes, or result #2 of UMUL.
static bool isX86LogicalCmp(SDValue Op) {
  const unsigned ResNo = Op.getResNo();

  switch (Op.getNode()->getOpcode()) {
  case X86ISD::CMP:
  case X86ISD::COMI:
  case X86ISD::UCOMI:
  case X86ISD::SAHF:
    return true;

  case X86ISD::UMUL:
    // UMUL is accepted through either its second or its third result.
    return ResNo == 1 || ResNo == 2;

  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::ADC:
  case X86ISD::SBB:
  case X86ISD::SMUL:
  case X86ISD::INC:
  case X86ISD::DEC:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    return ResNo == 1;

  default:
    return false;
  }
}
/// Return true if \p V is an ISD::TRUNCATE whose source is known to have all
/// of the bits dropped by the truncation equal to zero.
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
  if (V.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = V.getOperand(0);
    const unsigned SrcBits = Src.getValueSizeInBits();
    const unsigned DroppedBits = SrcBits - V.getValueSizeInBits();
    return DAG.MaskedValueIsZero(Src,
                                 APInt::getHighBitsSet(SrcBits, DroppedBits));
  }
  return false;
}
// Lower ISD::SELECT (operand 0: condition, operand 1: value if true,
// operand 2: value if false). Several special forms are recognised first
// (scalar FP select via compare + bitwise ops, all-ones/zero selects via
// SETCC_CARRY); otherwise an X86ISD::CMOV is emitted, reusing an existing
// flag-producing node as the condition where possible.
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// addTest stays true until a flag-producing node has been found for Cond;
// if it is still true near the end, an explicit test is emitted.
bool addTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
EVT VT = Op1.getValueType();
SDValue CC;
// Scalar FP select fed by a single-use SETCC on the same FP type (f32 with
// SSE1, f32/f64 with SSE2): build it from an FSETCC compare instead of a
// CMOV.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
// A result of 8 is excluded here; such conditions take the generic path.
if (SSECC != 8) {
// AVX-512: i1 compare result feeding X86ISD::SELECT.
if (Subtarget->hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
}
// Otherwise: result = FOR(FANDN(Cmp, Op2), FAND(Cmp, Op1)).
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
}
}
// Lower a generic SETCC condition now so the X86ISD patterns below can match.
if (Cond.getOpcode() == ISD::SETCC) {
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
Cond = NewCond;
}
// Condition is "x ==/!= 0" (SETCC of CMP x, 0) and one of the select arms is
// all-ones: compute the result from SETCC_CARRY instead of a CMOV.
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isZero(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
// Y is the arm that is not (known to be) the all-ones one.
SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Further special case: Y is constant zero and the all-ones arm is the one
// chosen when x != 0. Then the carry flag of (0 - x) gives the answer
// directly.
if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
if (YC->isNullValue() &&
(isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
DAG.getConstant(0, CmpOp0.getValueType()),
CmpOp0);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8),
SDValue(Neg.getNode(), 1));
return Res;
}
// General form: SETCC_CARRY on COND_B of (CMP x, 1), optionally inverted,
// then ORed with Y unless Op2 is a constant zero.
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
if (isAllOnes(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
if (!N2C || !N2C->isNullValue())
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
}
}
// Look through (and (setcc_carry ...), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// If the condition is an X86 SETCC / SETCC_CARRY, try to use its flag
// operand directly as the CMOV condition.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
MVT VT = Op.getSimpleValueType();
// For scalar FP results that are not kept in SSE registers, the condition
// code must be one that hasFPCMov accepts.
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!isScalarFPTypeInSSEReg(VT)) IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Opc == X86ISD::BT) { Cond = Cmp;
addTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
// Condition is the overflow result of an arithmetic-with-overflow node:
// emit the matching X86 arithmetic node and select on its flag result
// (COND_B for unsigned add/sub, COND_O otherwise). i8 multiplies are
// excluded by the condition above.
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
// UMUL is given two value results, so its flags are result #2; for the
// other nodes the flags are result #1.
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, MVT::i8);
addTest = false;
}
if (addTest) {
// Look through a truncate whose dropped bits are known to be zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// A single-use AND is implicitly tested against zero here; try to turn it
// into a bit test.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
// Nothing matched: test the condition value itself for "not equal".
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
// Flags come from an X86ISD::SUB, the condition is COND_B / COND_AE, and the
// arms are all-ones and zero (in either order): the result is SETCC_CARRY,
// inverted when needed.
if (Cond.getOpcode() == X86ISD::SUB) {
Cond = ConvertCmpIfNecessary(Cond, DAG);
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
(isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, MVT::i8), Cond);
if (isAllOnes(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
}
// i8 select whose arms are both truncates of same-typed values (neither a
// CopyFromReg): do the CMOV on the wider type and truncate the result.
if (Op.getValueType() == MVT::i8 &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}
}
// Default: X86ISD::CMOV. Note the operand order: the "false" value (Op2)
// comes first, followed by the "true" value (Op1), the condition code and
// the flags.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
/// Lower a vector sign-extension on the AVX-512 path.
///
/// * i1 sources are emitted as a single X86ISD::VSEXT when the subtarget
///   features allow it: BWI for destination elements of at most 16 bits,
///   DQI for at least 32 bits, each either at 512 bits or, with VLX, at up
///   to 256 bits.
/// * Otherwise only 8- and 16-element destinations are handled.  A 512-bit
///   destination with a non-i1 source becomes VSEXT (an existing
///   VSEXT/VZEXT source is re-emitted at the wider type); an i1 source is
///   expanded with VBROADCASTM of an all-ones constant loaded from the
///   constant pool, truncated when the destination is not 512 bits wide.
///
/// \returns the lowered node, or an empty SDValue when nothing applies.
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue Src = Op->getOperand(0);
  MVT DstVT = Op->getSimpleValueType(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstEltVT = DstVT.getVectorElementType();

  const bool SrcIsMask = (SrcVT.getVectorElementType() == MVT::i1);
  const bool DstIs512 = DstVT.is512BitVector();

  if (SrcIsMask) {
    const unsigned DstEltBits = DstEltVT.getSizeInBits();
    const bool WidthSupported =
        DstIs512 || (Subtarget->hasVLX() && DstVT.getSizeInBits() <= 256);
    const bool ByteWordExt = Subtarget->hasBWI() && DstEltBits <= 16;
    const bool DwordQwordExt = Subtarget->hasDQI() && DstEltBits >= 32;
    if (WidthSupported && (ByteWordExt || DwordQwordExt))
      return DAG.getNode(X86ISD::VSEXT, DL, DstVT, Src);
  }

  const unsigned NumElts = DstVT.getVectorNumElements();
  if (NumElts != 8 && NumElts != 16)
    return SDValue();

  if (DstIs512 && !SrcIsMask) {
    // Extending an existing extension: redo it straight to the wide type.
    const unsigned SrcOpc = Src.getOpcode();
    if (SrcOpc == X86ISD::VSEXT || SrcOpc == X86ISD::VZEXT)
      return DAG.getNode(SrcOpc, DL, DstVT, Src.getOperand(0));
    return DAG.getNode(X86ISD::VSEXT, DL, DstVT, Src);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert (SrcVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");

  // Expand the mask at 512 bits: broadcast an all-ones element under it.
  MVT WideVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
  MVT WideEltVT = WideVT.getScalarType();
  Constant *AllOnes = ConstantInt::get(
      *DAG.getContext(), APInt::getAllOnesValue(WideEltVT.getSizeInBits()));

  SDValue PoolAddr = DAG.getConstantPool(AllOnes, TLI.getPointerTy());
  unsigned PoolAlign = cast<ConstantPoolSDNode>(PoolAddr)->getAlignment();
  SDValue AllOnesLoad =
      DAG.getLoad(WideEltVT, DL, DAG.getEntryNode(), PoolAddr,
                  MachinePointerInfo::getConstantPool(),
                  false, false, false, PoolAlign);

  SDValue Broadcast =
      DAG.getNode(X86ISD::VBROADCASTM, DL, WideVT, Src, AllOnesLoad);
  if (DstIs512)
    return Broadcast;
  return DAG.getNode(X86ISD::VTRUNC, DL, DstVT, Broadcast);
}
/// Lower a vector ISD::SIGN_EXTEND.
///
/// 512-bit results and i1-element sources go to LowerSIGN_EXTEND_AVX512.
/// Otherwise only v4i32->v4i64, v8i16->v8i32 and v16i8->v16i16 are handled:
/// as a single VSEXT when 256-bit integer operations are available, else by
/// extending the low and high halves of the source separately and
/// concatenating the two results.
///
/// \returns the lowered node, or an empty SDValue for unsupported types.
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
                                SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT DstVT = Op->getSimpleValueType(0);
  SDValue Src = Op->getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();

  if (DstVT.is512BitVector() || SrcVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);

  const bool IsSupportedPair =
      (DstVT == MVT::v4i64 && SrcVT == MVT::v4i32) ||
      (DstVT == MVT::v8i32 && SrcVT == MVT::v8i16) ||
      (DstVT == MVT::v16i16 && SrcVT == MVT::v16i8);
  if (!IsSupportedPair)
    return SDValue();

  if (Subtarget->hasInt256())
    return DAG.getNode(X86ISD::VSEXT, DL, DstVT, Src);

  // Split the source: each shuffle moves one half of the source elements
  // into the low positions and leaves the remaining lanes undefined (-1).
  const unsigned SrcElts = SrcVT.getVectorNumElements();
  const unsigned HalfElts = SrcElts / 2;
  SDValue Undef = DAG.getUNDEF(SrcVT);

  SmallVector<int,8> LoMask(SrcElts, -1);
  SmallVector<int,8> HiMask(SrcElts, -1);
  for (unsigned Idx = 0; Idx != HalfElts; ++Idx) {
    LoMask[Idx] = Idx;
    HiMask[Idx] = Idx + HalfElts;
  }

  SDValue Lo = DAG.getVectorShuffle(SrcVT, DL, Src, Undef, &LoMask[0]);
  SDValue Hi = DAG.getVectorShuffle(SrcVT, DL, Src, Undef, &HiMask[0]);

  // Extend each half to half of the destination type, then join them.
  MVT HalfDstVT = MVT::getVectorVT(DstVT.getScalarType(),
                                   DstVT.getVectorNumElements()/2);
  Lo = DAG.getNode(X86ISD::VSEXT, DL, HalfDstVT, Lo);
  Hi = DAG.getNode(X86ISD::VSEXT, DL, HalfDstVT, Hi);

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Lo, Hi);
}
// Custom-lower an extending (any-extend or sign-extend) load of an integer
// vector. Requires SSE2 (asserted). The value is loaded with one or more
// scalar loads, bitcast to a vector with the in-memory element type, and
// moved into place with a shuffle (plus VSEXT or an arithmetic shift for
// sign-extension). Users of the original load's chain result are redirected
// to the new chain.
static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT RegVT = Op.getSimpleValueType();
assert(RegVT.isVector() && "We only custom lower vector sext loads.");
assert(RegVT.isInteger() &&
"We only custom lower integer vector sext loads.");
assert(Subtarget->hasSSE2() && "We only custom lower sext loads with SSE2.");
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc dl(Ld);
EVT MemVT = Ld->getMemoryVT();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned RegSz = RegVT.getSizeInBits();
ISD::LoadExtType Ext = Ld->getExtensionType();
assert((Ext == ISD::EXTLOAD || Ext == ISD::SEXTLOAD)
&& "Only anyext and sext are currently implemented.");
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
unsigned NumElems = RegVT.getVectorNumElements();
unsigned MemSz = MemVT.getSizeInBits();
assert(RegSz > MemSz && "Register size must be greater than the mem size");
// Sign-extending load to a 256-bit vector without 256-bit integer support:
// load at (at most) 128 bits and then sign-extend to RegVT as a separate
// node.
if (Ext == ISD::SEXTLOAD && RegSz == 256 && !Subtarget->hasInt256()) {
SDValue Load;
if (MemSz == 128) {
// The memory type is already a 128-bit vector: use a plain load.
assert(TLI.isTypeLegal(MemVT) && "If the memory type is a 128-bit type, "
"it must be a legal 128-bit vector "
"type!");
Load = DAG.getLoad(MemVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
} else {
assert(MemSz < 128 &&
"Can't extend a type wider than 128 bits to a 256 bit vector!");
// Extending load to a vector with the same element count but elements half
// as wide as RegVT's.
EVT HalfEltVT =
EVT::getIntegerVT(*DAG.getContext(), RegVT.getScalarSizeInBits() / 2);
EVT HalfVecVT = EVT::getVectorVT(*DAG.getContext(), HalfEltVT, NumElems);
Load =
DAG.getExtLoad(Ext, dl, HalfVecVT, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), MemVT, Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
Ld->getAlignment());
}
// Redirect users of the old chain to the new load's chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
return DAG.getSExtOrTrunc(Load, dl, RegVT);
}
assert(isPowerOf2_32(RegSz * MemSz * NumElems) &&
"Non-power-of-two elements are not custom lowered!");
// Choose the scalar type used for the individual loads: the last legal
// integer type (in MVT::integer_valuetypes() order) whose size divides
// MemSz, defaulting to i8.
// NOTE(review): this presumably selects the largest such type, assuming the
// range is ordered by increasing size -- confirm.
MVT SclrLoadTy = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && ((MemSz % Tp.getSizeInBits()) == 0)) {
SclrLoadTy = Tp;
}
}
// If the chosen integer type is narrower than 64 bits, f64 is legal and at
// least 64 bits are loaded, use f64 loads instead.
if (TLI.isTypeLegal(MVT::f64) && SclrLoadTy.getSizeInBits() < 64 &&
(64 <= MemSz))
SclrLoadTy = MVT::f64;
// Number of scalar loads needed to cover the in-memory vector.
unsigned NumLoads = MemSz / SclrLoadTy.getSizeInBits();
assert((Ext != ISD::SEXTLOAD || NumLoads == 1) &&
"Can only lower sext loads with a single scalar load!");
// For a sign-extending load to 256 bits, the intermediate vector is built
// at half the register size.
unsigned loadRegZize = RegSz;
if (Ext == ISD::SEXTLOAD && RegSz == 256)
loadRegZize /= 2;
// Intermediate vector expressed in units of the scalar load type...
EVT LoadUnitVecVT = EVT::getVectorVT(
*DAG.getContext(), SclrLoadTy, loadRegZize / SclrLoadTy.getSizeInBits());
// ...and the same number of bits expressed in the in-memory element type.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
loadRegZize / MemVT.getScalarType().getSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
assert(TLI.isTypeLegal(WideVecVT) &&
"We only lower types that form legal widened vector types");
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
// Byte distance between consecutive scalar loads.
SDValue Increment =
DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, TLI.getPointerTy());
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
// NOTE(review): every scalar load reuses the original load's pointer info
// and alignment unchanged even though Ptr advances each iteration --
// confirm that this is intended.
SDValue ScalarLoad =
DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(),
Ld->getAlignment());
Chains.push_back(ScalarLoad.getValue(1));
// The first element starts the vector via SCALAR_TO_VECTOR; later ones are
// inserted at index i.
if (i == 0)
Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad);
else
Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res,
ScalarLoad, DAG.getIntPtrConstant(i));
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
// Merge the chains of all scalar loads.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
// Reinterpret the loaded bits as a vector of the in-memory element type.
SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Res);
unsigned SizeRatio = RegSz / MemSz;
if (Ext == ISD::SEXTLOAD) {
// With SSE4.1 a VSEXT node does the extension directly.
if (Subtarget->hasSSE41()) {
SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
}
// Otherwise place each narrow element in the last narrow slot of its wide
// element and arithmetic-shift right by the width difference.
assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) &&
"We can't implement a sext load without an arithmetic right shift!");
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i * SizeRatio + SizeRatio - 1] = i;
SDValue Shuff = DAG.getVectorShuffle(
WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
MemVT.getVectorElementType().getSizeInBits();
Shuff =
DAG.getNode(ISD::SRA, dl, RegVT, Shuff, DAG.getConstant(Amt, RegVT));
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
// Any-extend: place each narrow element in the first narrow slot of its
// wide element; the remaining slots are left undefined (-1).
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i * SizeRatio] = i;
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, SlicedVec,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
// Bitcast to the requested register type and hand over the chain.
Shuff = DAG.getNode(ISD::BITCAST, dl, RegVT, Shuff);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
/// Return true if \p Op is an ISD::AND or ISD::OR whose two operands are
/// both single-use X86ISD::SETCC nodes.
///
/// \param[out] Opc Always set to the opcode of \p Op, even when the function
/// returns false.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
  Opc = Op.getOpcode();
  if (Opc != ISD::OR && Opc != ISD::AND)
    return false;

  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    SDValue Operand = Op.getOperand(Idx);
    if (Operand.getOpcode() != X86ISD::SETCC || !Operand.hasOneUse())
      return false;
  }
  return true;
}
/// Return true if \p Op is (xor X, 1) where X is a single-use X86ISD::SETCC
/// and the 1 is a constant.
static bool isXor1OfSetCC(SDValue Op) {
  if (Op.getOpcode() != ISD::XOR)
    return false;

  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!RHSC || RHSC->getAPIntValue() != 1)
    return false;

  SDValue Inner = Op.getOperand(0);
  return Inner.getOpcode() == X86ISD::SETCC && Inner.hasOneUse();
}
// Lower ISD::BRCOND (operand 0: chain, operand 1: condition, operand 2:
// destination) to X86ISD::BRCOND, reusing an existing flag-producing node as
// the condition where possible. Some conditions are emitted as two chained
// X86ISD::BRCOND nodes instead of materialising a combined boolean.
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// addTest stays true until a flag-producing node has been found for Cond.
bool addTest = true;
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
SDLoc dl(Op);
SDValue CC;
// Set when the branch is taken on "overflow result == 0", i.e. the
// overflow condition must be inverted.
bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
// setcc(<overflow result of [SU]{ADD,SUB,MUL}O>, 0, SETEQ): branch on the
// overflow node itself with the condition inverted.
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
isa<ConstantSDNode>(Cond.getOperand(1)) &&
cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
Cond.getOperand(0).getOpcode() == ISD::USUBO ||
Cond.getOperand(0).getOpcode() == ISD::SMULO ||
Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
Inverted = true;
Cond = Cond.getOperand(0);
} else {
// Any other SETCC: lower it now so the X86ISD patterns below can match.
SDValue NewCond = LowerSETCC(Cond, DAG);
if (NewCond.getNode())
Cond = NewCond;
}
}
#if 0
else if (Cond.getOpcode() == X86ISD::ADD ||
Cond.getOpcode() == X86ISD::SUB ||
Cond.getOpcode() == X86ISD::SMUL ||
Cond.getOpcode() == X86ISD::UMUL)
Cond = LowerXALUO(Cond, DAG);
#endif
// Look through (and (setcc_carry ...), 1).
if (Cond.getOpcode() == ISD::AND &&
Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
if (C && C->getAPIntValue() == 1)
Cond = Cond.getOperand(0);
}
// If the condition is an X86 SETCC / SETCC_CARRY, try to branch on its flag
// operand directly.
unsigned CondOpcode = Cond.getOpcode();
if (CondOpcode == X86ISD::SETCC ||
CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
Cond = Cmp;
addTest = false;
} else {
// For COND_O / COND_B the SETCC's operand is used as the condition even
// though it is not one of the recognised flag producers.
// NOTE(review): presumably it is then an arithmetic-with-overflow node that
// the block below handles -- confirm.
switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
default: break;
case X86::COND_O:
case X86::COND_B:
Cond = Cond.getNode()->getOperand(1);
addTest = false;
break;
}
}
}
// Cond may have changed above, so re-read its opcode.
CondOpcode = Cond.getOpcode();
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
Cond.getOperand(0).getValueType() != MVT::i8)) {
// Condition is an arithmetic-with-overflow node: emit the matching X86
// arithmetic node and branch on its flag result (COND_B for unsigned
// add/sub, COND_O otherwise). i8 multiplies are excluded by the condition
// above.
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
unsigned X86Opcode;
unsigned X86Cond;
SDVTList VTs;
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO:
// Signed add of constant 1 uses INC; the break inside the if leaves the
// switch.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
if (C->isOne()) {
X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO:
// Signed subtract of constant 1 uses DEC; the break inside the if leaves
// the switch.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
if (C->isOne()) {
X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
default: llvm_unreachable("unexpected overflowing operator");
}
if (Inverted)
X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
// UMUL is given two value results, so its flags are result #2; for the
// other nodes the flags are result #1.
if (CondOpcode == ISD::UMULO)
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
MVT::i32);
else
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
if (CondOpcode == ISD::UMULO)
Cond = X86Op.getValue(2);
else
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, MVT::i8);
addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
// (or/and (setcc cc0, Cmp), (setcc cc1, Cmp)) on the same flag producer:
// emit two branches rather than computing the combined boolean.
SDValue Cmp = Cond.getOperand(0).getOperand(1);
if (CondOpc == ISD::OR) {
// OR: branch to Dest on the first condition here, and on the second
// condition in the final BRCOND built at the end of the function.
if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp)) {
CC = Cond.getOperand(0).getOperand(0);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = Cond.getOperand(1).getOperand(0);
Cond = Cmp;
addTest = false;
}
} else { if (Cmp == Cond.getOperand(1).getOperand(1) &&
isX86LogicalCmp(Cmp) &&
Op.getNode()->hasOneUse()) {
// AND: only done when this branch's single user is an unconditional BR.
// The two destinations are exchanged and each condition is inverted, so
// both branches go to the original false block.
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
SDNode *User = *Op.getNode()->use_begin();
if (User->getOpcode() == ISD::BR) {
// Retarget the following BR to the original destination and branch to its
// old target instead.
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
}
} else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
// (xor (setcc cc, flags), 1): branch on the opposite condition of the same
// flags.
X86::CondCode CCode =
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
// SETOEQ that is still a generic SETCC: when the single user is an
// unconditional BR, exchange the destinations and branch to the original
// false block on COND_NE and on COND_P of one CMP.
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
Dest = FalseBB;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_P, MVT::i8);
Cond = Cmp;
addTest = false;
}
}
} else if (Cond.getOpcode() == ISD::SETCC &&
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
// SETUNE that is still a generic SETCC: when the single user is an
// unconditional BR, the first branch (COND_NE) goes to the original
// destination; the destinations are then exchanged, so the final branch
// (COND_NP) goes to the original false block and the retargeted BR goes to
// the original destination.
if (Op.getNode()->hasOneUse()) {
SDNode *User = *Op.getNode()->use_begin();
if (User->getOpcode() == ISD::BR) {
SDValue FalseBB = User->getOperand(1);
SDNode *NewBR =
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
(void)NewBR;
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
Cond.getOperand(0), Cond.getOperand(1));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cmp);
CC = DAG.getConstant(X86::COND_NP, MVT::i8);
Cond = Cmp;
addTest = false;
Dest = FalseBB;
}
}
}
}
if (addTest) {
// Look through a truncate whose dropped bits are known to be zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
Cond = Cond.getOperand(0);
// A single-use AND is implicitly tested against zero here; try to turn it
// into a bit test.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
if (NewSetCC.getNode()) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
}
}
}
// Nothing matched: test the condition value itself (for "equal" when
// Inverted is set, otherwise "not equal").
if (addTest) {
X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
CC = DAG.getConstant(X86Cond, MVT::i8);
Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
Chain, Dest, CC, Cond);
}
/// Lower ISD::DYNAMIC_STACKALLOC.
///
/// One of three strategies is chosen:
///  - generic (neither split stacks nor non-MachO Windows): subtract the size
///    from the stack pointer register, re-aligning only when the requested
///    alignment exceeds the frame lowering's stack alignment, bracketed by
///    CALLSEQ_START / CALLSEQ_END;
///  - split stacks: emit X86ISD::SEG_ALLOCA with the size copied into a fresh
///    virtual register;
///  - non-MachO Windows: emit X86ISD::WIN_ALLOCA with the size in RAX/EAX,
///    then read back the stack register and re-align it when Align is nonzero.
///
/// \returns the merged pair {allocated address, output chain}.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const bool SplitStack = MF.shouldSplitStack();
  const bool NeedsCustomLowering =
      (Subtarget->isOSWindows() && !Subtarget->isTargetMachO()) || SplitStack;
  SDLoc dl(Op);

  if (!NeedsCustomLowering) {
    // Generic expansion: adjust the stack pointer register directly.
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    SDNode *Node = Op.getNode();
    unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
                    " not tell us which reg is the stack pointer!");
    EVT VT = Node->getValueType(0);

    // Wrap the adjustment in a zero-sized call sequence.
    SDValue Chain = DAG.getCALLSEQ_START(Node->getOperand(0),
                                         DAG.getIntPtrConstant(0, true),
                                         SDLoc(Node));
    SDValue AllocSize = Node->getOperand(1);
    SDValue OldSP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
    Chain = OldSP.getValue(1);

    unsigned Align = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
    const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
    unsigned StackAlign = TFI.getStackAlignment();

    // New stack pointer value; masked down only for over-aligned requests.
    SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, OldSP, AllocSize);
    if (Align > StackAlign)
      NewSP = DAG.getNode(ISD::AND, dl, VT, NewSP,
                          DAG.getConstant(-(uint64_t)Align, VT));

    Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
    SDValue OutChain =
        DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
                           DAG.getIntPtrConstant(0, true), SDValue(),
                           SDLoc(Node));

    SDValue Results[2] = { NewSP, OutChain };
    return DAG.getMergeValues(Results, dl);
  }

  // Custom lowering (split stacks or non-MachO Windows).
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  EVT VT = Op.getNode()->getValueType(0);
  bool Is64Bit = Subtarget->is64Bit();
  EVT SPTy = getPointerTy();

  if (SplitStack) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    // In 64-bit mode, reject any function that has a 'nest' argument.
    if (Is64Bit) {
      const Function *F = MF.getFunction();
      Function::const_arg_iterator ArgIt = F->arg_begin();
      const Function::const_arg_iterator ArgEnd = F->arg_end();
      for (; ArgIt != ArgEnd; ++ArgIt) {
        if (!ArgIt->hasNestAttr())
          continue;
        report_fatal_error("Cannot use segmented stacks with functions that "
                           "have nested arguments.");
      }
    }

    const TargetRegisterClass *AddrRegClass =
        getRegClassFor(getPointerTy());
    unsigned SizeVReg = MRI.createVirtualRegister(AddrRegClass);
    Chain = DAG.getCopyToReg(Chain, dl, SizeVReg, Size);
    SDValue Allocated = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                                    DAG.getRegister(SizeVReg, SPTy));
    SDValue Results[2] = { Allocated, Chain };
    return DAG.getMergeValues(Results, dl);
  }

  // Windows path: pass the size in RAX/EAX to WIN_ALLOCA.
  const unsigned SizeReg =
      (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
  Chain = DAG.getCopyToReg(Chain, dl, SizeReg, Size, SDValue());
  SDValue Glue = Chain.getValue(1);
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Glue);

  // Read the stack register back as the allocation result.
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  unsigned SPReg = RegInfo->getStackRegister();
  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
  Chain = SP.getValue(1);

  if (Align) {
    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                     DAG.getConstant(-(uint64_t)Align, VT));
    Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
  }

  SDValue Results[2] = { SP, Chain };
  return DAG.getMergeValues(Results, dl);
}
/// Lower ISD::VASTART.
///
/// On 32-bit targets and on Win64 this is a single store of the var-args
/// frame index address through the va_list pointer (operand 1). Otherwise four
/// fields are stored through that pointer at byte offsets 0, 4, 8 and 16:
/// the GP offset (i32), the FP offset (i32), the var-args frame index address
/// and the register-save frame index address (presumably the SysV x86-64
/// va_list layout -- confirm against the ABI). The four stores all use the
/// incoming chain and are joined with a TokenFactor.
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  const bool HasFourFieldVAList =
      Subtarget->is64Bit() && !Subtarget->isTargetWin64();
  if (!HasFourFieldVAList) {
    // va_list is a plain pointer: store the var-args area address into it.
    SDValue VarArgsAddr = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                            getPointerTy());
    return DAG.getStore(Op.getOperand(0), DL, VarArgsAddr, Op.getOperand(1),
                        MachinePointerInfo(SV), false, false, 0);
  }

  // Moves the field pointer forward by a constant number of bytes.
  auto Advance = [&](SDValue Ptr, uint64_t Bytes) {
    return DAG.getNode(ISD::ADD, DL, getPointerTy(),
                       Ptr, DAG.getIntPtrConstant(Bytes));
  };

  // Offset 0: GP offset.
  SDValue FieldPtr = Op.getOperand(1);
  SDValue GPOffsetStore =
      DAG.getStore(Op.getOperand(0), DL,
                   DAG.getConstant(FuncInfo->getVarArgsGPOffset(), MVT::i32),
                   FieldPtr, MachinePointerInfo(SV), false, false, 0);

  // Offset 4: FP offset.
  FieldPtr = Advance(FieldPtr, 4);
  SDValue FPOffsetStore =
      DAG.getStore(Op.getOperand(0), DL,
                   DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32),
                   FieldPtr, MachinePointerInfo(SV, 4), false, false, 0);

  // Offset 8: address of the var-args frame index.
  FieldPtr = Advance(FieldPtr, 4);
  SDValue VarArgsAddr = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                          getPointerTy());
  SDValue VarArgsStore =
      DAG.getStore(Op.getOperand(0), DL, VarArgsAddr, FieldPtr,
                   MachinePointerInfo(SV, 8), false, false, 0);

  // Offset 16: address of the register-save frame index.
  FieldPtr = Advance(FieldPtr, 8);
  SDValue RegSaveAddr = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                                          getPointerTy());
  SDValue RegSaveStore =
      DAG.getStore(Op.getOperand(0), DL, RegSaveAddr, FieldPtr,
                   MachinePointerInfo(SV, 16), false, false, 0);

  SDValue Stores[] = { GPOffsetStore, FPOffsetStore, VarArgsStore,
                       RegSaveStore };
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
}
/// Lower ISD::VAARG for 64-bit Linux/Darwin targets (both asserted).
///
/// Emits an X86ISD::VAARG_64 memory-intrinsic node whose operands are the
/// chain, the va_list pointer, the argument's alloc size, an argument mode
/// (1 for integer types up to 32 bytes, 2 for floating-point types up to 16
/// bytes) and the alignment operand. The node yields an address and a chain;
/// the argument value is then loaded from that address.
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->is64Bit() &&
         "LowerVAARG only handles 64-bit va_arg!");
  assert((Subtarget->isTargetLinux() ||
          Subtarget->isTargetDarwin()) &&
          "Unhandled target in LowerVAARG");
  assert(Op.getNode()->getNumOperands() == 4);

  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  unsigned Align = Op.getConstantOperandVal(3);
  SDLoc dl(Op);

  EVT ArgVT = Op.getNode()->getValueType(0);
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);

  // Mode values passed to VAARG_64 as an i8 constant.
  const uint8_t IntegerArgMode = 1;
  const uint8_t FloatArgMode = 2;

  uint8_t ArgMode;
  if (ArgVT == MVT::f80)
    llvm_unreachable("va_arg for f80 not yet implemented");
  else if (ArgVT.isFloatingPoint() && ArgSize <= 16)
    ArgMode = FloatArgMode;
  else if (ArgVT.isInteger() && ArgSize <= 32)
    ArgMode = IntegerArgMode;
  else
    llvm_unreachable("Unhandled argument type in LowerVAARG");

  if (ArgMode == FloatArgMode) {
    // The floating-point mode requires hard float, implicit float and SSE1.
    assert(!DAG.getTarget().Options.UseSoftFloat &&
           !(DAG.getMachineFunction().getFunction()->hasFnAttribute(
               Attribute::NoImplicitFloat)) &&
           Subtarget->hasSSE1());
  }

  // Operands of the VAARG_64 node, in order.
  SDValue InstOps[] = { Chain, SrcPtr,
                        DAG.getConstant(ArgSize, MVT::i32),
                        DAG.getConstant(ArgMode, MVT::i8),
                        DAG.getConstant(Align, MVT::i32) };
  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
                                          VTs, InstOps, MVT::i64,
                                          MachinePointerInfo(SV),
                                          0,
                                          false,
                                          true,
                                          true);

  // Load the argument from the address the node produced, using its chain.
  return DAG.getLoad(ArgVT, dl, VAARG.getValue(1), VAARG,
                     MachinePointerInfo(),
                     false, false, false, 0);
}
/// Lower ISD::VACOPY for 64-bit targets (asserted) as a memcpy node.
///
/// Operands 1/2 are the destination/source pointers and operands 3/4 their
/// source values. The copy uses a size constant of 24 and passes 8 as the
/// following integer argument (presumably the alignment -- confirm against
/// the getMemcpy signature).
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");

  SDValue Chain = Op.getOperand(0);
  SDValue Dst = Op.getOperand(1);
  SDValue Src = Op.getOperand(2);
  const Value *DstValue = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcValue = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  SDValue Copy = DAG.getMemcpy(Chain, DL, Dst, Src,
                               DAG.getIntPtrConstant(24), 8, false,
                               false,
                               MachinePointerInfo(DstValue),
                               MachinePointerInfo(SrcValue));
  return Copy;
}
/// Build a vector shift of SrcOp by the constant ShiftAmt.
///
/// Opc must be X86ISD::VSHLI, VSRLI or VSRAI (asserted).
///  - A zero amount returns SrcOp unchanged.
///  - An amount >= the element width yields a zero constant for the two
///    logical shifts and is clamped to (width - 1) for VSRAI.
///  - When SrcOp is a build-vector of constants of type VT, the shift is
///    folded element by element (undef elements are kept as they are).
///  - Otherwise a node with opcode Opc and an i8 amount constant is emitted.
static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
                                          SDValue SrcOp, uint64_t ShiftAmt,
                                          SelectionDAG &DAG) {
  MVT ElementType = VT.getVectorElementType();

  if (ShiftAmt == 0)
    return SrcOp;

  if (ShiftAmt >= ElementType.getSizeInBits()) {
    if (Opc != X86ISD::VSRAI)
      return DAG.getConstant(0, VT);
    ShiftAmt = ElementType.getSizeInBits() - 1;
  }

  assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
         && "Unknown target vector shift-by-constant node");

  const bool CanFold = VT == SrcOp.getSimpleValueType() &&
                       ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode());
  if (!CanFold)
    return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));

  // Classify the opcode once, before touching any element.
  enum { FoldShl, FoldLshr, FoldAshr } Kind;
  switch (Opc) {
  default: llvm_unreachable(nullptr);
  case X86ISD::VSHLI: Kind = FoldShl;  break;
  case X86ISD::VSRLI: Kind = FoldLshr; break;
  case X86ISD::VSRAI: Kind = FoldAshr; break;
  }

  SmallVector<SDValue, 8> Elts;
  for (unsigned Idx = 0, NumElts = SrcOp->getNumOperands(); Idx != NumElts;
       ++Idx) {
    SDValue Elt = SrcOp->getOperand(Idx);
    if (Elt->getOpcode() == ISD::UNDEF) {
      // Undef elements pass through untouched.
      Elts.push_back(Elt);
      continue;
    }

    const APInt &C = cast<ConstantSDNode>(Elt)->getAPIntValue();
    if (Kind == FoldShl)
      Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType));
    else if (Kind == FoldLshr)
      Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType));
    else
      Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType));
  }

  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
}
/// Build a target vector shift of SrcOp by the scalar amount ShAmt.
///
/// Opc is an immediate-form opcode (VSHLI/VSRLI/VSRAI) and ShAmt must be i32
/// or i64 (asserted). A constant amount is forwarded to
/// getTargetVShiftByConstNode. Otherwise Opc is replaced by the matching
/// VSHL/VSRL/VSRA opcode and the amount is placed in element 0 of a vector,
/// which is then bitcast to the 128-bit vector type that has VT's element
/// type.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
                                   SDValue SrcOp, SDValue ShAmt,
                                   SelectionDAG &DAG) {
  MVT SVT = ShAmt.getSimpleValueType();
  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");

  // Constant amounts take the shift-by-immediate path.
  ConstantSDNode *ConstAmt = dyn_cast<ConstantSDNode>(ShAmt);
  if (ConstAmt)
    return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
                                      ConstAmt->getZExtValue(), DAG);

  // Map the immediate opcode to its variable-amount counterpart.
  switch (Opc) {
  case X86ISD::VSHLI:
    Opc = X86ISD::VSHL;
    break;
  case X86ISD::VSRLI:
    Opc = X86ISD::VSRL;
    break;
  case X86ISD::VSRAI:
    Opc = X86ISD::VSRA;
    break;
  default:
    llvm_unreachable("Unknown target vector shift node");
  }

  const X86Subtarget &Subtarget =
      static_cast<const X86Subtarget &>(DAG.getSubtarget());
  const bool IsZExtFromI16 =
      Subtarget.hasSSE41() && ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
      ShAmt.getOperand(0).getSimpleValueType() == MVT::i16;

  if (IsZExtFromI16) {
    // Put the i16 source into a v8i16 and build the vector via
    // getShuffleVectorZeroOrUndef.
    SDValue Narrow = ShAmt.getOperand(0);
    Narrow = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(Narrow), MVT::v8i16,
                         Narrow);
    ShAmt = getShuffleVectorZeroOrUndef(Narrow, 0, true, &Subtarget, DAG);
  } else if (SVT == MVT::i32) {
    // v4i32: { amount, 0, undef, undef }.
    SDValue ShOps[] = { ShAmt, DAG.getConstant(0, SVT), DAG.getUNDEF(SVT),
                        DAG.getUNDEF(SVT) };
    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
  } else {
    // v2i64: { amount, undef }.
    SDValue ShOps[] = { ShAmt, DAG.getUNDEF(SVT) };
    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, ShOps);
  }

  // Reinterpret the amount vector as 128 bits of VT's element type.
  MVT EltVT = VT.getVectorElementType();
  EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
  ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt);
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
/// Apply a vector mask to the result Op.
///
/// Returns Op unchanged when Mask is all ones. Otherwise Mask is bitcast to a
/// vector of i1 (one element per bit of Mask's type) and its low
/// VT.getVectorNumElements() elements are extracted as VMask. For the
/// mask-producing compares (PCMPEQM, PCMPGTM, CMPM, CMPMU) the result is
/// AND(Op, VMask); for everything else it is VSELECT(VMask, Op, PreservedSrc),
/// with an undef PreservedSrc replaced by a zero vector.
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
                                    SDValue PreservedSrc,
                                    const X86Subtarget *Subtarget,
                                    SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
                                MVT::i1, VT.getVectorNumElements());
  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                   Mask.getValueType().getSizeInBits());
  SDLoc dl(Op);

  assert(MaskVT.isSimple() && "invalid mask type");

  if (isAllOnes(Mask))
    return Op;

  SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                              DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
                              DAG.getIntPtrConstant(0));

  // Compares already produce a mask, so masking them is a plain AND.
  const unsigned Opcode = Op.getOpcode();
  const bool ProducesMask =
      Opcode == X86ISD::PCMPEQM || Opcode == X86ISD::PCMPGTM ||
      Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU;
  if (ProducesMask)
    return DAG.getNode(ISD::AND, dl, VT, Op, VMask);

  if (PreservedSrc.getOpcode() == ISD::UNDEF)
    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
  return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
}
/// Apply a scalar (single-bit) mask to the result Op.
///
/// Returns Op unchanged when Mask is all ones. Otherwise Mask is truncated to
/// i1 and used as the condition of an X86ISD::SELECT between Op and
/// PreservedSrc; an undef PreservedSrc is replaced by a zero vector.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                    SDValue PreservedSrc,
                                    const X86Subtarget *Subtarget,
                                    SelectionDAG &DAG) {
  if (isAllOnes(Mask))
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue Cond = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);

  const bool PassThruIsUndef = PreservedSrc.getOpcode() == ISD::UNDEF;
  SDValue Fallback =
      PassThruIsUndef ? getZeroVector(VT, Subtarget, DAG, dl) : PreservedSrc;
  return DAG.getNode(X86ISD::SELECT, dl, VT, Cond, Op, Fallback);
}
/// Lower an intrinsic node that has no chain operand.
///
/// Operand 0 holds the intrinsic ID; the intrinsic's own arguments start at
/// operand 1. Intrinsics for which getIntrinsicWithoutChain() returns data are
/// lowered generically according to IntrData->Type. Types not handled there,
/// and intrinsics without table data, fall through to the explicit switch on
/// the intrinsic ID at the end.
///
/// \returns the lowered value, or an empty SDValue when the intrinsic is not
/// handled by this function.
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
// Plain 1/2/3-operand intrinsics map directly onto the node Opc0.
case INTR_TYPE_1OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1));
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
// Operands: (src, pass-through, mask, rounding mode). The node takes the
// source and rounding mode; the result is then vector-masked.
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue Src0 = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode = Op.getOperand(4);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, Src0, Subtarget, DAG);
}
// Operands: (src1, src2, pass-through, mask, rounding mode); the result is
// masked with getScalarMaskingNode.
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue RoundingMode = Op.getOperand(5);
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
RoundingMode),
Mask, Src0, Subtarget, DAG);
}
// Operands: (src1, src2, pass-through, mask[, rounding mode]). Opc1, when
// nonzero, is the opcode used for an explicit rounding mode.
case INTR_TYPE_2OP_MASK: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
unsigned Round = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
// Only use the rounding-mode opcode when the mode is not CUR_DIRECTION.
// NOTE(review): this node also receives operand 3 (the pass-through) as an
// operand in addition to the two sources -- confirm this is intended.
if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3), Op.getOperand(5)),
Mask, PassThru, Subtarget, DAG);
}
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Op.getOperand(1),
Op.getOperand(2)),
Mask, PassThru, Subtarget, DAG);
}
// Operands: (src1, src2, src3, mask[, rounding mode]). The first source is
// also used as the pass-through value for masked-off elements.
case FMA_OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
Mask, Src1, Subtarget, DAG);
}
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
dl, Op.getValueType(),
Src1, Src2, Src3),
Mask, Src1, Subtarget, DAG);
}
// Masked compares. The compare yields a vector of i1 with one element per
// source element; it is masked, inserted at index 0 of an undef i1 vector
// that has one element per bit of the mask operand's type, and bitcast to
// the intrinsic's result type. CMP_MASK_CC carries an extra operand (3)
// ahead of the mask.
case CMP_MASK:
case CMP_MASK_CC: {
EVT VT = Op.getOperand(1).getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
} else {
assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!");
Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2));
}
SDValue CmpMask = getVectorMaskingNode(Cmp, Mask,
DAG.getTargetConstant(0, MaskVT),
Subtarget, DAG);
SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
DAG.getUNDEF(BitcastVT), CmpMask,
DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
// Compare intrinsics returning an i32: Opc1 holds an ISD::CondCode, which is
// translated to an X86 condition code and applied via SETCC to the i32
// result of node Opc0; the i8 SETCC is zero-extended to i32.
case COMI: { ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// Vector shift by a scalar amount (operand 2).
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
// Same as VSHIFT, then masked: operand 4 is the mask and operand 3 the
// pass-through value.
case VSHIFT_MASK:
return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl,
Op.getSimpleValueType(),
Op.getOperand(1),
Op.getOperand(2), DAG),
Op.getOperand(4), Op.getOperand(3), Subtarget,
DAG);
// Operands: (data, pass-through, mask). An all-ones mask returns the data
// operand unchanged; otherwise the node Opc0 takes (vector mask, data,
// pass-through).
case COMPRESS_EXPAND_IN_REG: {
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
if (isAllOnes(Mask)) return Op.getOperand(1);
EVT VT = Op.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
DAG.getIntPtrConstant(0));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
PassThru);
}
// Operands: (src1, src2, mask). The node Opc0 takes (vector mask, src1,
// src2).
case BLEND: {
SDValue Mask = Op.getOperand(3);
EVT VT = Op.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
DAG.getIntPtrConstant(0));
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
default:
break;
}
}
// Intrinsics that are lowered individually.
switch (IntNo) {
default: return SDValue();
// The two intrinsic operands are passed to VPERMV in swapped order.
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
// The two vector sources are passed to VALIGN in swapped order; operand 5
// is the mask and operand 4 the pass-through value.
case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
return getVectorMaskingNode(DAG.getNode(X86ISD::VALIGN, dl,
Op.getValueType(), Op.getOperand(2),
Op.getOperand(1),
Op.getOperand(3)),
Op.getOperand(5), Op.getOperand(4),
Subtarget, DAG);
// ptest / vtest family: emit PTEST (or TESTP for the vtest* intrinsics) and
// read the outcome with SETCC, zero-extended to i32.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestz_256:
case Intrinsic::x86_avx_ptestc_256:
case Intrinsic::x86_avx_ptestnzc_256:
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
unsigned X86CC;
// Each vtest* group sets IsTestPacked and then deliberately falls through
// into the matching ptest* cases to pick the condition code:
// *z -> COND_E, *c -> COND_B, *nzc -> COND_A.
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
IsTestPacked = true; case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
X86CC = X86::COND_E;
break;
case Intrinsic::x86_avx_vtestc_ps:
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
IsTestPacked = true; case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
X86CC = X86::COND_B;
break;
case Intrinsic::x86_avx_vtestnzc_ps:
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
IsTestPacked = true; case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
X86CC = X86::COND_A;
break;
}
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// kortest: both operands are bitcast to v16i1 and tested with KORTEST;
// kortestz reads COND_E, kortestc reads COND_B.
// NOTE(review): this SETCC is created with type i1, whereas the other SETCC
// nodes in this function use i8 -- confirm this is intended.
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// pcmp{i,e}stri flag-reading variants: emit PCMPISTRI / PCMPESTRI and return
// a condition read from the node's second result (i32), zero-extended.
// Suffix -> condition: a -> COND_A, c -> COND_B, o -> COND_O, s -> COND_S,
// z -> COND_E.
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
case Intrinsic::x86_sse42_pcmpestric128:
case Intrinsic::x86_sse42_pcmpistrio128:
case Intrinsic::x86_sse42_pcmpestrio128:
case Intrinsic::x86_sse42_pcmpistris128:
case Intrinsic::x86_sse42_pcmpestris128:
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); case Intrinsic::x86_sse42_pcmpistria128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpestria128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_A;
break;
case Intrinsic::x86_sse42_pcmpistric128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpestric128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_B;
break;
case Intrinsic::x86_sse42_pcmpistrio128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpestrio128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_O;
break;
case Intrinsic::x86_sse42_pcmpistris128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpestris128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_S;
break;
case Intrinsic::x86_sse42_pcmpistriz128:
Opcode = X86ISD::PCMPISTRI;
X86CC = X86::COND_E;
break;
case Intrinsic::x86_sse42_pcmpestriz128:
Opcode = X86ISD::PCMPESTRI;
X86CC = X86::COND_E;
break;
}
// Forward every operand except the intrinsic ID.
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8),
SDValue(PCMP.getNode(), 1));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
// Plain pcmp{i,e}stri: emit the node with the intrinsic's operands (minus
// the ID) and return it directly.
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
if (IntNo == Intrinsic::x86_sse42_pcmpistri128)
Opcode = X86ISD::PCMPISTRI;
else
Opcode = X86ISD::PCMPESTRI;
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps);
}
}
}
/// Build the machine node for a gather intrinsic.
///
/// ScaleOp must be a constant (asserted). The mask becomes a vector of i1 with
/// one element per Index element: a constant mask is turned into a target
/// constant, anything else is bitcast. An undef Src is replaced by a zero
/// vector. Machine node operands, in order: Src, mask, Base, Scale, Index,
/// zero displacement, register 0 as segment, Chain.
///
/// \returns the merged pair of the node's results #0 and #2.
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                             SDValue Src, SDValue Mask, SDValue Base,
                             SDValue Index, SDValue ScaleOp, SDValue Chain,
                             const X86Subtarget * Subtarget) {
  SDLoc dl(Op);

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
  assert(C && "Invalid scale type");
  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);

  const unsigned NumIndexElts =
      Index.getSimpleValueType().getVectorNumElements();
  EVT MaskVT = MVT::getVectorVT(MVT::i1, NumIndexElts);

  SDValue MaskInReg;
  if (ConstantSDNode *ConstMask = dyn_cast<ConstantSDNode>(Mask))
    MaskInReg = DAG.getTargetConstant(ConstMask->getSExtValue(), MaskVT);
  else
    MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
  SDValue Segment = DAG.getRegister(0, MVT::i32);

  if (Src.getOpcode() == ISD::UNDEF)
    Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);

  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
  SDNode *Gather = DAG.getMachineNode(Opc, dl, VTs, Ops);

  SDValue RetOps[] = { SDValue(Gather, 0), SDValue(Gather, 2) };
  return DAG.getMergeValues(RetOps, dl);
}
/// Build the machine node for a scatter intrinsic.
///
/// ScaleOp must be a constant (asserted). The mask becomes a vector of i1 with
/// one element per Index element: a constant mask is turned into a target
/// constant, anything else is bitcast. Machine node operands, in order: Base,
/// Scale, Index, zero displacement, register 0 as segment, mask, Src, Chain.
///
/// \returns the node's result #1.
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                              SDValue Src, SDValue Mask, SDValue Base,
                              SDValue Index, SDValue ScaleOp, SDValue Chain) {
  SDLoc dl(Op);

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
  assert(C && "Invalid scale type");
  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
  SDValue Segment = DAG.getRegister(0, MVT::i32);

  const unsigned NumIndexElts =
      Index.getSimpleValueType().getVectorNumElements();
  EVT MaskVT = MVT::getVectorVT(MVT::i1, NumIndexElts);

  SDValue MaskInReg;
  if (ConstantSDNode *ConstMask = dyn_cast<ConstantSDNode>(Mask))
    MaskInReg = DAG.getTargetConstant(ConstMask->getSExtValue(), MaskVT);
  else
    MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);

  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
  SDNode *Scatter = DAG.getMachineNode(Opc, dl, VTs, Ops);
  return SDValue(Scatter, 1);
}
/// Build the machine node for a gather/scatter prefetch intrinsic.
///
/// ScaleOp must be a constant (asserted). The mask becomes a vector of i1 with
/// one element per Index element: a constant mask is turned into a target
/// constant, anything else is bitcast. Machine node operands, in order: mask,
/// Base, Scale, Index, zero displacement, register 0 as segment, Chain.
///
/// \returns the node's only result (type MVT::Other).
static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
                               SDValue Mask, SDValue Base, SDValue Index,
                               SDValue ScaleOp, SDValue Chain) {
  SDLoc dl(Op);

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
  assert(C && "Invalid scale type");
  SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
  SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
  SDValue Segment = DAG.getRegister(0, MVT::i32);

  const unsigned NumIndexElts =
      Index.getSimpleValueType().getVectorNumElements();
  EVT MaskVT = MVT::getVectorVT(MVT::i1, NumIndexElts);

  SDValue MaskInReg;
  if (ConstantSDNode *ConstMask = dyn_cast<ConstantSDNode>(Mask))
    MaskInReg = DAG.getTargetConstant(ConstMask->getSExtValue(), MaskVT);
  else
    MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);

  SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
  SDNode *Prefetch = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
  return SDValue(Prefetch, 0);
}
/// Lower a read-performance-counter intrinsic node N (three operands,
/// asserted).
///
/// Operand 2 is copied into ECX, an X86ISD::RDPMC_DAG node is emitted, and the
/// two result halves are read from RAX/RDX (64-bit) or EAX/EDX (32-bit). On
/// 64-bit the halves are combined as LO | (HI << 32); on 32-bit with
/// BUILD_PAIR. The combined i64 value and the output chain are appended to
/// Results, in that order.
static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
                                      SelectionDAG &DAG, const X86Subtarget *Subtarget,
                                      SmallVectorImpl<SDValue> &Results) {
  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);

  SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
                                   N->getOperand(2));
  SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);

  // Pick the register pair and value type once for both halves.
  const bool Is64Bit = Subtarget->is64Bit();
  const unsigned LoReg = Is64Bit ? X86::RAX : X86::EAX;
  const unsigned HiReg = Is64Bit ? X86::RDX : X86::EDX;
  const MVT HalfVT = Is64Bit ? MVT::i64 : MVT::i32;

  // The reads are threaded through both chain (#1) and glue (#2).
  SDValue LO = DAG.getCopyFromReg(rd, DL, LoReg, HalfVT, rd.getValue(1));
  SDValue HI = DAG.getCopyFromReg(LO.getValue(1), DL, HiReg, HalfVT,
                                  LO.getValue(2));
  Chain = HI.getValue(1);

  SDValue Combined;
  if (Is64Bit) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
                              DAG.getConstant(32, MVT::i8));
    Combined = DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp);
  } else {
    SDValue Ops[] = { LO, HI };
    Combined = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
  }

  Results.push_back(Combined);
  Results.push_back(Chain);
}
/// Lower a time-stamp-counter read using the node opcode Opcode.
///
/// The node is chained on N's operand 0 and its two result halves are read
/// from RAX/RDX (64-bit) or EAX/EDX (32-bit). For X86ISD::RDTSCP_DAG, N must
/// have three operands (asserted); ECX is additionally read and stored to the
/// address in operand 2. On 64-bit the halves are combined as
/// LO | (HI << 32); on 32-bit with BUILD_PAIR. The combined i64 value and the
/// output chain are appended to Results, in that order.
static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
                                    SelectionDAG &DAG, const X86Subtarget *Subtarget,
                                    SmallVectorImpl<SDValue> &Results) {
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));

  // Pick the register pair and value type once for both halves.
  const bool Is64Bit = Subtarget->is64Bit();
  const unsigned LoReg = Is64Bit ? X86::RAX : X86::EAX;
  const unsigned HiReg = Is64Bit ? X86::RDX : X86::EDX;
  const MVT HalfVT = Is64Bit ? MVT::i64 : MVT::i32;

  // The reads are threaded through both chain (#1) and glue (#2).
  SDValue LO = DAG.getCopyFromReg(rd, DL, LoReg, HalfVT, rd.getValue(1));
  SDValue HI = DAG.getCopyFromReg(LO.getValue(1), DL, HiReg, HalfVT,
                                  LO.getValue(2));
  SDValue Chain = HI.getValue(1);

  if (Opcode == X86ISD::RDTSCP_DAG) {
    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
    // Read ECX as well and store it to the pointer given in operand 2.
    SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
                                     HI.getValue(2));
    Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
                         MachinePointerInfo(), false, false, 0);
  }

  SDValue Combined;
  if (Is64Bit) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
                              DAG.getConstant(32, MVT::i8));
    Combined = DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp);
  } else {
    SDValue Ops[] = { LO, HI };
    Combined = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
  }

  Results.push_back(Combined);
  Results.push_back(Chain);
}
/// Lower a cycle-counter read by emitting an X86ISD::RDTSC_DAG read through
/// getReadTimeStampCounter and merging the values it produces.
static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
                                     SelectionDAG &DAG) {
  SDLoc DL(Op);
  SmallVector<SDValue, 2> CounterAndChain;
  getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
                          CounterAndChain);
  return DAG.getMergeValues(CounterAndChain, DL);
}
/// Lower an intrinsic node that carries a chain.
///
/// Operand 0 is the chain and operand 1 the intrinsic ID; the intrinsic's own
/// arguments start at operand 2. Only intrinsics for which
/// getIntrinsicWithChain() returns data are handled; the lowering is selected
/// by IntrData->Type.
///
/// \returns the lowered node, or an empty SDValue when no table data exists
/// for the intrinsic.
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData)
return SDValue();
SDLoc dl(Op);
switch(IntrData->Type) {
default:
llvm_unreachable("Unknown Intrinsic Type");
break;
// The node Opc0 produces {value, glue, chain}. A CMOV on COND_B, glued to
// that node, selects between the value (zero-extended or truncated to the
// second result type) and the constant 1. Returns {value, isValid, chain}.
case RDSEED:
case RDRAND: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
DAG.getConstant(1, Op->getValueType(1)),
DAG.getConstant(X86::COND_B, MVT::i32),
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
Ops);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
// Intrinsic operands 2..6: source, base, index, mask, scale.
case GATHER: {
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
Subtarget);
}
// Intrinsic operands 2..6: base, mask, index, source, scale.
case SCATTER: {
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
}
// Intrinsic operands 2..6: mask, index, base, scale, hint. The hint must be
// a constant 0 or 1; a nonzero hint selects Opc1, zero selects Opc0.
// NOTE(review): when the hint is not a constant, HintVal is never assigned
// before llvm_unreachable is reached -- confirm how builds in which
// llvm_unreachable does not abort behave here.
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal;
if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
(HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
SDValue Base = Op.getOperand(4);
SDValue Scale = Op.getOperand(5);
return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain);
}
// Time-stamp counter read; Opc0 selects the node passed to
// getReadTimeStampCounter.
case RDTSC: {
SmallVector<SDValue, 2> Results;
getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// Performance counter read.
case RDPMC: {
SmallVector<SDValue, 2> Results;
getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
// The node Opc0 produces {value, chain}. The returned value is SETCC(COND_NE)
// of that node zero-extended to the result type, paired with the node's
// chain.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_NE, MVT::i8),
InTrans);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
}
// Intrinsic operands 2..5: carry-in, two sources, result pointer. An
// X86ISD::ADD of the carry-in and -1 is emitted first and its second result
// feeds the node Opc0 (presumably to regenerate the carry flag -- confirm).
// The result is stored through the pointer; the returned value is
// SETCC(COND_B) on the second result of the node Opc0, paired with the store.
case ADX: {
SmallVector<SDValue, 2> Results;
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
Op.getOperand(4), GenCF.getValue(1));
SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
Op.getOperand(5), MachinePointerInfo(),
false, false, 0);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_B, MVT::i8),
Res.getValue(1));
Results.push_back(SetCC);
Results.push_back(Store);
return DAG.getMergeValues(Results, dl);
}
// Intrinsic operands 2..4: address, data, mask. An all-ones mask becomes a
// plain store of the data. Otherwise the data goes through the node Opc0
// with an undef third operand and the result is stored.
case COMPRESS_TO_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
if (isAllOnes(Mask)) return DAG.getStore(Chain, dl, DataToCompress, Addr,
MachinePointerInfo(), false, false, 0);
EVT VT = DataToCompress.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
DAG.getIntPtrConstant(0));
SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
DataToCompress, DAG.getUNDEF(VT));
return DAG.getStore(Chain, dl, Compressed, Addr,
MachinePointerInfo(), false, false, 0);
}
// Intrinsic operands 2..4: address, pass-through, mask. An all-ones mask
// becomes a plain load. Otherwise the loaded data goes through the node Opc0
// together with the vector mask and the pass-through value.
// NOTE(review): the chain merged into the result is the incoming Chain, not
// the chain result of the DataToExpand load -- confirm this is intended.
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PathThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
EVT VT = Op.getValueType();
if (isAllOnes(Mask)) return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, 0);
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask),
DAG.getIntPtrConstant(0));
SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
false, false, false, 0);
SmallVector<SDValue, 2> Results;
Results.push_back(DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand,
PathThru));
Results.push_back(Chain);
return DAG.getMergeValues(Results, dl);
}
}
}
/// Lower ISD::RETURNADDR.
///
/// Marks the return address as taken on the frame info. Returns an empty
/// SDValue when verifyReturnAddressArgumentIsConstant() reports true. For
/// depth 0 the value is loaded from the return-address frame index; for
/// deeper frames it is loaded from (frame address at that depth + slot size).
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  const unsigned Depth =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy();

  if (Depth == 0) {
    // Current frame: read the dedicated return-address slot.
    SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       RetAddrFI, MachinePointerInfo(), false, false, false, 0);
  }

  // Outer frame: the value sits one slot above that frame's address.
  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                     DAG.getNode(ISD::ADD, dl, PtrVT,
                                 FrameAddr, Offset),
                     MachinePointerInfo(), false, false, false, 0);
}
/// Lower a FRAMEADDR node.
///
/// Records on the MachineFrameInfo that the frame address is taken, copies
/// the pointer-sized frame register, and then follows the chain of saved
/// frame pointers once per requested level (the node's constant first
/// operand) by loading through the current frame address.
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo()->setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  const unsigned Depth =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(MF);
  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
          (FrameReg == X86::EBP && VT == MVT::i32)) &&
         "Invalid Frame Register!");

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  // Each level dereferences the current frame address to reach the next one.
  for (unsigned Level = 0; Level != Depth; ++Level)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(), false, false, false, 0);
  return FrameAddr;
}
/// Map a register name to its X86 register number.
///
/// Only "esp" and "rsp" are recognised; any other name is a fatal error.
/// \p VT is not consulted.
unsigned X86TargetLowering::getRegisterByName(const char* RegName,
                                              EVT VT) const {
  const unsigned Reg = StringSwitch<unsigned>(RegName)
                           .Case("esp", X86::ESP)
                           .Case("rsp", X86::RSP)
                           .Default(0);
  // Zero is the "no match" sentinel supplied to Default() above.
  if (!Reg)
    report_fatal_error("Invalid register name global variable");
  return Reg;
}
/// Lower FRAME_TO_ARGS_OFFSET to a pointer-sized constant equal to twice the
/// register-info slot size.
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
                                                     SelectionDAG &DAG) const {
  const unsigned SlotSize = Subtarget->getRegisterInfo()->getSlotSize();
  return DAG.getIntPtrConstant(2 * SlotSize);
}
/// Lower an EH_RETURN node.
///
/// Operands are (chain, offset, handler). The handler is stored at
/// frame-register + slot-size + offset; that same address is copied into
/// RCX/ECX (chosen by pointer width) and handed to X86ISD::EH_RETURN.
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Offset = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy();
  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
  assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
          (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
         "Invalid Frame Register!");

  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);

  unsigned StoreAddrReg = X86::ECX;
  if (PtrVT == MVT::i64)
    StoreAddrReg = X86::RCX;

  // Address = Frame + SlotSize + Offset.
  SDValue SlotSize = DAG.getIntPtrConstant(RegInfo->getSlotSize());
  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, SlotSize);
  StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);

  // Write the handler there, then publish the address in the scratch register.
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
                       false, false, 0);
  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);

  SDValue AddrRegNode = DAG.getRegister(StoreAddrReg, PtrVT);
  return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, AddrRegNode);
}
/// Rewrite the node as X86ISD::EH_SJLJ_SETJMP, forwarding its first two
/// operands and producing an (i32, Other) result pair.
SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDVTList ResultTys = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, ResultTys, First, Second);
}
/// Rewrite the node as X86ISD::EH_SJLJ_LONGJMP, forwarding its first two
/// operands; the only result is of type MVT::Other.
SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue First = Op.getOperand(0);
  SDValue Second = Op.getOperand(1);
  return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, First, Second);
}
/// ADJUST_TRAMPOLINE needs no adjustment here: the result is simply the
/// node's first operand, unchanged. \p DAG is unused.
static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
  SDValue Unadjusted = Op.getOperand(0);
  return Unadjusted;
}
/// Lower INIT_TRAMPOLINE by emitting stores that write a small machine-code
/// stub into the trampoline memory.
///
/// Operands read here: 0 = chain (Root), 1 = trampoline address (Trmp),
/// 2 = function pointer (FPtr), 3 = 'nest' value, 4 = SrcValue for the
/// trampoline memory, and (32-bit path only) 5 = SrcValue wrapping the target
/// Function. All stores hang off Root and are joined with a TokenFactor,
/// which is the returned chain.
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Root = Op.getOperand(0);
SDValue Trmp = Op.getOperand(1); SDValue FPtr = Op.getOperand(2); SDValue Nest = Op.getOperand(3); SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Subtarget->is64Bit()) {
// 64-bit stub layout (byte offsets from Trmp):
//    0: i16  REX_WB, MOV64ri|R11      2: FPtr
//   10: i16  REX_WB, MOV64ri|R10     12: Nest
//   20: i16  REX_WB, JMP64r          22: i8 ModRM selecting R11
SDValue OutChains[6];
// Opcode bytes, named after the instructions they stand for.
const unsigned char JMP64r = 0xFF; const unsigned char MOV64ri = 0xB8;
// Only the low three bits of each register encoding go into the opcode/ModRM.
const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;
const unsigned char REX_WB = 0x40 | 0x08 | 0x01;
// Each i16 below packs the REX prefix in the low byte and the opcode in the
// high byte.
unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; SDValue Addr = Trmp;
OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr),
false, false, 0);
// Immediate of the first move: the function pointer.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(2, MVT::i64));
OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
MachinePointerInfo(TrmpAddr, 2),
false, false, 2);
// Second move, targeting R10, carries the 'nest' value.
OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(10, MVT::i64));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 10),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(12, MVT::i64));
OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 12),
false, false, 2);
// Finally the jump opcode followed by a ModRM byte naming R11.
OpCode = (JMP64r << 8) | REX_WB; Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(20, MVT::i64));
OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
Addr, MachinePointerInfo(TrmpAddr, 20),
false, false, 0);
// ModRM = mod 3 (bits 7:6), reg field 4 (bits 5:3), rm = R11 low bits.
unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(22, MVT::i64));
OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
// 32-bit path: the register that receives 'nest' depends on the callee's
// calling convention.
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
case CallingConv::C:
case CallingConv::X86_StdCall: {
// C / stdcall: 'nest' goes in ECX.
NestReg = X86::ECX;
FunctionType *FTy = Func->getFunctionType();
const AttributeSet &Attrs = Func->getAttributes();
// For non-vararg functions with attributes, count the 32-bit words occupied
// by 'inreg' parameters; more than two is rejected below as a conflict with
// the nest register.
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
// Attribute indices for parameters start at 1.
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
if (Attrs.hasAttribute(Idx, Attribute::InReg))
// Round each parameter's bit size up to whole 32-bit words.
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
" parameters!");
}
}
break;
}
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// fastcall / thiscall / fastcc: 'nest' goes in EAX.
NestReg = X86::EAX;
break;
}
// 32-bit stub layout (byte offsets from Trmp):
//   0: i8 MOV32ri|NestReg   1: Nest
//   5: i8 JMP (0xE9)        6: Disp
SDValue OutChains[4];
SDValue Addr, Disp;
// Disp = FPtr - (Trmp + 10); Trmp + 10 is the first byte after the 4-byte
// displacement stored at offset 6.
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(10, MVT::i32));
Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);
// The move opcode has the low three bits of the register encoding OR'ed in.
const unsigned char MOV32ri = 0xB8; const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
OutChains[0] = DAG.getStore(Root, dl,
DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
Trmp, MachinePointerInfo(TrmpAddr),
false, false, 0);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(1, MVT::i32));
OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
MachinePointerInfo(TrmpAddr, 1),
false, false, 1);
// Jump opcode byte, followed by its displacement.
const unsigned char JMP = 0xE9; Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(5, MVT::i32));
OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
MachinePointerInfo(TrmpAddr, 5),
false, false, 1);
Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
DAG.getConstant(6, MVT::i32));
OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
/// Lower FLT_ROUNDS_.
///
/// Stores the 16-bit FP control word to a fresh 2-byte stack slot with
/// X86ISD::FNSTCW16m, reloads it as an i16 (CW), and computes
///
///   ((((CW & 0x800) >> 11) | ((CW & 0x400) >> 9)) + 1) & 3
///
/// i.e. bits 11 and 10 of the control word are swapped into bits 0 and 1,
/// incremented, and wrapped to two bits. The i16 result is then truncated or
/// zero-extended to the node's result type.
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  unsigned StackAlignment = TFI.getStackAlignment();
  MVT VT = Op.getSimpleValueType();
  SDLoc DL(Op);

  // Reserve two bytes of stack to receive the control word.
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
                              MachineMemOperand::MOStore, 2, 2);

  // Store the control word, then read it back as an integer.
  SDValue StoreOps[] = { DAG.getEntryNode(), StackSlot };
  SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, MVT::i16, MMO);
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
                            MachinePointerInfo(), false, false, false, 0);

  // Bit 11 of the control word becomes bit 0.
  SDValue Bit11 = DAG.getNode(ISD::AND, DL, MVT::i16, CWD,
                              DAG.getConstant(0x800, MVT::i16));
  SDValue CWD1 = DAG.getNode(ISD::SRL, DL, MVT::i16, Bit11,
                             DAG.getConstant(11, MVT::i8));

  // Bit 10 of the control word becomes bit 1.
  SDValue Bit10 = DAG.getNode(ISD::AND, DL, MVT::i16, CWD,
                              DAG.getConstant(0x400, MVT::i16));
  SDValue CWD2 = DAG.getNode(ISD::SRL, DL, MVT::i16, Bit10,
                             DAG.getConstant(9, MVT::i8));

  // (swapped bits + 1) & 3.
  SDValue Swapped = DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2);
  SDValue Bumped = DAG.getNode(ISD::ADD, DL, MVT::i16, Swapped,
                               DAG.getConstant(1, MVT::i16));
  SDValue RetVal = DAG.getNode(ISD::AND, DL, MVT::i16, Bumped,
                               DAG.getConstant(3, MVT::i16));

  // Resize the i16 result to the requested type.
  unsigned ResizeOpc = ISD::ZERO_EXTEND;
  if (VT.getSizeInBits() < 16)
    ResizeOpc = ISD::TRUNCATE;
  return DAG.getNode(ResizeOpc, DL, VT, RetVal);
}
/// Lower CTLZ (count leading zeros, defined for a zero input).
///
/// Emits X86ISD::BSR, then a CMOV that substitutes 2*NumBits-1 when the BSR
/// flag result signals COND_E, and finally XORs with NumBits-1. With the
/// substituted value the XOR yields NumBits. i8 inputs are zero-extended to
/// i32 for the computation and the result truncated back to i8.
static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  const bool IsByte = VT == MVT::i8;
  unsigned NumBits = VT.getSizeInBits();
  SDLoc dl(Op);

  SDValue Src = Op.getOperand(0);
  EVT OpVT = VT;
  if (IsByte) {
    // Widen i8 so the scan operates on an i32.
    OpVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Src);
  }

  // BSR produces the value plus an i32 flags result.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  SDValue Bsr = DAG.getNode(X86ISD::BSR, dl, VTs, Src);

  // Select 2*NumBits-1 instead of the BSR value on COND_E.
  SDValue CMovOps[] = {
    Bsr,
    DAG.getConstant(NumBits + NumBits - 1, OpVT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    Bsr.getValue(1)
  };
  SDValue Result = DAG.getNode(X86ISD::CMOV, dl, OpVT, CMovOps);

  // XOR with NumBits-1 converts the selected value into the final count.
  Result = DAG.getNode(ISD::XOR, dl, OpVT, Result,
                       DAG.getConstant(NumBits - 1, OpVT));

  if (IsByte)
    Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Result);
  return Result;
}
/// Lower CTLZ_ZERO_UNDEF: X86ISD::BSR followed by XOR with NumBits-1.
///
/// Unlike LowerCTLZ there is no CMOV guarding the BSR result. i8 inputs are
/// zero-extended to i32 for the computation and truncated afterwards.
static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  const bool IsByte = VT == MVT::i8;
  unsigned NumBits = VT.getSizeInBits();
  SDLoc dl(Op);

  SDValue Src = Op.getOperand(0);
  EVT OpVT = VT;
  if (IsByte) {
    // Widen i8 so the scan operates on an i32.
    OpVT = MVT::i32;
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Src);
  }

  // BSR produces the value plus an i32 flags result (unused here).
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  SDValue Bsr = DAG.getNode(X86ISD::BSR, dl, VTs, Src);

  SDValue Result = DAG.getNode(ISD::XOR, dl, OpVT, Bsr,
                               DAG.getConstant(NumBits - 1, OpVT));
  if (IsByte)
    Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Result);
  return Result;
}
/// Lower CTTZ: X86ISD::BSF followed by a CMOV that yields NumBits when the
/// BSF flag result signals COND_E.
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  unsigned NumBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue Src = Op.getOperand(0);

  // BSF produces the value plus an i32 flags result.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Bsf = DAG.getNode(X86ISD::BSF, dl, VTs, Src);

  // Select NumBits instead of the BSF value on COND_E.
  SDValue CMovOps[] = {
    Bsf,
    DAG.getConstant(NumBits, VT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    Bsf.getValue(1)
  };
  return DAG.getNode(X86ISD::CMOV, dl, VT, CMovOps);
}
/// Split a 256-bit integer vector binary operation into two operations on
/// the 128-bit halves of its operands and concatenate the results.
///
/// The opcode of \p Op is reused unchanged for both halves.
static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.is256BitVector() && VT.isInteger() &&
         "Unsupported value type for operation");

  const unsigned HalfNumElems = VT.getVectorNumElements() / 2;
  SDLoc dl(Op);

  // Low and high 128-bit halves of each operand.
  SDValue LHS = Op.getOperand(0);
  SDValue LHSLo = Extract128BitVector(LHS, 0, DAG, dl);
  SDValue LHSHi = Extract128BitVector(LHS, HalfNumElems, DAG, dl);

  SDValue RHS = Op.getOperand(1);
  SDValue RHSLo = Extract128BitVector(RHS, 0, DAG, dl);
  SDValue RHSHi = Extract128BitVector(RHS, HalfNumElems, DAG, dl);

  // Same element type, half as many elements.
  MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElems);

  const unsigned Opc = Op.getOpcode();
  SDValue Lo = DAG.getNode(Opc, dl, HalfVT, LHSLo, RHSLo);
  SDValue Hi = DAG.getNode(Opc, dl, HalfVT, LHSHi, RHSHi);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
}
/// Custom-lower a 256-bit integer vector ADD by splitting it into two
/// 128-bit operations via Lower256IntArith.
static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  (void)VT; // Only read by the assertion below.
  assert(VT.is256BitVector() && VT.isInteger() &&
         "Only handle AVX 256-bit vector integer operation");
  return Lower256IntArith(Op, DAG);
}
/// Custom-lower a 256-bit integer vector SUB by splitting it into two
/// 128-bit operations via Lower256IntArith.
static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  (void)VT; // Only read by the assertion below.
  assert(VT.is256BitVector() && VT.isInteger() &&
         "Only handle AVX 256-bit vector integer operation");
  return Lower256IntArith(Op, DAG);
}
/// Custom-lower a vector integer MUL.
///
/// Three strategies, tried in order:
///  - 256-bit vectors without Int256 support are split into 128-bit halves.
///  - v4i32 is built from two PMULUDQ nodes (one on the operands as given,
///    one on operands shuffled so lanes 1 and 3 sit in lanes 0 and 2) whose
///    results are interleaved with a shuffle.
///  - v2i64/v4i64/v8i64 are built from 32-bit pieces as
///    AloBlo + (AloBhi << 32) + (AhiBlo << 32).
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
                        SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  if (VT.is256BitVector() && !Subtarget->hasInt256())
    return Lower256IntArith(Op, DAG);

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  if (VT == MVT::v4i32) {
    assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() &&
           "Should not custom lower when pmuldq is available!");

    // Bring lanes 1 and 3 down into lanes 0 and 2.
    static const int UnpackMask[] = { 1, -1, 3, -1 };
    SDValue OddA = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
    SDValue OddB = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);

    // One multiply on the original operands, one on the shuffled ones.
    SDValue EvenProd = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, A, B);
    SDValue OddProd =
        DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, OddA, OddB);

    EvenProd = DAG.getNode(ISD::BITCAST, dl, VT, EvenProd);
    OddProd = DAG.getNode(ISD::BITCAST, dl, VT, OddProd);

    // Interleave lanes 0 and 2 of each product vector.
    static const int ShufMask[] = { 0, 4, 2, 6 };
    return DAG.getVectorShuffle(VT, dl, EvenProd, OddProd, ShufMask);
  }

  assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
         "Only know how to lower V2I64/V4I64/V8I64 multiply");

  // Upper 32 bits of every 64-bit lane.
  SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
  SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);

  // 32-bit element vector type of the same total width.
  EVT MulVT = MVT::v16i32;
  if (VT == MVT::v2i64)
    MulVT = MVT::v4i32;
  else if (VT == MVT::v4i64)
    MulVT = MVT::v8i32;

  A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
  B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
  Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
  Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);

  // Partial products.
  SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
  SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
  SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);

  // The cross terms contribute to the upper half of each lane.
  AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
  AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);

  SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
  return DAG.getNode(ISD::ADD, dl, VT, Sum, AhiBlo);
}
/// Lower a 128-bit integer division/remainder node on Win64 to a runtime
/// library call.
///
/// Each i128 operand is spilled to a 16-byte-aligned stack temporary and
/// passed by pointer. The call is declared as returning v2i64 in a register,
/// and that value is bitcast to the node's result type.
SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget->isTargetWin64() && "Unexpected target");
  EVT VT = Op.getValueType();
  assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
         "Unexpected return type for lowering");

  // Pick the libcall and its signedness from the opcode.
  RTLIB::Libcall LC;
  bool isSigned;
  switch (Op->getOpcode()) {
  default:
    llvm_unreachable("Unexpected request for libcall!");
  case ISD::SDIV:
    isSigned = true;
    LC = RTLIB::SDIV_I128;
    break;
  case ISD::UDIV:
    isSigned = false;
    LC = RTLIB::UDIV_I128;
    break;
  case ISD::SREM:
    isSigned = true;
    LC = RTLIB::SREM_I128;
    break;
  case ISD::UREM:
    isSigned = false;
    LC = RTLIB::UREM_I128;
    break;
  case ISD::SDIVREM:
    isSigned = true;
    LC = RTLIB::SDIVREM_I128;
    break;
  case ISD::UDIVREM:
    isSigned = false;
    LC = RTLIB::UDIVREM_I128;
    break;
  }

  SDLoc dl(Op);
  SDValue InChain = DAG.getEntryNode();

  // Spill every operand to the stack and pass its address instead.
  TargetLowering::ArgListTy Args;
  const unsigned NumArgs = Op->getNumOperands();
  for (unsigned ArgNo = 0; ArgNo != NumArgs; ++ArgNo) {
    SDValue Arg = Op->getOperand(ArgNo);
    EVT ArgVT = Arg.getValueType();
    assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
           "Unexpected argument type for lowering");

    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
    InChain = DAG.getStore(InChain, dl, Arg, StackPtr, MachinePointerInfo(),
                           false, false, 16);

    TargetLowering::ArgListEntry Entry;
    Entry.Node = StackPtr;
    Entry.Ty = PointerType::get(ArgVT.getTypeForEVT(*DAG.getContext()), 0);
    Entry.isSExt = false;
    Entry.isZExt = false;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy());

  // The call's declared return type is v2i64.
  Type *RetTy =
      static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext());

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(InChain)
    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first);
}
/// Custom-lower vector SMUL_LOHI / UMUL_LOHI on v4i32 or v8i32.
///
/// Two widening multiplies are emitted: one on the operands as given and one
/// on operands shuffled so the odd lanes sit in the even positions. The low
/// and high 32-bit halves of the products are then gathered with shuffles.
/// A signed multiply without SSE4.1 uses PMULUDQ and subtracts
/// ((Op0 >>s 31) & Op1) + ((Op1 >>s 31) & Op0) from the high halves.
/// The merged result is (Lows, Highs).
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
  EVT VT = Op0.getValueType();
  SDLoc dl(Op);
  assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
         (VT == MVT::v8i32 && Subtarget->hasInt256()));

  // Move the odd lanes into the even positions.
  const int OddMask[] = {1, -1, 3, -1, 5, -1, 7, -1};
  SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, OddMask);
  SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, OddMask);

  MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
  const bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
  // The signed multiply node is only chosen when SSE4.1 is available.
  const bool UseSignedMul = IsSigned && Subtarget->hasSSE41();
  const unsigned Opcode = UseSignedMul ? X86ISD::PMULDQ : X86ISD::PMULUDQ;

  SDValue EvenProd = DAG.getNode(Opcode, dl, MulVT, Op0, Op1);
  SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, EvenProd);
  SDValue OddProd = DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1);
  SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, OddProd);

  // Gather the high and the low 32-bit halves of every product.
  SDValue Highs, Lows;
  if (VT == MVT::v8i32) {
    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
  } else {
    const int HighMask[] = {1, 5, 3, 7};
    const int LowMask[] = {0, 4, 2, 6};
    Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
    Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
  }

  // Signed request served by the unsigned multiply: correct the high halves.
  if (IsSigned && !UseSignedMul) {
    SDValue ShAmt =
        DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
    SDValue Sign0 = DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt);
    SDValue T1 = DAG.getNode(ISD::AND, dl, VT, Sign0, Op1);
    SDValue Sign1 = DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt);
    SDValue T2 = DAG.getNode(ISD::AND, dl, VT, Sign1, Op0);

    SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
    Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
  }

  // Low halves first, then high halves.
  SDValue Results[] = {Lows, Highs};
  return DAG.getMergeValues(Results, dl);
}
/// Try to lower a vector shift (SHL/SRL/SRA) whose per-lane amounts are all
/// the same compile-time constant.
///
/// Two shapes of shift amount are recognised:
///  1. a BUILD_VECTOR with a constant splat, and
///  2. on non-64-bit subtargets, a BITCAST of a BUILD_VECTOR for v2i64 (or
///     v4i64 with Int256) whose groups of narrower constants all reassemble
///     to the same 64-bit value.
///
/// Returns the lowered node, or an empty SDValue when no pattern applies.
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
// Shape 1: the amount is a BUILD_VECTOR splatting one constant.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
uint64_t ShiftAmt = ShiftConst->getZExtValue();
// Element types handled directly by the target shift-by-constant nodes:
// 128-bit always, 256-bit with Int256, 512-bit with AVX512.
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
(Subtarget->hasAVX512() &&
(VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
DAG);
if (Op.getOpcode() == ISD::SRL)
return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
DAG);
// Arithmetic right shift is not emitted here for v2i64 / v4i64; those fall
// through to the remaining checks.
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
DAG);
}
// v16i8: shift as v8i16, then repair the bits that crossed byte boundaries.
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
MVT::v8i16, R, ShiftAmt,
DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Per-byte mask with the low ShiftAmt bits cleared.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRL) {
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
MVT::v8i16, R, ShiftAmt,
DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Per-byte mask with the high ShiftAmt bits cleared.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRA) {
// Shift by 7 is emitted as the compare PCMPGT(0, R).
if (ShiftAmt == 7) {
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// Otherwise: ((R >>u a) ^ m) - m with m = 128 >> a.
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
llvm_unreachable("Unknown shift opcode.");
}
// v32i8 with Int256: same scheme as v16i8, using v16i16 shifts and 32 lanes.
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
MVT::v16i16, R, ShiftAmt,
DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Per-byte mask with the low ShiftAmt bits cleared.
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRL) {
SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
MVT::v16i16, R, ShiftAmt,
DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Per-byte mask with the high ShiftAmt bits cleared.
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRA) {
// Shift by 7 is emitted as the compare PCMPGT(0, R).
if (ShiftAmt == 7) {
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// Otherwise: ((R >>u a) ^ m) - m with m = 128 >> a.
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
llvm_unreachable("Unknown shift opcode.");
}
}
}
// Shape 2: non-64-bit subtarget, 64-bit lanes, and the amount is a BITCAST
// of a BUILD_VECTOR with narrower elements.
if (!Subtarget->is64Bit() &&
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
// Number of build-vector operands that make up one result lane.
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
// Reassemble the first lane's amount: operand i lands at bit offset
// i * (1 << (6 - RatioInLog2)). Any non-constant operand aborts.
uint64_t ShiftAmt = 0;
for (unsigned i = 0; i != Ratio; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
if (!C)
return SDValue();
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
// Every remaining lane must reassemble to the same amount.
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
uint64_t ShAmt = 0;
for (unsigned j = 0; j != Ratio; ++j) {
ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
if (!C)
return SDValue();
ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
if (ShAmt != ShiftAmt)
return SDValue();
}
// Emit the matching shift-by-constant node for the uniform amount.
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
DAG);
case ISD::SRL:
return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
DAG);
case ISD::SRA:
return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
DAG);
}
}
// No recognised constant-amount pattern.
return SDValue();
}
/// Try to lower a vector shift whose per-lane amounts are all the same
/// (not necessarily constant) scalar value.
///
/// For the supported element types the function looks for a splat shift
/// amount, extracts the underlying scalar, and emits a target shift node via
/// getTargetVShiftNode. On non-64-bit subtargets it additionally recognises a
/// BITCAST of a BUILD_VECTOR whose operand groups repeat, and emits
/// X86ISD::VSHL/VSRL/VSRA with the original amount operand.
///
/// Returns the lowered node, or an empty SDValue when no pattern applies.
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
const X86Subtarget* Subtarget) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
// Supported type/opcode combinations; SRA on v2i64 and v4i64 is excluded.
if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) ||
VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
VT == MVT::v8i32 || VT == MVT::v16i16)) ||
(Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
// The scalar shared by every lane of the amount, if one can be found.
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
// BUILD_VECTOR: use its splat value, but discard an UNDEF splat.
BaseShAmt = BV->getSplatValue();
if (BaseShAmt && BaseShAmt.getOpcode() == ISD::UNDEF)
BaseShAmt = SDValue();
} else {
// Look through an EXTRACT_SUBVECTOR to the vector it extracts from.
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR)
Amt = Amt.getOperand(0);
ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt);
if (SVN && SVN->isSplat()) {
unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
// Splat of a BUILD_VECTOR lane: take that operand directly.
assert((SplatIdx < InVec.getValueType().getVectorNumElements()) &&
"Unexpected shuffle index found!");
BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
// Splat of a freshly inserted lane: take the inserted scalar, provided the
// insertion index is a constant equal to the splat index.
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
}
// Fallback: extract the splatted lane from the shuffle's input vector.
if (!BaseShAmt)
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec,
DAG.getIntPtrConstant(SplatIdx));
}
}
if (BaseShAmt.getNode()) {
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
// Widen the scalar: element types strictly between i32 and i64 go to i64,
// element types narrower than i32 go to i32; i32 and i64 are left alone.
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
else if (EltVT.bitsLT(MVT::i32))
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
// Emit the target shift for the listed types; any other type yields an
// empty SDValue from the inner default case.
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
// Note: v2i64 and v4i64 are absent from this list.
switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v4i32:
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
switch (VT.SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
case MVT::v16i32:
case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
}
}
}
}
// Non-64-bit subtarget with 64-bit lanes: the amount may be a BITCAST of a
// BUILD_VECTOR with narrower elements.
if (!Subtarget->is64Bit() &&
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
(Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
// Number of build-vector operands that make up one result lane.
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
// Remember the first lane's operands ...
std::vector<SDValue> Vals(Ratio);
for (unsigned i = 0; i != Ratio; ++i)
Vals[i] = Amt.getOperand(i);
// ... and require every later lane to repeat them exactly.
for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
for (unsigned j = 0; j != Ratio; ++j)
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
// The original (still bitcast) amount operand is passed to the shift node.
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1));
case ISD::SRL:
return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1));
case ISD::SRA:
return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1));
}
}
// No uniform shift amount found.
return SDValue();
}
/// Custom-lower a vector SHL/SRL/SRA.
///
/// Strategies are tried in the order written; the first that applies wins:
///  1. uniform constant amount (LowerScalarImmediateShift),
///  2. uniform scalar amount (LowerScalarVariableShift),
///  3. return Op unchanged for type/opcode combinations listed under
///     AVX512 / Int256,
///  4. SHL by a constant build-vector -> multiply by per-lane powers of two,
///  5. v4i32 SHL by a variable amount -> multiply by a power of two that is
///     built through a float bit pattern,
///  6. constant build-vector with two distinct amounts -> two uniform shifts
///     blended with MOVSS/MOVSD,
///  7. v16i8 SHL -> three conditional shift steps (by 4, 2, 1) selected by
///     successive bits of the amount,
///  8. v8i16 with Int256 -> extend to v8i32, shift, truncate,
///  9. any 256-bit vector -> split into two 128-bit shifts.
/// Returns an empty SDValue if nothing applies.
static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
SDValue V;
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
// 1. Uniform constant shift amount.
V = LowerScalarImmediateShift(Op, DAG, Subtarget);
if (V.getNode())
return V;
// 2. Uniform (possibly non-constant) scalar shift amount.
V = LowerScalarVariableShift(Op, DAG, Subtarget);
if (V.getNode())
return V;
// 3. Combinations returned as-is (no custom expansion is built for them).
if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
return Op;
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v4i64 || VT == MVT::v8i32))
return Op;
if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v2i64 || VT == MVT::v4i32 ||
VT == MVT::v4i64 || VT == MVT::v8i32))
return Op;
if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32))
return Op;
}
// 4. SHL by a build-vector of constants: multiply by (1 << amount) per lane.
if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget->hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
SmallVector<SDValue, 8> Elts;
EVT SVT = VT.getScalarType();
unsigned SVTBits = SVT.getSizeInBits();
const APInt &One = APInt(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i=0; i !=NumElems; ++i) {
// Note: this local Op shadows the function parameter inside the loop.
SDValue Op = Amt->getOperand(i);
// UNDEF amounts are passed through as UNDEF multipliers.
if (Op->getOpcode() == ISD::UNDEF) {
Elts.push_back(Op);
continue;
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
// Amounts of at least the element width become UNDEF multipliers.
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
continue;
}
Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
}
SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
return DAG.getNode(ISD::MUL, dl, VT, R, BV);
}
// 5. v4i32 SHL by a variable amount: (Amt << 23) + 0x3f800000 is bitcast to
// v4f32, converted to integer, and used as the multiplier for R.
// NOTE(review): presumably this builds 2^Amt via the IEEE-754 single
// exponent field - confirm.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT));
Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT));
Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
// 6. Constant amounts that take only two values, laid out so that the two
// uniformly shifted vectors can be blended with MOVSS or MOVSD.
if ((VT == MVT::v8i16 || VT == MVT::v4i32) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
unsigned TargetOpcode = X86ISD::MOVSS;
bool CanBeSimplified;
// Amt1 is the amount of the leading lanes, Amt2 of the trailing lanes.
SDValue Amt1 = Amt->getOperand(0);
SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) :
Amt->getOperand(2);
if (VT == MVT::v4i32) {
// MOVSS shape: lanes 1..3 share one amount.
CanBeSimplified = Amt2 == Amt->getOperand(2) &&
Amt2 == Amt->getOperand(3);
if (!CanBeSimplified) {
// MOVSD shape: lanes 0..1 share one amount and lanes 2..3 another.
CanBeSimplified = Amt1 == Amt->getOperand(1) &&
Amt->getOperand(2) == Amt->getOperand(3);
TargetOpcode = X86ISD::MOVSD;
Amt2 = Amt->getOperand(2);
}
} else {
// v8i16, MOVSS shape: lanes 0..1 share Amt1 and lanes 2..7 share Amt2.
CanBeSimplified = Amt1 == Amt->getOperand(1);
for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
CanBeSimplified = Amt2 == Amt->getOperand(i);
if (!CanBeSimplified) {
// v8i16, MOVSD shape: lanes 0..3 share Amt1 and lanes 4..7 share Amt2.
TargetOpcode = X86ISD::MOVSD;
CanBeSimplified = true;
Amt2 = Amt->getOperand(4);
for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
CanBeSimplified = Amt1 == Amt->getOperand(i);
for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
CanBeSimplified = Amt2 == Amt->getOperand(j);
}
}
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Shift the whole vector by each amount, then blend the two results.
EVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
SDValue Splat2 =
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
// MOVSD is applied to the values viewed as v2i64.
if (TargetOpcode == X86ISD::MOVSD)
CastVT = MVT::v2i64;
SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1);
SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2);
SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
BitCast1, DAG);
return DAG.getNode(ISD::BITCAST, dl, VT, Result);
}
}
// 7. v16i8 SHL by a variable amount. The amount is moved so that its bit 2
// sits in bit 7 of each byte; each round tests bit 7, conditionally applies
// a shift step to R, then doubles the amount to expose the next bit.
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = Amt << 5
Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Select mask: lanes where bit 7 of 'a' is set.
SDValue VSelM = DAG.getConstant(0x80, VT);
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// Pre-masks that keep only the bits which survive a byte-wise shift by 4
// (0x0f) or by 2 (0x3f) when the shift itself is done on v8i16.
SDValue CM1 = DAG.getConstant(0x0f, VT);
SDValue CM2 = DAG.getConstant(0x3f, VT);
// Round 1: r = select(mask, (r & 0x0f) << 4, r)
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a, recompute the mask.
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// Round 2: r = select(mask, (r & 0x3f) << 2, r)
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a, recompute the mask.
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// Round 3: r = select(mask, r + r, r)
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
// 8. v8i16 with Int256: widen to v8i32, shift there, truncate back. R is
// sign-extended for SRA and zero-extended otherwise.
if (Subtarget->hasInt256() && VT == MVT::v8i16) {
// NOTE(review): the guard above already forces VT == v8i16, so this ternary
// always selects v8i32 and its v16i16 arm is never taken.
MVT NewVT = VT == MVT::v8i16 ? MVT::v8i32 : MVT::v16i16;
unsigned ExtOpc =
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
R = DAG.getNode(ExtOpc, dl, NewVT, R);
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
}
// 9. Remaining 256-bit vectors: shift each 128-bit half separately and
// concatenate.
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
SDValue V2 = Extract128BitVector(R, NumElems/2, DAG, dl);
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// A BUILD_VECTOR amount is split by rebuilding two half-size
// BUILD_VECTORs from its operands.
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
} else {
// Any other amount is split with 128-bit subvector extracts.
Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
Amt2 = Extract128BitVector(Amt, NumElems/2, DAG, dl);
}
V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1);
V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
}
// Nothing applied.
return SDValue();
}
/// Lower an arithmetic-with-overflow node (SADDO, UADDO, SSUBO, USUBO, SMULO,
/// UMULO) into an X86ISD arithmetic node that also produces a flags value,
/// plus an X86ISD::SETCC reading that flags value.  The two results are
/// returned merged under the original node's value list.
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(Op);

  // Signed add/sub of the constant 1 is lowered to INC/DEC instead of ADD/SUB.
  bool RHSIsOne = false;
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
    RHSIsOne = C->isOne();

  unsigned ArithOpc = 0;
  unsigned CondCode = 0;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown ovf instruction!");
  case ISD::SADDO:
    ArithOpc = RHSIsOne ? X86ISD::INC : X86ISD::ADD;
    CondCode = X86::COND_O;
    break;
  case ISD::UADDO:
    ArithOpc = X86ISD::ADD;
    CondCode = X86::COND_B;
    break;
  case ISD::SSUBO:
    ArithOpc = RHSIsOne ? X86ISD::DEC : X86ISD::SUB;
    CondCode = X86::COND_O;
    break;
  case ISD::USUBO:
    ArithOpc = X86ISD::SUB;
    CondCode = X86::COND_B;
    break;
  case ISD::SMULO:
    ArithOpc = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
    CondCode = X86::COND_O;
    break;
  case ISD::UMULO: {
    // The i8 form uses the common two-result path below.
    if (N->getValueType(0) == MVT::i8) {
      ArithOpc = X86ISD::UMUL8;
      CondCode = X86::COND_O;
      break;
    }
    // Wider unsigned multiplies produce three results (two values of the
    // operand type followed by the i32 flags), so the flags are result #2.
    SDVTList WideVTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
                                     MVT::i32);
    SDValue Product = DAG.getNode(X86ISD::UMUL, DL, WideVTs, LHS, RHS);
    SDValue Overflow =
        DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
                    DAG.getConstant(X86::COND_O, MVT::i32),
                    SDValue(Product.getNode(), 2));
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Product,
                       Overflow);
  }
  }

  // Common path: result #0 is the arithmetic value, result #1 is the flags.
  SDVTList ResultVTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Arith = DAG.getNode(ArithOpc, DL, ResultVTs, LHS, RHS);
  SDValue Overflow =
      DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
                  DAG.getConstant(CondCode, MVT::i32),
                  SDValue(Arith.getNode(), 1));
  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Arith, Overflow);
}
/// Custom-lower a vector SIGN_EXTEND_INREG.
///
/// Handles v4i32, v8i16, v8i32 and v16i16.  256-bit types without hasInt256()
/// are split into two 128-bit halves that are lowered recursively; everything
/// else becomes a shift-left followed by an arithmetic shift-right by the
/// difference in element widths.  Returns an empty SDValue for any other case.
SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  MVT VT = Op.getSimpleValueType();

  if (!(Subtarget->hasSSE2() && VT.isVector()))
    return SDValue();

  // Number of bits by which each element has to be shifted up and back down.
  unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
                      ExtraVT.getScalarType().getSizeInBits();

  const bool Is256Bit = VT == MVT::v8i32 || VT == MVT::v16i16;
  const bool Is128Bit = VT == MVT::v4i32 || VT == MVT::v8i16;
  if (!Is256Bit && !Is128Bit)
    return SDValue();

  if (Is256Bit) {
    if (!Subtarget->hasFp256())
      return SDValue();

    if (!Subtarget->hasInt256()) {
      // No 256-bit integer support: operate on each 128-bit half separately.
      unsigned NumElems = VT.getVectorNumElements();
      SDValue Src = Op.getOperand(0);
      SDValue Lo = Extract128BitVector(Src, 0, DAG, dl);
      SDValue Hi = Extract128BitVector(Src, NumElems / 2, DAG, dl);

      MVT EltVT = VT.getVectorElementType();
      EVT HalfVT = MVT::getVectorVT(EltVT, NumElems / 2);

      EVT ExtraEltVT = ExtraVT.getVectorElementType();
      unsigned ExtraNumElems = ExtraVT.getVectorNumElements();
      ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
                                 ExtraNumElems / 2);
      SDValue HalfExtra = DAG.getValueType(ExtraVT);

      Lo = DAG.getNode(Op.getOpcode(), dl, HalfVT, Lo, HalfExtra);
      Hi = DAG.getNode(Op.getOpcode(), dl, HalfVT, Hi, HalfExtra);
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
    }
    // With hasInt256() the 256-bit types take the same shift sequence as the
    // 128-bit ones below.
  }

  SDValue Src = Op.getOperand(0);
  SDValue Shl =
      getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Src, BitsDiff, DAG);
  return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff, DAG);
}
/// Decide, from the primitive bit width of \p MemType, whether an access of
/// that type is reported as needing the wide compare-exchange form:
///   - 64 bits:  true exactly when the subtarget is not 64-bit;
///   - 128 bits: true exactly when the subtarget reports hasCmpxchg16b();
///   - any other width: false.
bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
  switch (MemType->getPrimitiveSizeInBits()) {
  case 64:
    return !Subtarget->is64Bit();
  case 128:
    return Subtarget->hasCmpxchg16b();
  default:
    return false;
  }
}
/// An atomic store is expanded in IR exactly when needsCmpXchgNb() holds for
/// the type of the value being stored.
bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
  const Type *StoredTy = SI->getValueOperand()->getType();
  return needsCmpXchgNb(StoredTy);
}
/// An atomic load is expanded in IR exactly when needsCmpXchgNb() holds for
/// the pointee type of the load's pointer operand.
bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  Type *PtrOperandTy = LI->getPointerOperand()->getType();
  return needsCmpXchgNb(cast<PointerType>(PtrOperandTy)->getElementType());
}
/// Decide whether an atomicrmw instruction is expanded in IR.
///
/// Operations wider than the native width (64 bits on 64-bit subtargets, 32
/// otherwise) defer to needsCmpXchgNb().  For native-width operations:
/// xchg/add/sub are never expanded; or/and/xor are expanded only when the
/// result of the instruction has uses; nand/min/max/umin/umax are always
/// expanded.
bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  const unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
  const Type *MemType = AI->getType();
  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
    return needsCmpXchgNb(MemType);

  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    return false;

  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // Only the variants whose old value is actually consumed are expanded.
    return !AI->use_empty();

  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    return true;

  default:
    llvm_unreachable("Unknown atomic operation");
  }
}
/// MFENCE is treated as available when the subtarget has SSE2 or is 64-bit.
static bool hasMFENCE(const X86Subtarget& Subtarget) {
  if (Subtarget.hasSSE2())
    return true;
  return Subtarget.is64Bit();
}
/// Replace an idempotent atomicrmw with an mfence followed by an atomic load
/// of the same location.
///
/// Returns the new load on success.  Returns nullptr (leaving \p AI untouched)
/// when the access is wider than the native width, when the synchronization
/// scope is SingleThread, or when the subtarget has no MFENCE.  On success
/// \p AI has been RAUW'd with the load and erased, so callers must not use it
/// afterwards.
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
  const unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
  const Type *MemType = AI->getType();
  const unsigned MemBits = MemType->getPrimitiveSizeInBits();
  if (MemBits > NativeWidth)
    return nullptr;

  IRBuilder<> Builder(AI);
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  auto SynchScope = AI->getSynchScope();
  // The load's ordering is derived from the RMW's ordering through
  // getStrongestFailureOrdering (presumably because a load cannot carry the
  // release component of the original ordering).
  auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());
  auto Ptr = AI->getPointerOperand();

  if (SynchScope == SingleThread) {
    return nullptr;
  } else if (hasMFENCE(*Subtarget)) {
    // Emit the fence in front of the load.
    Function *MFence = llvm::Intrinsic::getDeclaration(M,
                                                Intrinsic::x86_sse2_mfence);
    Builder.CreateCall(MFence);
  } else {
    return nullptr;
  }

  // CreateAlignedLoad expects the alignment in *bytes*.  Passing the size in
  // bits would claim an alignment eight times larger than the access (e.g.
  // align 8 for an i8), which the pointer is not guaranteed to have.  Use the
  // natural alignment of the accessed type instead.
  LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, MemBits / 8);
  Loaded->setAtomic(Order, SynchScope);
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return Loaded;
}
/// Lower ISD::ATOMIC_FENCE.
///
/// A sequentially-consistent, cross-thread fence becomes X86ISD::MFENCE when
/// hasMFENCE() holds, and otherwise a locked OR of zero into the memory at
/// ESP.  Every other ordering/scope combination becomes X86ISD::MEMBARRIER.
static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
                                 SelectionDAG &DAG) {
  SDLoc dl(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SynchronizationScope FenceScope = static_cast<SynchronizationScope>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  const bool NeedsRealFence =
      FenceOrdering == SequentiallyConsistent && FenceScope == CrossThread;
  if (!NeedsRealFence)
    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));

  if (hasMFENCE(*Subtarget))
    return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));

  // No MFENCE: build "lock or [esp], 0" by hand.  The first five operands
  // form the memory reference, followed by the value to OR in and the chain.
  SDValue Chain = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, MVT::i32);
  SDValue Ops[] = {
    DAG.getRegister(X86::ESP, MVT::i32), // Base
    DAG.getTargetConstant(1, MVT::i8),   // Scale
    DAG.getRegister(0, MVT::i32),        // Index
    DAG.getTargetConstant(0, MVT::i32),  // Disp
    DAG.getRegister(0, MVT::i32),        // Segment
    Zero,
    Chain
  };
  SDNode *LockedOr =
      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
  return SDValue(LockedOr, 0);
}
/// Lower ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS for i8/i16/i32/i64.
///
/// The compare value (operand 2) is copied into AL/AX/EAX/RAX, an
/// X86ISD::LCMPXCHG_DAG memory-intrinsic node is emitted, and the old value
/// and EFLAGS are copied back out.  The three results of the original node
/// are replaced in place, so the function always returns an empty SDValue.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
                             SelectionDAG &DAG) {
  MVT T = Op.getSimpleValueType();
  SDLoc DL(Op);

  // Pick the accumulator register and the operand size in bytes.
  unsigned AccReg = 0;
  unsigned SizeInBytes = 0;
  switch (T.SimpleTy) {
  default:
    llvm_unreachable("Invalid value type!");
  case MVT::i8:
    AccReg = X86::AL;
    SizeInBytes = 1;
    break;
  case MVT::i16:
    AccReg = X86::AX;
    SizeInBytes = 2;
    break;
  case MVT::i32:
    AccReg = X86::EAX;
    SizeInBytes = 4;
    break;
  case MVT::i64:
    assert(Subtarget->is64Bit() && "Node not type legal!");
    AccReg = X86::RAX;
    SizeInBytes = 8;
    break;
  }

  SDValue CopyIn = DAG.getCopyToReg(Op.getOperand(0), DL, AccReg,
                                    Op.getOperand(2), SDValue());
  SDValue CmpXchgOps[] = {
    CopyIn.getValue(0),                          // chain
    Op.getOperand(1),                            // pointer
    Op.getOperand(3),                            // swap value
    DAG.getTargetConstant(SizeInBytes, MVT::i8), // size in bytes
    CopyIn.getValue(1)                           // glue
  };
  SDVTList ChainAndGlue = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue CmpXchg = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL,
                                            ChainAndGlue, CmpXchgOps, T, MMO);

  // Read back the accumulator and then EFLAGS, keeping them glued in order.
  SDValue CopyOut = DAG.getCopyFromReg(CmpXchg.getValue(0), DL, AccReg, T,
                                       CmpXchg.getValue(1));
  SDValue EFLAGS = DAG.getCopyFromReg(CopyOut.getValue(1), DL, X86::EFLAGS,
                                      MVT::i32, CopyOut.getValue(2));
  SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
                                DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), CopyOut);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
  return SDValue();
}
/// Custom-lower ISD::BITCAST.
///
/// For a v2i32/v4i16/v8i8 source bitcast to f64, the vector is widened to
/// twice as many elements (upper half undef), bitcast to v2f64, and element 0
/// is extracted.  Any other destination for those sources returns an empty
/// SDValue.  For the remaining 64-bit cases, i64<->vector and vector<->vector
/// casts are returned unchanged; everything else returns an empty SDValue.
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
                            SelectionDAG &DAG) {
  MVT SrcVT = Op.getOperand(0).getSimpleValueType();
  MVT DstVT = Op.getSimpleValueType();

  const bool SrcIs64BitIntVec =
      SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8;
  if (SrcIs64BitIntVec) {
    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
    if (DstVT != MVT::f64)
      return SDValue();

    SDValue InVec = Op->getOperand(0);
    SDLoc dl(Op);
    unsigned NumElts = SrcVT.getVectorNumElements();
    EVT SVT = SrcVT.getVectorElementType();

    // Lower half: the original elements, extracted one by one.
    SmallVector<SDValue, 16> WideElts;
    for (unsigned Idx = 0; Idx != NumElts; ++Idx)
      WideElts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
                                     DAG.getIntPtrConstant(Idx)));

    // Upper half: undef padding.
    SDValue Undef = DAG.getUNDEF(SVT);
    WideElts.append(NumElts, Undef);

    EVT WideVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
    SDValue WideVec = DAG.getNode(ISD::BUILD_VECTOR, dl, WideVT, WideElts);
    SDValue AsV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, WideVec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, AsV2F64,
                       DAG.getIntPtrConstant(0));
  }

  assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
         Subtarget->hasMMX() && "Unexpected custom BITCAST");
  assert((DstVT == MVT::i64 ||
          (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
         "Unexpected custom BITCAST");

  const bool SrcIsVec = SrcVT.isVector();
  const bool DstIsVec = DstVT.isVector();
  const bool KeepAsIs = (SrcVT == MVT::i64 && DstIsVec) ||
                        (DstVT == MVT::i64 && SrcIsVec) ||
                        (SrcIsVec && DstIsVec);
  return KeepAsIs ? Op : SDValue();
}
/// Lower a vector ISD::CTPOP for 128- and 256-bit vectors with i32 or i64
/// elements using a bit-parallel population count built from shifts, ANDs,
/// subtracts and adds (no multiply).
///
/// With v denoting each element, the emitted sequence is:
///   v = v - ((v >> 1) & 0x55...)
///   v = (v & 0x33...) + ((v >> 2) & 0x33...)
///   v = (v + (v >> 4)) & 0x0F...
///   v = v + (v >> 8); v = v + (v >> 16); [i64 only: v = v + (v >> 32)]
///   v = v & 0x3F (i32) or 0x7F (i64)
///
/// For i32 elements every AND is performed on the v2i64/v4i64 bitcast of its
/// operands and the result is bitcast back to the original type.
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
SDLoc dl(Node);
// From here on Op is the value whose bits are counted, not the CTPOP node.
Op = Op.getOperand(0);
EVT VT = Op.getValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"CTPOP lowering only implemented for 128/256-bit wide vector types");
unsigned NumElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
unsigned Len = EltVT.getSizeInBits();
assert(EltVT.isInteger() && (Len == 32 || Len == 64) && Len % 8 == 0 &&
"CTPOP not implemented for this vector element type.");
// i32 element vectors have their ANDs done in the i64-element type of the
// same total width.
bool NeedsBitcast = EltVT == MVT::i32;
MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64;
// Per-element masks: the byte pattern replicated across Len bits.
SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), EltVT);
SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), EltVT);
SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), EltVT);
// Step 1: v = v - ((v >> 1) & 0x55...)
SmallVector<SDValue, 8> Ones(NumElts, DAG.getConstant(1, EltVT));
SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones);
SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV);
if (NeedsBitcast)
Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
SmallVector<SDValue, 8> Mask55(NumElts, Cst55);
SDValue M55 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask55);
if (NeedsBitcast)
M55 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M55);
SDValue And = DAG.getNode(ISD::AND, dl, Srl.getValueType(), Srl, M55);
// Return to the original element type before the subtract.
if (VT != And.getValueType())
And = DAG.getNode(ISD::BITCAST, dl, VT, And);
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Op, And);
// Step 2: v = (v & 0x33...) + ((v >> 2) & 0x33...)
SmallVector<SDValue, 8> Mask33(NumElts, Cst33);
SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33);
SmallVector<SDValue, 8> Twos(NumElts, DAG.getConstant(2, EltVT));
SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos);
Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV);
if (NeedsBitcast) {
Srl = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Srl);
M33 = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M33);
Sub = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Sub);
}
SDValue AndRHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Srl, M33);
SDValue AndLHS = DAG.getNode(ISD::AND, dl, M33.getValueType(), Sub, M33);
if (VT != AndRHS.getValueType()) {
AndRHS = DAG.getNode(ISD::BITCAST, dl, VT, AndRHS);
AndLHS = DAG.getNode(ISD::BITCAST, dl, VT, AndLHS);
}
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS);
// Step 3: v = (v + (v >> 4)) & 0x0F...
SmallVector<SDValue, 8> Fours(NumElts, DAG.getConstant(4, EltVT));
SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours);
Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV);
Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
SmallVector<SDValue, 8> Mask0F(NumElts, Cst0F);
SDValue M0F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask0F);
if (NeedsBitcast) {
Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
M0F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M0F);
}
And = DAG.getNode(ISD::AND, dl, M0F.getValueType(), Add, M0F);
if (VT != And.getValueType())
And = DAG.getNode(ISD::BITCAST, dl, VT, And);
// Step 4: accumulate the per-byte counts with v += v >> i for
// i = 8, 16, ... up to and including Len/2.
Add = And;
SmallVector<SDValue, 8> Csts;
for (unsigned i = 8; i <= Len/2; i *= 2) {
Csts.assign(NumElts, DAG.getConstant(i, EltVT));
SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts);
Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV);
Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl);
Csts.clear();
}
// Step 5: keep only the low 6 bits (i32) or 7 bits (i64) of each element.
SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), EltVT);
SmallVector<SDValue, 8> Cst3FV(NumElts, Cst3F);
SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV);
if (NeedsBitcast) {
Add = DAG.getNode(ISD::BITCAST, dl, BitcastVT, Add);
M3F = DAG.getNode(ISD::BITCAST, dl, BitcastVT, M3F);
}
And = DAG.getNode(ISD::AND, dl, M3F.getValueType(), Add, M3F);
if (VT != And.getValueType())
And = DAG.getNode(ISD::BITCAST, dl, VT, And);
return And;
}
/// Lower ISD::ATOMIC_LOAD_SUB by negating the value operand (0 - x) and
/// emitting an ISD::ATOMIC_LOAD_ADD with the same chain, pointer, memory
/// type, memory operand, ordering and synchronization scope.
static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  AtomicSDNode *Atomic = cast<AtomicSDNode>(Node);
  SDLoc dl(Node);
  EVT T = Node->getValueType(0);

  SDValue Negated = DAG.getNode(ISD::SUB, dl, T, DAG.getConstant(0, T),
                                Node->getOperand(2));
  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, Atomic->getMemoryVT(),
                       Node->getOperand(0), Node->getOperand(1), Negated,
                       Atomic->getMemOperand(), Atomic->getOrdering(),
                       Atomic->getSynchScope());
}
/// Lower ISD::ATOMIC_STORE.
///
/// A store that is sequentially consistent, or whose memory type is not
/// legal, is rewritten as an ISD::ATOMIC_SWAP and the swap's chain result
/// (value #1) is returned.  Any other store is returned unchanged.
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  AtomicSDNode *Atomic = cast<AtomicSDNode>(Node);
  SDLoc dl(Node);
  EVT VT = Atomic->getMemoryVT();

  // Nothing to do for a non-seq_cst store of a legal type.
  if (Atomic->getOrdering() != SequentiallyConsistent &&
      DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return Op;

  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, Atomic->getMemoryVT(),
                               Node->getOperand(0), Node->getOperand(1),
                               Node->getOperand(2), Atomic->getMemOperand(),
                               Atomic->getOrdering(), Atomic->getSynchScope());
  return Swap.getValue(1);
}
/// Lower ADDC/ADDE/SUBC/SUBE to X86ISD::ADD/ADC/SUB/SBB respectively, each
/// producing the value plus an i32 flags result.  ADDE and SUBE forward their
/// third operand as well.  Returns an empty SDValue when the value type is
/// not legal.
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getNode()->getSimpleValueType(0);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  unsigned X86Opc;
  bool TakesCarryIn = false;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Invalid code");
  case ISD::ADDC:
    X86Opc = X86ISD::ADD;
    break;
  case ISD::ADDE:
    X86Opc = X86ISD::ADC;
    TakesCarryIn = true;
    break;
  case ISD::SUBC:
    X86Opc = X86ISD::SUB;
    break;
  case ISD::SUBE:
    X86Opc = X86ISD::SBB;
    TakesCarryIn = true;
    break;
  }

  if (TakesCarryIn)
    return DAG.getNode(X86Opc, SDLoc(Op), VTs, Op.getOperand(0),
                       Op.getOperand(1), Op.getOperand(2));
  return DAG.getNode(X86Opc, SDLoc(Op), VTs, Op.getOperand(0),
                     Op.getOperand(1));
}
/// Lower ISD::FSINCOS to a call to "__sincos_stret" (f64 argument) or
/// "__sincosf_stret" (any other argument type); asserts a 64-bit Darwin
/// subtarget.
///
/// For f64 the call is typed as returning a two-element struct and the call
/// result is returned directly.  Otherwise the call is typed as returning a
/// 4-element vector, and elements 0 and 1 are extracted and merged as the
/// two results.
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  // Single argument, passed without sign or zero extension.
  TargetLowering::ArgListEntry ArgEntry;
  ArgEntry.Node = Arg;
  ArgEntry.Ty = ArgTy;
  ArgEntry.isSExt = false;
  ArgEntry.isZExt = false;
  TargetLowering::ArgListTy Args;
  Args.push_back(ArgEntry);

  const bool isF64 = ArgVT == MVT::f64;
  const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());

  Type *RetTy;
  if (isF64)
    RetTy = StructType::get(ArgTy, ArgTy, nullptr);
  else
    RetTy = VectorType::get(ArgTy, 4);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);

  if (isF64)
    return CallResult.first;

  // Non-f64: the two results are elements 0 and 1 of the returned vector.
  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(0));
  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(1));
  SDVTList ResultVTs = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, ResultVTs, SinVal, CosVal);
}
/// Entry point for custom lowering: dispatch \p Op to the lowering routine
/// that matches its opcode and return that routine's result.
///
/// Opcodes without a case here hit llvm_unreachable.  Several opcodes share
/// one routine (the *_PARTS shifts, FABS/FNEG, INTRINSIC_VOID/W_CHAIN,
/// UMUL_LOHI/SMUL_LOHI, SRA/SRL/SHL, the overflow arithmetic group and the
/// carry arithmetic group).
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
// Atomics.
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
// Vector construction, shuffling and element/subvector access.
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
// Address-like nodes.
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRA_PARTS:
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
// Conversions, extensions and truncation.
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
// Comparison, selection and control flow.
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
// Varargs.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
// Intrinsics.
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
// Frame, stack, exception-handling and trampoline nodes.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
// Bit counting, multiplication and shifts.
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
// Arithmetic with overflow.
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
// Arithmetic with carry.
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
}
}
/// Custom type legalization hook: for node \p N, append replacement values to
/// \p Results.
///
/// Some paths return (or fall out of the switch) without appending anything.
/// Opcodes without a case here hit llvm_unreachable.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
// v2f32 min/max: widen both operands to v4f32 by concatenating with undef
// and re-emit the same opcode on the wide type.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
if (VT != MVT::v2f32)
llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(1), UNDEF);
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
return;
}
// No replacement values are produced for these opcodes.
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
return;
// Division and remainder are delegated to LowerWin64_i128OP.
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);
return;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
// Unsigned conversions are only handled when isIntegerTypeFTOL accepts the
// result type.
if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
return;
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {
EVT VT = N->getValueType(0);
// When the helper also returns a stack slot, the result is loaded from it
// (FIST is passed as the load's chain); otherwise FIST is the result.
if (StackSlot.getNode())
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
MachinePointerInfo(),
false, false, false, 0));
else
Results.push_back(FIST);
}
return;
}
// v2i32 -> v2f32 unsigned conversion: zero-extend to v2i64, OR in the bit
// pattern of the double 2^52 (0x4330000000000000), reinterpret as v2f64,
// subtract 2^52 again, then round with VFPROUND to v4f32.
case ISD::UINT_TO_FP: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (N->getOperand(0).getValueType() != MVT::v2i32 ||
N->getValueType(0) != MVT::v2f32)
return;
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
N->getOperand(0));
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
MVT::f64);
SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, VBias));
Or = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
}
// FP_ROUND with a legal operand type becomes VFPROUND producing v4f32.
case ISD::FP_ROUND: {
if (!TLI.isTypeLegal(N->getOperand(0).getValueType()))
return;
SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
Results.push_back(V);
return;
}
// Chained intrinsics: operand 1 holds the intrinsic ID.  Only rdtsc, rdtscp
// and rdpmc are handled; each case returns.
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default : llvm_unreachable("Do not know how to custom type "
"legalize this intrinsic operation!");
case Intrinsic::x86_rdtsc:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
Results);
case Intrinsic::x86_rdpmc:
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
}
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
}
// Double-width compare-and-swap (i64 via LCMPXCHG8_DAG, i128 via
// LCMPXCHG16_DAG).  The compare value is split into EAX/EDX (RAX/RDX) and the
// swap value into EBX/ECX (RBX/RCX); all register copies are chained and
// glued in sequence.
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
// Low and high halves of the compare value (operand 2).
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, HalfT));
cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(1, HalfT));
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
cpInL, SDValue());
cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
Regs64bit ? X86::RDX : X86::EDX,
cpInH, cpInL.getValue(1));
// Low and high halves of the swap value (operand 3).
SDValue swapInL, swapInH;
swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(0, HalfT));
swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
DAG.getConstant(1, HalfT));
swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl,
Regs64bit ? X86::RBX : X86::EBX,
swapInL, cpInH.getValue(1));
swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl,
Regs64bit ? X86::RCX : X86::ECX,
swapInH, swapInL.getValue(1));
// Operands of the cmpxchg node: chain, pointer (operand 1), glue.
SDValue Ops[] = { swapInH.getValue(0),
N->getOperand(1),
swapInH.getValue(1) };
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
X86ISD::LCMPXCHG8_DAG;
SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
// Read the two result halves back from EAX/EDX (RAX/RDX), then EFLAGS.
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
// The success flag is SETCC(COND_E) on EFLAGS, resized to the type of
// result #1.
SDValue Success =
DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
// Results, in order: the rebuilt pair, the success flag, the output chain.
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
Results.push_back(Success);
Results.push_back(EFLAGS.getValue(1));
return;
}
// Nothing is appended for these atomics; control leaves the switch and the
// function returns (presumably handing the node back to generic
// legalization -- confirm against the caller).
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
break;
}
// f64 -> v2i32/v4i16/v8i8: place the scalar in a v2f64 and bitcast that to an
// integer vector with twice as many elements as the destination.
case ISD::BITCAST: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
EVT DstVT = N->getValueType(0);
EVT SrcVT = N->getOperand(0)->getValueType(0);
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
return;
unsigned NumElts = DstVT.getVectorNumElements();
EVT SVT = DstVT.getVectorElementType();
EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
MVT::v2f64, N->getOperand(0));
SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
// Under the experimental widening legalization the wide vector itself is the
// result.
if (ExperimentalVectorWideningLegalization) {
Results.push_back(ToVecInt);
return;
}
// Otherwise rebuild a DstVT vector from the first NumElts elements of the
// wide vector.
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
ToVecInt, DAG.getIntPtrConstant(i)));
Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts));
}
}
}
/// Return the printable name ("X86ISD::<enumerator>") of an X86-specific DAG
/// node opcode, or nullptr when the opcode is not one of those listed below.
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expands to one `case` that returns the enumerator's own spelling prefixed
// with "X86ISD::", so the string can never drift from the enumerator name.
#define X86ISD_NODE_NAME_CASE(NODE)                                            \
  case X86ISD::NODE:                                                           \
    return "X86ISD::" #NODE;

  switch (Opcode) {
  default:
    return nullptr;
  X86ISD_NODE_NAME_CASE(BSF)
  X86ISD_NODE_NAME_CASE(BSR)
  X86ISD_NODE_NAME_CASE(SHLD)
  X86ISD_NODE_NAME_CASE(SHRD)
  X86ISD_NODE_NAME_CASE(FAND)
  X86ISD_NODE_NAME_CASE(FANDN)
  X86ISD_NODE_NAME_CASE(FOR)
  X86ISD_NODE_NAME_CASE(FXOR)
  X86ISD_NODE_NAME_CASE(FSRL)
  X86ISD_NODE_NAME_CASE(FILD)
  X86ISD_NODE_NAME_CASE(FILD_FLAG)
  X86ISD_NODE_NAME_CASE(FP_TO_INT16_IN_MEM)
  X86ISD_NODE_NAME_CASE(FP_TO_INT32_IN_MEM)
  X86ISD_NODE_NAME_CASE(FP_TO_INT64_IN_MEM)
  X86ISD_NODE_NAME_CASE(FLD)
  X86ISD_NODE_NAME_CASE(FST)
  X86ISD_NODE_NAME_CASE(CALL)
  X86ISD_NODE_NAME_CASE(RDTSC_DAG)
  X86ISD_NODE_NAME_CASE(RDTSCP_DAG)
  X86ISD_NODE_NAME_CASE(RDPMC_DAG)
  X86ISD_NODE_NAME_CASE(BT)
  X86ISD_NODE_NAME_CASE(CMP)
  X86ISD_NODE_NAME_CASE(COMI)
  X86ISD_NODE_NAME_CASE(UCOMI)
  X86ISD_NODE_NAME_CASE(CMPM)
  X86ISD_NODE_NAME_CASE(CMPMU)
  X86ISD_NODE_NAME_CASE(SETCC)
  X86ISD_NODE_NAME_CASE(SETCC_CARRY)
  X86ISD_NODE_NAME_CASE(FSETCC)
  X86ISD_NODE_NAME_CASE(CMOV)
  X86ISD_NODE_NAME_CASE(BRCOND)
  X86ISD_NODE_NAME_CASE(RET_FLAG)
  X86ISD_NODE_NAME_CASE(REP_STOS)
  X86ISD_NODE_NAME_CASE(REP_MOVS)
  X86ISD_NODE_NAME_CASE(GlobalBaseReg)
  X86ISD_NODE_NAME_CASE(Wrapper)
  X86ISD_NODE_NAME_CASE(WrapperRIP)
  X86ISD_NODE_NAME_CASE(PEXTRB)
  X86ISD_NODE_NAME_CASE(PEXTRW)
  X86ISD_NODE_NAME_CASE(INSERTPS)
  X86ISD_NODE_NAME_CASE(PINSRB)
  X86ISD_NODE_NAME_CASE(PINSRW)
  X86ISD_NODE_NAME_CASE(PSHUFB)
  X86ISD_NODE_NAME_CASE(ANDNP)
  X86ISD_NODE_NAME_CASE(PSIGN)
  X86ISD_NODE_NAME_CASE(BLENDI)
  X86ISD_NODE_NAME_CASE(SHRUNKBLEND)
  X86ISD_NODE_NAME_CASE(SUBUS)
  X86ISD_NODE_NAME_CASE(HADD)
  X86ISD_NODE_NAME_CASE(HSUB)
  X86ISD_NODE_NAME_CASE(FHADD)
  X86ISD_NODE_NAME_CASE(FHSUB)
  X86ISD_NODE_NAME_CASE(UMAX)
  X86ISD_NODE_NAME_CASE(UMIN)
  X86ISD_NODE_NAME_CASE(SMAX)
  X86ISD_NODE_NAME_CASE(SMIN)
  X86ISD_NODE_NAME_CASE(FMAX)
  X86ISD_NODE_NAME_CASE(FMIN)
  X86ISD_NODE_NAME_CASE(FMAXC)
  X86ISD_NODE_NAME_CASE(FMINC)
  X86ISD_NODE_NAME_CASE(FRSQRT)
  X86ISD_NODE_NAME_CASE(FRCP)
  X86ISD_NODE_NAME_CASE(TLSADDR)
  X86ISD_NODE_NAME_CASE(TLSBASEADDR)
  X86ISD_NODE_NAME_CASE(TLSCALL)
  X86ISD_NODE_NAME_CASE(EH_SJLJ_SETJMP)
  X86ISD_NODE_NAME_CASE(EH_SJLJ_LONGJMP)
  X86ISD_NODE_NAME_CASE(EH_RETURN)
  X86ISD_NODE_NAME_CASE(TC_RETURN)
  X86ISD_NODE_NAME_CASE(FNSTCW16m)
  X86ISD_NODE_NAME_CASE(FNSTSW16r)
  X86ISD_NODE_NAME_CASE(LCMPXCHG_DAG)
  X86ISD_NODE_NAME_CASE(LCMPXCHG8_DAG)
  X86ISD_NODE_NAME_CASE(LCMPXCHG16_DAG)
  X86ISD_NODE_NAME_CASE(VZEXT_MOVL)
  X86ISD_NODE_NAME_CASE(VZEXT_LOAD)
  X86ISD_NODE_NAME_CASE(VZEXT)
  X86ISD_NODE_NAME_CASE(VSEXT)
  X86ISD_NODE_NAME_CASE(VTRUNC)
  X86ISD_NODE_NAME_CASE(VTRUNCM)
  X86ISD_NODE_NAME_CASE(VINSERT)
  X86ISD_NODE_NAME_CASE(VFPEXT)
  X86ISD_NODE_NAME_CASE(VFPROUND)
  X86ISD_NODE_NAME_CASE(VSHLDQ)
  X86ISD_NODE_NAME_CASE(VSRLDQ)
  X86ISD_NODE_NAME_CASE(VSHL)
  X86ISD_NODE_NAME_CASE(VSRL)
  X86ISD_NODE_NAME_CASE(VSRA)
  X86ISD_NODE_NAME_CASE(VSHLI)
  X86ISD_NODE_NAME_CASE(VSRLI)
  X86ISD_NODE_NAME_CASE(VSRAI)
  X86ISD_NODE_NAME_CASE(CMPP)
  X86ISD_NODE_NAME_CASE(PCMPEQ)
  X86ISD_NODE_NAME_CASE(PCMPGT)
  X86ISD_NODE_NAME_CASE(PCMPEQM)
  X86ISD_NODE_NAME_CASE(PCMPGTM)
  X86ISD_NODE_NAME_CASE(ADD)
  X86ISD_NODE_NAME_CASE(SUB)
  X86ISD_NODE_NAME_CASE(ADC)
  X86ISD_NODE_NAME_CASE(SBB)
  X86ISD_NODE_NAME_CASE(SMUL)
  X86ISD_NODE_NAME_CASE(UMUL)
  X86ISD_NODE_NAME_CASE(SMUL8)
  X86ISD_NODE_NAME_CASE(UMUL8)
  X86ISD_NODE_NAME_CASE(SDIVREM8_SEXT_HREG)
  X86ISD_NODE_NAME_CASE(UDIVREM8_ZEXT_HREG)
  X86ISD_NODE_NAME_CASE(INC)
  X86ISD_NODE_NAME_CASE(DEC)
  X86ISD_NODE_NAME_CASE(OR)
  X86ISD_NODE_NAME_CASE(XOR)
  X86ISD_NODE_NAME_CASE(AND)
  X86ISD_NODE_NAME_CASE(BEXTR)
  X86ISD_NODE_NAME_CASE(MUL_IMM)
  X86ISD_NODE_NAME_CASE(PTEST)
  X86ISD_NODE_NAME_CASE(TESTP)
  X86ISD_NODE_NAME_CASE(TESTM)
  X86ISD_NODE_NAME_CASE(TESTNM)
  X86ISD_NODE_NAME_CASE(KORTEST)
  X86ISD_NODE_NAME_CASE(PACKSS)
  X86ISD_NODE_NAME_CASE(PACKUS)
  X86ISD_NODE_NAME_CASE(PALIGNR)
  X86ISD_NODE_NAME_CASE(VALIGN)
  X86ISD_NODE_NAME_CASE(PSHUFD)
  X86ISD_NODE_NAME_CASE(PSHUFHW)
  X86ISD_NODE_NAME_CASE(PSHUFLW)
  X86ISD_NODE_NAME_CASE(SHUFP)
  X86ISD_NODE_NAME_CASE(MOVLHPS)
  X86ISD_NODE_NAME_CASE(MOVLHPD)
  X86ISD_NODE_NAME_CASE(MOVHLPS)
  X86ISD_NODE_NAME_CASE(MOVLPS)
  X86ISD_NODE_NAME_CASE(MOVLPD)
  X86ISD_NODE_NAME_CASE(MOVDDUP)
  X86ISD_NODE_NAME_CASE(MOVSHDUP)
  X86ISD_NODE_NAME_CASE(MOVSLDUP)
  X86ISD_NODE_NAME_CASE(MOVSD)
  X86ISD_NODE_NAME_CASE(MOVSS)
  X86ISD_NODE_NAME_CASE(UNPCKL)
  X86ISD_NODE_NAME_CASE(UNPCKH)
  X86ISD_NODE_NAME_CASE(VBROADCAST)
  X86ISD_NODE_NAME_CASE(VBROADCASTM)
  X86ISD_NODE_NAME_CASE(VEXTRACT)
  X86ISD_NODE_NAME_CASE(VPERMILPI)
  X86ISD_NODE_NAME_CASE(VPERM2X128)
  X86ISD_NODE_NAME_CASE(VPERMV)
  X86ISD_NODE_NAME_CASE(VPERMV3)
  X86ISD_NODE_NAME_CASE(VPERMIV3)
  X86ISD_NODE_NAME_CASE(VPERMI)
  X86ISD_NODE_NAME_CASE(PMULUDQ)
  X86ISD_NODE_NAME_CASE(PMULDQ)
  X86ISD_NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
  X86ISD_NODE_NAME_CASE(VAARG_64)
  X86ISD_NODE_NAME_CASE(WIN_ALLOCA)
  X86ISD_NODE_NAME_CASE(MEMBARRIER)
  X86ISD_NODE_NAME_CASE(SEG_ALLOCA)
  X86ISD_NODE_NAME_CASE(WIN_FTOL)
  X86ISD_NODE_NAME_CASE(SAHF)
  X86ISD_NODE_NAME_CASE(RDRAND)
  X86ISD_NODE_NAME_CASE(RDSEED)
  X86ISD_NODE_NAME_CASE(FMADD)
  X86ISD_NODE_NAME_CASE(FMSUB)
  X86ISD_NODE_NAME_CASE(FNMADD)
  X86ISD_NODE_NAME_CASE(FNMSUB)
  X86ISD_NODE_NAME_CASE(FMADDSUB)
  X86ISD_NODE_NAME_CASE(FMSUBADD)
  X86ISD_NODE_NAME_CASE(PCMPESTRI)
  X86ISD_NODE_NAME_CASE(PCMPISTRI)
  X86ISD_NODE_NAME_CASE(XTEST)
  X86ISD_NODE_NAME_CASE(COMPRESS)
  X86ISD_NODE_NAME_CASE(EXPAND)
  X86ISD_NODE_NAME_CASE(SELECT)
  X86ISD_NODE_NAME_CASE(ADDSUB)
  X86ISD_NODE_NAME_CASE(RCP28)
  X86ISD_NODE_NAME_CASE(RSQRT28)
  }

#undef X86ISD_NODE_NAME_CASE
}
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
CodeModel::Model M = getTargetMachine().getCodeModel();
Reloc::Model R = getTargetMachine().getRelocationModel();
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
return false;
if (AM.BaseGV) {
unsigned GVFlags =
Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());
if (isGlobalStubReference(GVFlags))
return false;
if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
return false;
if ((M != CodeModel::Small || R != Reloc::Static) &&
Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
switch (AM.Scale) {
case 0:
case 1:
case 2:
case 4:
case 8:
break;
case 3:
case 5:
case 9:
if (AM.HasBaseReg)
return false;
break;
default: return false;
}
return true;
}
/// Report whether a vector shift by a scalar amount is considered cheap for
/// vectors whose scalar type is that of Ty.
bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const {
  const unsigned EltBits = Ty->getScalarSizeInBits();

  // 8-bit elements are never reported as cheap.
  if (EltBits == 8)
    return false;

  // With Int256 support, 32- and 64-bit elements are not reported as cheap
  // either; everything else is.
  const bool ExcludedOnInt256 =
      Subtarget->hasInt256() && (EltBits == 32 || EltBits == 64);
  return !ExcludedOnInt256;
}
/// IR-type overload: truncation from Ty1 to Ty2 is reported free when both
/// are integer types and Ty1 is strictly wider than Ty2.
bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (Ty1->isIntegerTy() && Ty2->isIntegerTy()) {
    const unsigned SrcBits = Ty1->getPrimitiveSizeInBits();
    const unsigned DstBits = Ty2->getPrimitiveSizeInBits();
    return SrcBits > DstBits;
  }
  return false;
}
/// Return true when both types are integers and Ty1 maps to a legal value
/// type; any other combination is rejected.
bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
  const bool BothIntegers = Ty1->isIntegerTy() && Ty2->isIntegerTy();
  if (!BothIntegers || !isTypeLegal(EVT::getEVT(Ty1)))
    return false;

  // A legal integer source type is expected to be at most 64 bits wide.
  assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
  return true;
}
/// A compare immediate is legal when it fits in a signed 32-bit integer.
bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  const bool FitsInSigned32 = isInt<32>(Imm);
  return FitsInSigned32;
}
/// An add immediate is legal when it fits in a signed 32-bit integer.
bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const {
  const bool FitsInSigned32 = isInt<32>(Imm);
  return FitsInSigned32;
}
/// Value-type overload: truncation from VT1 to VT2 is reported free when both
/// are integer types and VT1 is strictly wider than VT2.
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (VT1.isInteger() && VT2.isInteger()) {
    const unsigned SrcBits = VT1.getSizeInBits();
    const unsigned DstBits = VT2.getSizeInBits();
    return SrcBits > DstBits;
  }
  return false;
}
/// IR-type overload: only the i32 -> i64 zero extension on a 64-bit subtarget
/// is reported free.
bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy(32) || !Ty2->isIntegerTy(64))
    return false;
  return Subtarget->is64Bit();
}
/// Value-type overload: only the i32 -> i64 zero extension on a 64-bit
/// subtarget is reported free.
bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (VT1 != MVT::i32 || VT2 != MVT::i64)
    return false;
  return Subtarget->is64Bit();
}
/// Value overload: besides the type-based rule, zero extending the result of
/// a LOAD node is reported free when the loaded type is i8, i16 or i32 and
/// both types are simple integers.
bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  const EVT SrcVT = Val.getValueType();

  // The purely type-based rule takes precedence.
  if (isZExtFree(SrcVT, VT2))
    return true;

  // Beyond that, only loads qualify.
  if (Val.getOpcode() != ISD::LOAD)
    return false;

  const bool BothSimpleIntegers = SrcVT.isSimple() && SrcVT.isInteger() &&
                                  VT2.isSimple() && VT2.isInteger();
  if (!BothSimpleIntegers)
    return false;

  switch (SrcVT.getSimpleVT().SimpleTy) {
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    return true;
  default:
    return false;
  }
}
/// Unconditionally reports that a vector extending load is desirable; the
/// value argument is ignored.
bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const {
  return true;
}
/// Return true when the subtarget has FMA or FMA4 and the scalar type of VT
/// is f32 or f64.
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  // Neither FMA flavour available: nothing to prefer.
  if (!Subtarget->hasFMA() && !Subtarget->hasFMA4())
    return false;

  // Only the element type matters from here on.
  const EVT ScalarVT = VT.getScalarType();
  if (!ScalarVT.isSimple())
    return false;

  switch (ScalarVT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    return false;
  }
}
/// Narrowing is reported profitable for every type pair except i32 -> i16.
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  const bool IsI32ToI16 = VT1 == MVT::i32 && VT2 == MVT::i16;
  return !IsI32ToI16;
}
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
if (!VT.isSimple())
return false;
MVT SVT = VT.getSimpleVT();
if (VT.getSizeInBits() == 64)
return false;
if (ExperimentalVectorShuffleLegality)
return isTypeLegal(SVT);
if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
(SVT.is256BitVector() && Subtarget->hasInt256())) {
bool isLegal = true;
for (unsigned I = 0, E = M.size(); I != E; ++I) {
if (M[I] >= (int)SVT.getVectorNumElements() ||
ShuffleCrosses128bitLane(SVT, I, M[I])) {
isLegal = false;
break;
}
}
if (isLegal)
return true;
}
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
isCommutedMOVLMask(M, SVT) ||
isMOVHLPSMask(M, SVT) ||
isSHUFPMask(M, SVT) ||
isSHUFPMask(M, SVT, true) ||
isPSHUFDMask(M, SVT) ||
isPSHUFDMask(M, SVT, true) ||
isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
isPALIGNRMask(M, SVT, Subtarget) ||
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()) ||
(Subtarget->hasSSE41() && isINSERTPSMask(M, SVT)));
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
if (!VT.isSimple())
return false;
MVT SVT = VT.getSimpleVT();
if (ExperimentalVectorShuffleLegality)
return isShuffleMaskLegal(Mask, VT);
unsigned NumElts = SVT.getVectorNumElements();
if (NumElts == 2)
return true;
if (NumElts == 4 && SVT.is128BitVector()) {
return (isMOVLMask(Mask, SVT) ||
isCommutedMOVLMask(Mask, SVT, true) ||
isSHUFPMask(Mask, SVT) ||
isSHUFPMask(Mask, SVT, true) ||
isBlendMask(Mask, SVT, Subtarget->hasSSE41(),
Subtarget->hasInt256()));
}
return false;
}
/// Expand the pseudo instruction MI into an XBEGIN_4 based sequence.
///
/// MBB is split after MI: the trailing instructions move into a new sink
/// block, and a new main block is placed between MBB and the sink. MBB ends
/// with XBEGIN_4 targeting the sink; the main block sets EAX to -1 and flows
/// into the sink, where EAX is copied into MI's result register.
/// Returns the sink block.
static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
                                     const TargetInstrInfo *TII) {
  DebugLoc Loc = MI->getDebugLoc();
  const BasicBlock *IRBlock = MBB->getBasicBlock();

  // New blocks are inserted directly after MBB in function layout.
  MachineFunction::iterator InsertPos = MBB;
  ++InsertPos;

  MachineBasicBlock *HeadMBB = MBB;
  MachineFunction *Fn = MBB->getParent();
  MachineBasicBlock *FallthroughMBB = Fn->CreateMachineBasicBlock(IRBlock);
  MachineBasicBlock *JoinMBB = Fn->CreateMachineBasicBlock(IRBlock);
  Fn->insert(InsertPos, FallthroughMBB);
  Fn->insert(InsertPos, JoinMBB);

  // Everything after MI, together with MBB's successors, now belongs to the
  // join block.
  MachineBasicBlock::iterator AfterMI =
      std::next(MachineBasicBlock::iterator(MI));
  JoinMBB->splice(JoinMBB->begin(), MBB, AfterMI, MBB->end());
  JoinMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Head block: XBEGIN_4 with the join block as its target; control may
  // continue in either new block.
  BuildMI(HeadMBB, Loc, TII->get(X86::XBEGIN_4)).addMBB(JoinMBB);
  HeadMBB->addSuccessor(FallthroughMBB);
  HeadMBB->addSuccessor(JoinMBB);

  // Fall-through block: EAX = -1, then continue into the join block.
  BuildMI(FallthroughMBB, Loc, TII->get(X86::MOV32ri), X86::EAX).addImm(-1);
  FallthroughMBB->addSuccessor(JoinMBB);

  // Join block: EAX is live on entry and becomes the pseudo's result.
  JoinMBB->addLiveIn(X86::EAX);
  const unsigned ResultReg = MI->getOperand(0).getReg();
  BuildMI(*JoinMBB, JoinMBB->begin(), Loc, TII->get(TargetOpcode::COPY),
          ResultReg)
      .addReg(X86::EAX);

  // The pseudo has been fully replaced.
  MI->eraseFromParent();
  return JoinMBB;
}
/// Replace a PCMP{I,E}STRM128 pseudo (register or memory form, with or
/// without the V prefix) by the corresponding real instruction followed by a
/// copy of XMM0 into the pseudo's result register. Returns BB.
static MachineBasicBlock *EmitPCMPSTRM(MachineInstr *MI, MachineBasicBlock *BB,
                                       const TargetInstrInfo *TII) {
  // Pick the real opcode that matches the pseudo.
  unsigned RealOpc;
  switch (MI->getOpcode()) {
  case X86::PCMPISTRM128REG:
    RealOpc = X86::PCMPISTRM128rr;
    break;
  case X86::VPCMPISTRM128REG:
    RealOpc = X86::VPCMPISTRM128rr;
    break;
  case X86::PCMPISTRM128MEM:
    RealOpc = X86::PCMPISTRM128rm;
    break;
  case X86::VPCMPISTRM128MEM:
    RealOpc = X86::VPCMPISTRM128rm;
    break;
  case X86::PCMPESTRM128REG:
    RealOpc = X86::PCMPESTRM128rr;
    break;
  case X86::VPCMPESTRM128REG:
    RealOpc = X86::VPCMPESTRM128rr;
    break;
  case X86::PCMPESTRM128MEM:
    RealOpc = X86::PCMPESTRM128rm;
    break;
  case X86::VPCMPESTRM128MEM:
    RealOpc = X86::VPCMPESTRM128rm;
    break;
  default:
    llvm_unreachable("illegal opcode!");
  }

  DebugLoc Loc = MI->getDebugLoc();
  MachineInstrBuilder Real = BuildMI(*BB, MI, Loc, TII->get(RealOpc));

  // Forward every operand after the result, skipping implicit register
  // operands.
  for (unsigned OpIdx = 1, OpEnd = MI->getNumOperands(); OpIdx < OpEnd;
       ++OpIdx) {
    MachineOperand &MO = MI->getOperand(OpIdx);
    if (MO.isReg() && MO.isImplicit())
      continue;
    Real.addOperand(MO);
  }

  // Carry the memory operand over when there is exactly one.
  if (MI->hasOneMemOperand())
    Real->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  // The result is read back from XMM0.
  const unsigned ResultReg = MI->getOperand(0).getReg();
  BuildMI(*BB, MI, Loc, TII->get(TargetOpcode::COPY), ResultReg)
      .addReg(X86::XMM0);

  MI->eraseFromParent();
  return BB;
}
/// Replace a PCMP{I,E}STRI pseudo (register or memory form, with or without
/// the V prefix) by the corresponding real instruction followed by a copy of
/// ECX into the pseudo's result register. Returns BB.
static MachineBasicBlock *EmitPCMPSTRI(MachineInstr *MI, MachineBasicBlock *BB,
                                       const TargetInstrInfo *TII) {
  // Pick the real opcode that matches the pseudo.
  unsigned RealOpc;
  switch (MI->getOpcode()) {
  case X86::PCMPISTRIREG:
    RealOpc = X86::PCMPISTRIrr;
    break;
  case X86::VPCMPISTRIREG:
    RealOpc = X86::VPCMPISTRIrr;
    break;
  case X86::PCMPISTRIMEM:
    RealOpc = X86::PCMPISTRIrm;
    break;
  case X86::VPCMPISTRIMEM:
    RealOpc = X86::VPCMPISTRIrm;
    break;
  case X86::PCMPESTRIREG:
    RealOpc = X86::PCMPESTRIrr;
    break;
  case X86::VPCMPESTRIREG:
    RealOpc = X86::VPCMPESTRIrr;
    break;
  case X86::PCMPESTRIMEM:
    RealOpc = X86::PCMPESTRIrm;
    break;
  case X86::VPCMPESTRIMEM:
    RealOpc = X86::VPCMPESTRIrm;
    break;
  default:
    llvm_unreachable("illegal opcode!");
  }

  DebugLoc Loc = MI->getDebugLoc();
  MachineInstrBuilder Real = BuildMI(*BB, MI, Loc, TII->get(RealOpc));

  // Forward every operand after the result, skipping implicit register
  // operands.
  for (unsigned OpIdx = 1, OpEnd = MI->getNumOperands(); OpIdx < OpEnd;
       ++OpIdx) {
    MachineOperand &MO = MI->getOperand(OpIdx);
    if (MO.isReg() && MO.isImplicit())
      continue;
    Real.addOperand(MO);
  }

  // Carry the memory operand over when there is exactly one.
  if (MI->hasOneMemOperand())
    Real->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  // The result is read back from ECX.
  const unsigned ResultReg = MI->getOperand(0).getReg();
  BuildMI(*BB, MI, Loc, TII->get(TargetOpcode::COPY), ResultReg)
      .addReg(X86::ECX);

  MI->eraseFromParent();
  return BB;
}
/// Expand the monitor pseudo: materialise the address operands into RAX/EAX
/// with an LEA, copy the two following register operands into ECX and EDX,
/// and emit MONITORrrr. Returns BB.
static MachineBasicBlock *EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB,
                                      const X86Subtarget *Subtarget) {
  DebugLoc Loc = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();

  // The LEA flavour and its destination register follow the pointer width.
  const unsigned LeaOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  const unsigned AddrReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;

  // The leading X86::AddrNumOperands operands of MI form the address.
  MachineInstrBuilder Lea = BuildMI(*BB, MI, Loc, TII->get(LeaOpc), AddrReg);
  for (int OpIdx = 0; OpIdx < X86::AddrNumOperands; ++OpIdx)
    Lea.addOperand(MI->getOperand(OpIdx));

  // The two operands after the address go to ECX and EDX, in that order.
  const unsigned FirstValueOp = X86::AddrNumOperands;
  BuildMI(*BB, MI, Loc, TII->get(TargetOpcode::COPY), X86::ECX)
      .addReg(MI->getOperand(FirstValueOp).getReg());
  BuildMI(*BB, MI, Loc, TII->get(TargetOpcode::COPY), X86::EDX)
      .addReg(MI->getOperand(FirstValueOp + 1).getReg());

  // The real instruction takes all of its inputs from the fixed registers.
  BuildMI(*BB, MI, Loc, TII->get(X86::MONITORrrr));

  MI->eraseFromParent();
  return BB;
}
// Custom inserter for the VAARG_64 pseudo instruction.
//
// Operand layout read below: operand 0 is the destination register,
// operands 1-5 form a memory address (base, scale, index, displacement,
// segment), operand 6 is the argument size, operand 7 the argument mode and
// operand 8 the alignment.
//
// For ArgMode 1 or 2 the block is split and three new blocks are created:
//   offsetMBB   - computes the argument address from the 64-bit value at
//                 displacement +16 plus the 32-bit offset at displacement +0
//                 (mode 1) or +4 (mode 2), then advances that offset;
//   overflowMBB - takes the argument address from the 64-bit value at
//                 displacement +8 and advances it;
//   endMBB      - receives the rest of the original block and a PHI joining
//                 the two addresses into the destination register.
// For any other ArgMode only the overflow sequence is emitted, straight into
// the original block.
// (The fields at +0/+4/+8/+16 are presumably gp_offset, fp_offset,
// overflow_arg_area and reg_save_area of the SysV x86-64 va_list - confirm.)
//
// Returns the block in which code following the pseudo now lives.
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const {
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
// Decode the pseudo's operands.
unsigned DestReg = MI->getOperand(0).getReg();
MachineOperand &Base = MI->getOperand(1);
MachineOperand &Scale = MI->getOperand(2);
MachineOperand &Index = MI->getOperand(3);
MachineOperand &Disp = MI->getOperand(4);
MachineOperand &Segment = MI->getOperand(5);
unsigned ArgSize = MI->getOperand(6).getImm();
unsigned ArgMode = MI->getOperand(7).getImm();
unsigned Align = MI->getOperand(8).getImm();
// The single memory operand is reused on every load/store emitted below.
assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
DebugLoc DL = MI->getDebugLoc();
// Register counts used to size the offset range: 6 integer registers of
// 8 bytes each, plus 8 XMM registers of 16 bytes each in FP mode.
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
// Argument size rounded up to a multiple of 8.
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
// Extra alignment of the overflow address is only needed above 8 bytes.
bool NeedsAlign = (Align > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; unsigned OverflowDestReg = 0; unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// Overflow-only mode: no new blocks; the overflow sequence writes DestReg
// directly and is appended to the end of thisMBB.
// NOTE(review): BuildMI(overflowMBB, ...) appends at the block end, which
// is only right if MI is the last instruction of the block - confirm.
OffsetDestReg = 0; OverflowDestReg = DestReg;
offsetMBB = nullptr;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// Register-offset mode: each path produces its own address register and
// the two are merged by a PHI in endMBB.
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
// Layout order after thisMBB: offsetMBB, overflowMBB, endMBB.
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Move everything after MI, and thisMBB's successors, into endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the current 32-bit offset (displacement +4 in FP mode, +0 otherwise).
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Compare the offset against MaxOffset + 8 - ArgSizeA8 and branch to the
// overflow path when it is above or equal (COND_AE); otherwise fall
// through into offsetMBB.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
.addMBB(overflowMBB);
}
if (offsetMBB) {
assert(OffsetReg != 0);
// Load the 64-bit base pointer stored at displacement +16.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 16)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Widen the 32-bit offset into a 64-bit register via SUBREG_TO_REG.
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Argument address on this path = base pointer + offset.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
// Advance the offset by 16 (FP mode) or 8 (GP mode) ...
unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// ... and store it back to the slot it was loaded from.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.addReg(NextOffsetReg)
.setMemRefs(MMOBegin, MMOEnd);
// Skip over overflowMBB.
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
// Overflow path: load the 64-bit pointer stored at displacement +8.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
if (NeedsAlign) {
// Round the pointer up to Align: (ptr + Align-1) & ~(Align-1).
assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
.addReg(OverflowAddrReg)
.addImm(Align-1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Align-1));
} else {
// No extra alignment: the loaded pointer is the argument address.
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Advance the pointer past this argument (size rounded up to 8) ...
unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// ... and store it back at displacement +8.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.addReg(NextAddrReg)
.setMemRefs(MMOBegin, MMOEnd);
if (offsetMBB) {
// Merge the addresses computed on the two paths into DestReg.
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// The pseudo has been fully expanded.
MI->eraseFromParent();
return endMBB;
}
// Custom inserter for the pseudo that saves XMM argument registers.
//
// Operand layout read below: operand 0 is a register (CountReg), operand 1
// the frame index of the save area, operand 2 the byte offset at which the
// XMM slots start, and operands 3 .. N-2 the registers to store; the last
// operand is excluded from the store loop (the assert below expects it to be
// EFLAGS when it is a register).
//
// The block is split after MI. A new XMMSaveMBB stores each register into a
// 16-byte slot; EndMBB receives the remainder of the original block. Unless
// the target is Win64, the original block tests CountReg and jumps straight
// to EndMBB when it is zero. Returns EndMBB.
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
// New blocks go directly after MBB: XMMSaveMBB first, then EndMBB.
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
// Move everything after MI, and MBB's successors, into EndMBB.
EndMBB->splice(EndMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
// MBB falls through into the save block, which falls through into EndMBB.
MBB->addSuccessor(XMMSaveMBB);
XMMSaveMBB->addSuccessor(EndMBB);
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
if (!Subtarget->isTargetWin64()) {
// Skip the save block entirely when CountReg is zero.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
MBB->addSuccessor(EndMBB);
}
assert((MI->getNumOperands() <= 3 ||
!MI->getOperand(MI->getNumOperands() - 1).isReg() ||
MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
&& "Expected last argument to be EFLAGS");
// Use the VEX-encoded store when the subtarget reports hasFp256().
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// One aligned 16-byte store per register operand, at consecutive 16-byte
// slots starting at VarArgsFPOffset within the save-area frame index.
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
MachineMemOperand *MMO =
F->getMachineMemOperand(
MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
16, 16);
// Address operands: frame index base, scale 1, no index, Offset as the
// displacement, no segment; followed by the register being stored.
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
.addFrameIndex(RegSaveFrameIndex)
.addImm(1)
.addReg(0)
.addImm(Offset)
.addReg(0)
.addReg(MI->getOperand(i).getReg())
.addMemOperand(MMO);
}
// The pseudo has been fully expanded.
MI->eraseFromParent();
return EndMBB;
}
/// Determine whether EFLAGS is dead after the instruction at SelectItr and,
/// if so, mark that instruction as killing EFLAGS.
///
/// Returns false (leaving the instruction untouched) when a later
/// instruction in BB reads EFLAGS before any redefinition, or when the end
/// of BB is reached without a redefinition and some successor has EFLAGS
/// live-in. Otherwise adds the kill flag and returns true.
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
                                     MachineBasicBlock* BB,
                                     const TargetRegisterInfo* TRI) {
  // Walk the instructions that follow the select within this block.
  MachineBasicBlock::iterator Scan(std::next(SelectItr));
  MachineBasicBlock::iterator ScanEnd = BB->end();
  while (Scan != ScanEnd) {
    const MachineInstr &Cur = *Scan;
    // A reader means the flags are still needed.
    if (Cur.readsRegister(X86::EFLAGS))
      return false;
    // A redefinition ends the live range; stop scanning.
    if (Cur.definesRegister(X86::EFLAGS))
      break;
    ++Scan;
  }

  // No redefinition inside the block: the flags must not be live into any
  // successor.
  if (Scan == BB->end()) {
    MachineBasicBlock::succ_iterator SuccIt = BB->succ_begin();
    MachineBasicBlock::succ_iterator SuccEnd = BB->succ_end();
    for (; SuccIt != SuccEnd; ++SuccIt) {
      MachineBasicBlock *Succ = *SuccIt;
      if (Succ->isLiveIn(X86::EFLAGS))
        return false;
    }
  }

  // EFLAGS dies at the select; record that on the instruction.
  SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
  return true;
}
// Custom inserter that turns a select pseudo into explicit control flow.
//
// Operands read below: operand 0 is the result register, operands 1 and 2
// are the two candidate values, operand 3 is the X86 condition code.
//
// The block is split after MI into:
//   thisMBB  - ends with a conditional branch (on operand 3) to sinkMBB;
//   copy0MBB - empty fall-through block that flows into sinkMBB;
//   sinkMBB  - receives the rest of the block and starts with a PHI that
//              yields operand 1 when arriving from copy0MBB and operand 2
//              when arriving from thisMBB.
// When the next instruction is a pseudo of the same opcode that shares
// operand 2 and takes MI's result as its operand 1, both are lowered
// together: an additional jcc1MBB carrying the second condition is placed
// between thisMBB and copy0MBB, and the second pseudo's result becomes a
// copy of the PHI result.
// Returns sinkMBB.
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
// New blocks are inserted directly after BB in function layout.
MachineFunction::iterator It = BB;
++It;
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
// Look for a directly following pseudo that can be folded into this one:
// same opcode, same operand 2, and operand 1 equal to MI's result.
MachineInstr *NextCMOV = nullptr;
MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
NextCMOV = &*NextMIIt;
MachineBasicBlock *jcc1MBB = nullptr;
if (NextCMOV) {
// The second conditional branch lives in its own block, which therefore
// needs EFLAGS live on entry.
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, jcc1MBB);
jcc1MBB->addLiveIn(X86::EFLAGS);
}
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// If the last pseudo that consumes EFLAGS neither already kills it nor can
// be marked as killing it, EFLAGS stays live into the new blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
// Move everything after MI (including NextCMOV, if any), and BB's
// successors, into sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Wire up the CFG: BB -> [jcc1MBB ->] copy0MBB -> sinkMBB, plus the taken
// branch edges straight to sinkMBB.
if (NextCMOV) {
BB->addSuccessor(jcc1MBB);
jcc1MBB->addSuccessor(copy0MBB);
jcc1MBB->addSuccessor(sinkMBB);
} else {
BB->addSuccessor(copy0MBB);
}
BB->addSuccessor(sinkMBB);
// Conditional branch for MI's condition at the end of BB.
unsigned Opc =
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
if (NextCMOV) {
// Conditional branch for the folded pseudo's condition in jcc1MBB.
unsigned Opc2 = X86::GetCondBranchFromCond(
(X86::CondCode)NextCMOV->getOperand(3).getImm());
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
}
copy0MBB->addSuccessor(sinkMBB);
// PHI at the top of sinkMBB: operand 1 via the fall-through path, operand 2
// via the taken branch from thisMBB.
MachineInstrBuilder MIB =
BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
if (NextCMOV) {
// The taken branch from jcc1MBB also selects operand 2; the folded
// pseudo's result is a copy of the PHI result, placed right after the PHI.
MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
.addReg(MI->getOperand(0).getReg());
NextCMOV->eraseFromParent();
}
// The pseudo has been fully expanded.
MI->eraseFromParent(); return sinkMBB;
}
// Custom inserter for the segmented-stack alloca pseudo (asserts that the
// function uses split stacks).
//
// Operands read below: operand 0 is the result register, operand 1 the
// requested size.
//
// The block is split after MI into:
//   BB          - computes SP - size and compares it with the value stored
//                 at TlsReg:TlsOffset, branching to mallocMBB on JG;
//   bumpMBB     - installs the new stack pointer and uses it as the result;
//   mallocMBB   - calls __morestack_allocate_stack_space with the size and
//                 uses the returned pointer as the result;
//   continueMBB - receives the rest of the block and a PHI joining both
//                 results.
// Returns continueMBB.
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
const bool Is64Bit = Subtarget->is64Bit();
const bool IsLP64 = Subtarget->isTarget64BitLP64();
// Segment register and offset of the value the new stack pointer is
// compared against: FS on 64-bit, GS on 32-bit; offset 0x70 for LP64,
// 0x40 for other 64-bit targets, 0x30 for 32-bit.
// (Presumably the per-thread stack limit slot - confirm.)
const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy());
// Virtual registers for the two candidate results and the intermediate
// stack-pointer values; the physical SP is RSP on LP64 and NaCl64 targets,
// ESP otherwise.
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
// Layout order after BB: bumpMBB, mallocMBB, continueMBB.
MachineFunction::iterator MBBIter = BB;
++MBBIter;
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
MF->insert(MBBIter, continueMBB);
// Move everything after MI, and BB's successors, into continueMBB.
continueMBB->splice(continueMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
continueMBB->transferSuccessorsAndUpdatePHIs(BB);
// SPLimitVReg = SP - size; compare the TLS-stored value against it and
// branch to mallocMBB on JG, otherwise fall through into bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_1)).addMBB(mallocMBB);
// bumpMBB: the lowered stack pointer becomes both the new SP and the
// result on this path.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// mallocMBB: call the runtime allocator using the C calling convention's
// preserved-register mask.
const uint32_t *RegMask =
Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
// LP64: size in RDI, result in RAX.
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
// 64-bit but not LP64: size in EDI, result in EAX.
BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
// 32-bit: drop SP by 12, push the size, and call; result in EAX.
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
// 32-bit only: give back the 16 bytes taken by the SUB and PUSH above.
if (!Is64Bit)
BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
// The call's return register is the result on this path.
BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Record the CFG edges.
BB->addSuccessor(bumpMBB);
BB->addSuccessor(mallocMBB);
mallocMBB->addSuccessor(continueMBB);
bumpMBB->addSuccessor(continueMBB);
// Merge the two candidate pointers into the pseudo's result register.
BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
MI->getOperand(0).getReg())
.addReg(mallocPtrVReg).addMBB(mallocMBB)
.addReg(bumpSPPtrVReg).addMBB(bumpMBB);
// The pseudo has been fully expanded.
MI->eraseFromParent();
return continueMBB;
}
/// Custom inserter for the Windows alloca pseudo: emits the stack probe call
/// in front of MI through X86FrameLowering and removes the pseudo. Not
/// expected on Mach-O targets. Returns BB.
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
  assert(!Subtarget->isTargetMachO());

  DebugLoc Loc = MI->getDebugLoc();
  MachineFunction &MF = *BB->getParent();

  // The frame lowering code emits the actual probe sequence before MI.
  X86FrameLowering::emitStackProbeCall(MF, *BB, MI, Loc);

  MI->eraseFromParent();
  return BB;
}
// Custom inserter for the TLS call pseudo (asserted to be Darwin only).
//
// Operand 3 of MI must be a global. In every case the sequence is a load
// through the global's address into a register, followed by an indirect call
// through the memory that register points to, with the C calling
// convention's preserved-register mask:
//   64-bit:          RIP-relative load into RDI, CALL64m via RDI, defines RAX;
//   32-bit, non-PIC: absolute load into EAX, CALL32m via EAX, defines EAX;
//   32-bit, PIC:     load relative to the global base register into EAX,
//                    CALL32m via EAX, defines EAX.
// All instructions are inserted before MI, which is then erased. Returns BB.
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
// Registers preserved across the call follow the C calling convention.
const uint32_t *RegMask =
Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
// RDI = load [RIP + global]; call [RDI]; result in RAX.
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (F->getTarget().getRelocationModel() != Reloc::PIC_) {
// EAX = load [global] (no base register); call [EAX]; result in EAX.
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
// EAX = load [global base reg + global]; call [EAX]; result in EAX.
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
// The pseudo has been fully expanded.
MI->eraseFromParent(); return BB;
}
/// Expand an EH_SjLj_SetJmp pseudo into explicit control flow.
///
/// For `v = setjmp(buf)` the following shape is produced:
///
///   thisMBB (the original block, up to MI):
///     buf[LabelOffset] = address of restoreMBB
///     EH_SjLj_Setup restoreMBB
///   mainMBB:
///     v_main = 0
///   sinkMBB (everything that followed MI):
///     v = phi(v_main from mainMBB, v_restore from restoreMBB)
///   restoreMBB (appended at the end of the function):
///     reload the base pointer from the frame, if one is in use
///     v_restore = 1
///     jump to sinkMBB
///
/// Operand 0 of \p MI is the i32 result register; the following
/// X86::AddrNumOperands operands address the buffer.
///
/// \returns sinkMBB.
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
// I ends up pointing at the block after MBB; new blocks are inserted there.
MachineFunction::iterator I = MBB;
++I;
// Memory operands of the pseudo, reused on the store emitted below.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
unsigned DstReg;
unsigned MemOpndSlot = 0;
unsigned CurOp = 0;
DstReg = MI->getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(RC->hasType(MVT::i32) && "Invalid destination!");
// One result register per incoming path; they are merged by the PHI below.
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
// The address operands of the buffer start right after the result operand.
MemOpndSlot = CurOp;
MVT PVT = getPointerTy();
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
// mainMBB and sinkMBB directly follow thisMBB; restoreMBB goes to the very
// end of the function.
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
MachineInstrBuilder MIB;
// Move everything after MI, along with MBB's successors, into sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB: store the address of restoreMBB one pointer-sized slot into the
// buffer.
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
Reloc::Model RM = MF->getTarget().getRelocationModel();
// The label can be stored as an immediate only for the small code model with
// static or dynamic-no-pic relocation; otherwise it is materialized with LEA.
bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
(RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
if (!UseImmLabel) {
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget->is64Bit()) {
// RIP-relative address of restoreMBB.
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addMBB(restoreMBB)
.addReg(0);
} else {
// 32-bit: address relative to the global base register.
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
.addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
.addReg(0);
}
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Emit the store: reuse the buffer address from MI, bumping only the
// displacement by LabelOffset.
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
// Value being stored: the LEA result or the block address itself.
if (!UseImmLabel)
MIB.addReg(LabelReg);
else
MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup pseudo referencing restoreMBB; it carries the "no registers
// preserved" mask.
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
// mainMBB: result is 0 on the direct path.
BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB: merge the two possible results into the original destination.
BuildMI(*sinkMBB, sinkMBB->begin(), DL,
TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB: when a base pointer is in use, reload it from its frame slot
// (addressed off the frame register) before anything else.
if (RegInfo->hasBasePointer(*MF)) {
const bool Uses64BitFramePtr =
Subtarget->isTarget64BitLP64() || Subtarget->isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
X86FI->setRestoreBasePointer(MF);
unsigned FramePtr = RegInfo->getFrameRegister(*MF);
unsigned BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
// Result is 1 on the restore path, which then rejoins at sinkMBB.
BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI->eraseFromParent();
return sinkMBB;
}
/// Expand an EH_SjLj_LongJmp pseudo.
///
/// The first X86::AddrNumOperands operands of \p MI address a buffer of
/// pointer-sized slots. Three loads and an indirect jump are emitted in
/// front of \p MI:
///   frame pointer (RBP/EBP)  <- buf[0]
///   jump target (fresh vreg) <- buf[1]
///   stack pointer            <- buf[2]
///   jmp *target
///
/// \returns \p MBB; the pseudo is erased.
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc Loc = MI->getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  const bool Is64BitPtr = (PVT == MVT::i64);

  // Virtual register that will hold the address to jump to.
  const TargetRegisterClass *PtrRC =
      Is64BitPtr ? &X86::GR64RegClass : &X86::GR32RegClass;
  unsigned JumpTargetReg = MRI.createVirtualRegister(PtrRC);

  const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  const unsigned FrameReg = Is64BitPtr ? X86::RBP : X86::EBP;
  const unsigned StackReg = RegInfo->getStackRegister();

  const unsigned LoadOpc = Is64BitPtr ? X86::MOV64rm : X86::MOV32rm;
  const unsigned JumpOpc = Is64BitPtr ? X86::JMP64r : X86::JMP32r;

  // Byte offsets of the label and stack-pointer slots within the buffer.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();

  // Emit "DestReg = load [buf (+ Offset)]" before MI. When AdjustDisp is
  // false the address operands are copied verbatim; otherwise only the
  // displacement operand is rebuilt with Offset added.
  auto EmitSlotLoad = [&](unsigned DestReg, bool AdjustDisp, int64_t Offset) {
    MachineInstrBuilder Load =
        BuildMI(*MBB, MI, Loc, TII->get(LoadOpc), DestReg);
    for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) {
      if (AdjustDisp && Idx == X86::AddrDisp)
        Load.addDisp(MI->getOperand(Idx), Offset);
      else
        Load.addOperand(MI->getOperand(Idx));
    }
    Load.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  };

  EmitSlotLoad(FrameReg, /*AdjustDisp=*/false, 0);         // reload FP
  EmitSlotLoad(JumpTargetReg, /*AdjustDisp=*/true, LabelOffset); // reload IP
  EmitSlotLoad(StackReg, /*AdjustDisp=*/true, SPOffset);   // reload SP

  // Transfer control to the saved label.
  BuildMI(*MBB, MI, Loc, TII->get(JumpOpc)).addReg(JumpTargetReg);

  MI->eraseFromParent();
  return MBB;
}
/// Rewrite a 213-form FMA3 instruction into its 231 form when its addend
/// (operand 3) is a PHI that is fed by the FMA's own result.
///
/// The replacement is built with the operands reordered as
/// (dst, old op3, old op2, old op1) and inserted in front of \p MI, which is
/// then erased. If the addend is not a register, is not defined by a PHI, or
/// none of the PHI's incoming values is defined by \p MI, nothing changes.
///
/// \returns \p MBB in every case.
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
                                 MachineBasicBlock *MBB) const {
  MachineOperand &AddendOp = MI->getOperand(3);

  // Bail out early if the addend isn't a register - we can't switch these.
  if (!AddendOp.isReg())
    return MBB;

  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");

  // Only addends produced by a PHI are of interest.
  MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg());
  if (!AddendDef.isPHI())
    return MBB;

  // PHI operands come in (value, block) pairs after the def, so the incoming
  // values sit at the odd indices.
  for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
    assert(AddendDef.getOperand(i).isReg());
    // Bind by reference; the operand is only inspected, never modified.
    const MachineOperand &PHISrcOp = AddendDef.getOperand(i);
    MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg());
    if (&PHISrcInst != MI)
      continue;

    // MI feeds its own addend through the PHI: switch to the 231 variant.
    unsigned NewFMAOpc = 0;
    switch (MI->getOpcode()) {
    case X86::VFMADDPDr213r: NewFMAOpc = X86::VFMADDPDr231r; break;
    case X86::VFMADDPSr213r: NewFMAOpc = X86::VFMADDPSr231r; break;
    case X86::VFMADDSDr213r: NewFMAOpc = X86::VFMADDSDr231r; break;
    case X86::VFMADDSSr213r: NewFMAOpc = X86::VFMADDSSr231r; break;
    case X86::VFMSUBPDr213r: NewFMAOpc = X86::VFMSUBPDr231r; break;
    case X86::VFMSUBPSr213r: NewFMAOpc = X86::VFMSUBPSr231r; break;
    case X86::VFMSUBSDr213r: NewFMAOpc = X86::VFMSUBSDr231r; break;
    case X86::VFMSUBSSr213r: NewFMAOpc = X86::VFMSUBSSr231r; break;
    case X86::VFNMADDPDr213r: NewFMAOpc = X86::VFNMADDPDr231r; break;
    case X86::VFNMADDPSr213r: NewFMAOpc = X86::VFNMADDPSr231r; break;
    case X86::VFNMADDSDr213r: NewFMAOpc = X86::VFNMADDSDr231r; break;
    case X86::VFNMADDSSr213r: NewFMAOpc = X86::VFNMADDSSr231r; break;
    case X86::VFNMSUBPDr213r: NewFMAOpc = X86::VFNMSUBPDr231r; break;
    case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break;
    case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break;
    case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break;
    case X86::VFMADDSUBPDr213r: NewFMAOpc = X86::VFMADDSUBPDr231r; break;
    case X86::VFMADDSUBPSr213r: NewFMAOpc = X86::VFMADDSUBPSr231r; break;
    case X86::VFMSUBADDPDr213r: NewFMAOpc = X86::VFMSUBADDPDr231r; break;
    case X86::VFMSUBADDPSr213r: NewFMAOpc = X86::VFMSUBADDPSr231r; break;

    case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break;
    case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break;
    case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break;
    case X86::VFMSUBPSr213rY: NewFMAOpc = X86::VFMSUBPSr231rY; break;
    case X86::VFNMADDPDr213rY: NewFMAOpc = X86::VFNMADDPDr231rY; break;
    case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break;
    case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break;
    case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break;
    case X86::VFMADDSUBPDr213rY: NewFMAOpc = X86::VFMADDSUBPDr231rY; break;
    case X86::VFMADDSUBPSr213rY: NewFMAOpc = X86::VFMADDSUBPSr231rY; break;
    case X86::VFMSUBADDPDr213rY: NewFMAOpc = X86::VFMSUBADDPDr231rY; break;
    case X86::VFMSUBADDPSr213rY: NewFMAOpc = X86::VFMSUBADDPSr231rY; break;
    default: llvm_unreachable("Unrecognized FMA variant.");
    }

    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    MachineInstrBuilder MIB =
        BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
            .addOperand(MI->getOperand(0))
            .addOperand(MI->getOperand(3))
            .addOperand(MI->getOperand(2))
            .addOperand(MI->getOperand(1));
    MBB->insert(MachineBasicBlock::iterator(MI), MIB);
    MI->eraseFromParent();

    // MI is gone; stop here instead of continuing to compare the remaining
    // PHI inputs against a dangling pointer.
    return MBB;
  }

  return MBB;
}
/// Expand a pseudo instruction that requires custom insertion.
///
/// Dispatches on the opcode of \p MI to the matching expander. Most cases
/// forward to a dedicated helper; the FPxx_TO_INTyy_IN_MEM pseudos are
/// expanded inline (see below). TCRETURN*64 pseudos are left untouched.
/// Reaching this function with a TAILJMP*64 opcode or any unlisted opcode is
/// a programming error (llvm_unreachable).
///
/// \param MI the pseudo instruction to expand.
/// \param BB the block containing \p MI.
/// \returns whatever the selected expander returns; \p BB itself for the
///          cases handled inline.
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  switch (MI->getOpcode()) {
  default: llvm_unreachable("Unexpected instr type to insert");
  case X86::TAILJMPd64:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64_REX:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    llvm_unreachable("TAILJMP64 would not be touched here.");
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
    return BB;
  case X86::WIN_ALLOCA:
    return EmitLoweredWinAlloca(MI, BB);
  case X86::SEG_ALLOCA_32:
  case X86::SEG_ALLOCA_64:
    return EmitLoweredSegAlloca(MI, BB);
  case X86::TLSCall_32:
  case X86::TLSCall_64:
    return EmitLoweredTLSCall(MI, BB);
  case X86::CMOV_GR8:
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64:
  case X86::CMOV_V8F32:
  case X86::CMOV_V4F64:
  case X86::CMOV_V4I64:
  case X86::CMOV_V16F32:
  case X86::CMOV_V8F64:
  case X86::CMOV_V8I64:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
    return EmitLoweredSelect(MI, BB);

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    MachineFunction *F = BB->getParent();
    const TargetInstrInfo *TII = Subtarget->getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();

    // Temporarily switch the x87 control word around the integer store.
    // A 2-byte stack slot holds the control word image.
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Keep the original control word in a register.
    unsigned OldCW =
        F->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
                      CWFrameIdx);

    // Overwrite the slot with 0xC7F (per the x87 control-word layout this
    // sets the rounding-control field to round-toward-zero) ...
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
        .addImm(0xC7F);

    // ... and load it as the active control word.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Put the original value back into the slot so the final FLDCW below
    // restores it.
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
        .addReg(OldCW);

    // Pick the integer-store opcode matching the pseudo.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    // Decode the destination address from operands 0-3 of the pseudo.
    //
    // Each operand is bound to its own const reference. Re-assigning through
    // a single non-const `MachineOperand &` would copy operands 1-3 over
    // operand 0 of MI instead of merely re-pointing the local.
    X86AddressMode AM;
    const MachineOperand &BaseOp = MI->getOperand(0);
    if (BaseOp.isReg()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = BaseOp.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = BaseOp.getIndex();
    }
    const MachineOperand &ScaleOp = MI->getOperand(1);
    if (ScaleOp.isImm())
      AM.Scale = ScaleOp.getImm();
    // The index is a register operand, so it must be read with getReg().
    const MachineOperand &IndexOp = MI->getOperand(2);
    if (IndexOp.isReg())
      AM.IndexReg = IndexOp.getReg();
    const MachineOperand &DispOp = MI->getOperand(3);
    if (DispOp.isGlobal()) {
      AM.GV = DispOp.getGlobal();
    } else {
      AM.Disp = DispOp.getImm();
    }

    // Store the value (the operand following the address) as an integer.
    addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
        .addReg(MI->getOperand(X86::AddrNumOperands).getReg());

    // Reload the original control word.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), CWFrameIdx);

    MI->eraseFromParent();
    return BB;
  }

  // String/text processing lowering.
  case X86::PCMPISTRM128REG:
  case X86::VPCMPISTRM128REG:
  case X86::PCMPISTRM128MEM:
  case X86::VPCMPISTRM128MEM:
  case X86::PCMPESTRM128REG:
  case X86::VPCMPESTRM128REG:
  case X86::PCMPESTRM128MEM:
  case X86::VPCMPESTRM128MEM:
    assert(Subtarget->hasSSE42() &&
           "Target must have SSE4.2 or AVX features enabled");
    return EmitPCMPSTRM(MI, BB, Subtarget->getInstrInfo());

  case X86::PCMPISTRIREG:
  case X86::VPCMPISTRIREG:
  case X86::PCMPISTRIMEM:
  case X86::VPCMPISTRIMEM:
  case X86::PCMPESTRIREG:
  case X86::VPCMPESTRIREG:
  case X86::PCMPESTRIMEM:
  case X86::VPCMPESTRIMEM:
    assert(Subtarget->hasSSE42() &&
           "Target must have SSE4.2 or AVX features enabled");
    return EmitPCMPSTRI(MI, BB, Subtarget->getInstrInfo());

  case X86::MONITOR:
    return EmitMonitor(MI, BB, Subtarget);

  case X86::XBEGIN:
    return EmitXBegin(MI, BB, Subtarget->getInstrInfo());

  case X86::VASTART_SAVE_XMM_REGS:
    return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);

  case X86::VAARG_64:
    return EmitVAARG64WithCustomInserter(MI, BB);

  case X86::EH_SjLj_SetJmp32:
  case X86::EH_SjLj_SetJmp64:
    return emitEHSjLjSetJmp(MI, BB);

  case X86::EH_SjLj_LongJmp32:
  case X86::EH_SjLj_LongJmp64:
    return emitEHSjLjLongJmp(MI, BB);

  case TargetOpcode::STATEPOINT:
    return emitPatchPoint(MI, BB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  // 213-form FMA3 instructions that may be profitably rewritten.
  case X86::VFMADDPDr213r:
  case X86::VFMADDPSr213r:
  case X86::VFMADDSDr213r:
  case X86::VFMADDSSr213r:
  case X86::VFMSUBPDr213r:
  case X86::VFMSUBPSr213r:
  case X86::VFMSUBSDr213r:
  case X86::VFMSUBSSr213r:
  case X86::VFNMADDPDr213r:
  case X86::VFNMADDPSr213r:
  case X86::VFNMADDSDr213r:
  case X86::VFNMADDSSr213r:
  case X86::VFNMSUBPDr213r:
  case X86::VFNMSUBPSr213r:
  case X86::VFNMSUBSDr213r:
  case X86::VFNMSUBSSr213r:
  case X86::VFMADDSUBPDr213r:
  case X86::VFMADDSUBPSr213r:
  case X86::VFMSUBADDPDr213r:
  case X86::VFMSUBADDPSr213r:
  case X86::VFMADDPDr213rY:
  case X86::VFMADDPSr213rY:
  case X86::VFMSUBPDr213rY:
  case X86::VFMSUBPSr213rY:
  case X86::VFNMADDPDr213rY:
  case X86::VFNMADDPSr213rY:
  case X86::VFNMSUBPDr213rY:
  case X86::VFNMSUBPSr213rY:
  case X86::VFMADDSUBPDr213rY:
  case X86::VFMADDSUBPSr213rY:
  case X86::VFMSUBADDPDr213rY:
  case X86::VFMSUBADDPSr213rY:
    return emitFMA3Instr(MI, BB);
  }
}
/// Report bits of \p Op that are known to be zero or one.
///
/// \p KnownZero and \p KnownOne are first cleared (their width is taken from
/// \p KnownZero on entry). Then:
///   - X86ISD::SETCC, and any result other than result 0 of the arithmetic /
///     logic nodes listed below: every bit except bit 0 is known zero.
///   - The movmsk / pmovmskb intrinsics: every bit above the number of mask
///     bits the intrinsic produces is known zero.
///   - Anything else: nothing is known.
///
/// \p DAG and \p Depth are not used by this implementation.
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  unsigned BitWidth = KnownZero.getBitWidth();
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  // Start from "nothing known".
  KnownZero = KnownOne = APInt(BitWidth, 0);

  if (Opc == ISD::INTRINSIC_WO_CHAIN) {
    // Operand 0 carries the intrinsic ID.
    unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    unsigned NumLoBits;
    switch (IntId) {
    default:
      return; // Not a mask-producing intrinsic handled here.
    case Intrinsic::x86_sse2_movmsk_pd:
      NumLoBits = 2;
      break;
    case Intrinsic::x86_sse_movmsk_ps:
    case Intrinsic::x86_avx_movmsk_pd_256:
      NumLoBits = 4;
      break;
    case Intrinsic::x86_avx_movmsk_ps_256:
    case Intrinsic::x86_mmx_pmovmskb:
      NumLoBits = 8;
      break;
    case Intrinsic::x86_sse2_pmovmskb_128:
      NumLoBits = 16;
      break;
    case Intrinsic::x86_avx2_pmovmskb:
      NumLoBits = 32;
      break;
    }
    // Only the low NumLoBits bits can be set.
    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
    return;
  }

  // Decide whether the value can only have bit 0 set.
  bool OnlyBitZeroMayBeSet = false;
  switch (Opc) {
  default:
    break;
  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::ADC:
  case X86ISD::SBB:
  case X86ISD::SMUL:
  case X86ISD::UMUL:
  case X86ISD::INC:
  case X86ISD::DEC:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    // Result 0 gets no known bits; every other result is treated like SETCC.
    OnlyBitZeroMayBeSet = Op.getResNo() != 0;
    break;
  case X86ISD::SETCC:
    OnlyBitZeroMayBeSet = true;
    break;
  }

  if (OnlyBitZeroMayBeSet)
    KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
}
/// Return the number of sign bits known for \p Op.
///
/// For X86ISD::SETCC_CARRY this is the full scalar width of the value type
/// (every bit is reported as a copy of the sign bit). For every other opcode
/// the answer is 1.
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
  SDValue Op,
  const SelectionDAG &,
  unsigned Depth) const {
  if (Op.getOpcode() != X86ISD::SETCC_CARRY)
    return 1; // Fallback case.

  EVT ScalarVT = Op.getValueType().getScalarType();
  return ScalarVT.getSizeInBits();
}
/// Determine whether \p N is a global address plus a constant offset.
///
/// An X86ISD::Wrapper whose operand 0 is a GlobalAddressSDNode is handled
/// here: \p GA receives the global and \p Offset the node's offset. Every
/// other node is deferred to the generic TargetLowering implementation.
///
/// \returns true if \p GA and \p Offset were filled in.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
                                       const GlobalValue* &GA,
                                       int64_t &Offset) const {
  const GlobalAddressSDNode *WrappedGA = nullptr;
  if (N->getOpcode() == X86ISD::Wrapper)
    WrappedGA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));

  if (!WrappedGA)
    return TargetLowering::isGAPlusOffset(N, GA, Offset);

  GA = WrappedGA->getGlobal();
  Offset = WrappedGA->getOffset();
  return true;
}
/// Return true if the shuffle mask moves the upper half of the first source
/// into the lower half of the result and leaves the upper half undefined.
///
/// Concretely, with H = NumElems/2: every mask element i in [0, H) must be
/// undef or equal to i + H, and every mask element in [H, 2H) must be
/// negative (undef).
static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
  EVT VT = SVOp->getValueType(0);
  const unsigned Half = VT.getVectorNumElements() / 2;

  for (unsigned Lo = 0; Lo != Half; ++Lo) {
    const unsigned Hi = Lo + Half;
    // Low result element must come from the matching high source element.
    if (!isUndefOrEqual(SVOp->getMaskElt(Lo), Hi))
      return false;
    // High result element must be left undefined.
    if (SVOp->getMaskElt(Hi) >= 0)
      return false;
  }
  return true;
}
/// Return true if the shuffle mask moves the lower half of the first source
/// into the upper half of the result and leaves the lower half undefined.
///
/// Concretely, with H = NumElems/2: every mask element i in [H, NumElems)
/// must be undef or equal to i - H, and the mask element at i - H must be
/// negative (undef).
static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
  EVT VT = SVOp->getValueType(0);
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned Half = NumElems / 2;

  for (unsigned Hi = Half; Hi != NumElems; ++Hi) {
    const unsigned Lo = Hi - Half;
    // High result element must come from the matching low source element.
    if (!isUndefOrEqual(SVOp->getMaskElt(Hi), Lo))
      return false;
    // Low result element must be left undefined.
    if (SVOp->getMaskElt(Lo) >= 0)
      return false;
  }
  return true;
}
/// Shuffle combines for vector_shuffle nodes built from 128-bit halves.
///
/// Three patterns are recognised:
///   1. shuffle(concat(X, undef), concat(zero, undef)) with a mask that keeps
///      the low half of the first operand and selects element 0 of the second
///      for the whole high half. If X is a load whose value has a single use,
///      the result is an X86ISD::VZEXT_LOAD; otherwise X is inserted into the
///      low 128 bits of a zero vector.
///   2. The high half of operand 0 moved into the low half (upper undefined).
///   3. The low half of operand 0 moved into the high half (lower undefined).
///
/// \returns the replacement value, or an empty SDValue if nothing matched.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget* Subtarget) {
  SDLoc dl(N);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  SDValue LHS = SVOp->getOperand(0);
  SDValue RHS = SVOp->getOperand(1);
  EVT VT = SVOp->getValueType(0);
  const unsigned NumElems = VT.getVectorNumElements();
  const unsigned Half = NumElems / 2;

  const bool BothConcats = LHS.getOpcode() == ISD::CONCAT_VECTORS &&
                           RHS.getOpcode() == ISD::CONCAT_VECTORS;

  if (!BothConcats) {
    // Pattern 2: extract the upper half and insert it at element 0.
    if (isShuffleHigh128VectorInsertLow(SVOp)) {
      SDValue Upper = Extract128BitVector(LHS, Half, DAG, dl);
      SDValue Res = Insert128BitVector(DAG.getUNDEF(VT), Upper, 0, DAG, dl);
      return DCI.CombineTo(N, Res);
    }

    // Pattern 3: extract the lower half and insert it at element Half.
    if (isShuffleLow128VectorInsertHigh(SVOp)) {
      SDValue Lower = Extract128BitVector(LHS, 0, DAG, dl);
      SDValue Res = Insert128BitVector(DAG.getUNDEF(VT), Lower, Half, DAG, dl);
      return DCI.CombineTo(N, Res);
    }

    return SDValue();
  }

  // Pattern 1. Operand shapes: concat(X, undef) and concat(zeros, undef).
  if (RHS.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
      RHS.getOperand(1).getOpcode() != ISD::UNDEF ||
      LHS.getOperand(1).getOpcode() != ISD::UNDEF ||
      !ISD::isBuildVectorAllZeros(RHS.getOperand(0).getNode()))
    return SDValue();

  // Mask shape: identity on the low half, element 0 of RHS on the high half
  // (undef is accepted anywhere).
  for (unsigned Idx = 0; Idx != Half; ++Idx) {
    if (!isUndefOrEqual(SVOp->getMaskElt(Idx), Idx))
      return SDValue();
    if (!isUndefOrEqual(SVOp->getMaskElt(Idx + Half), NumElems))
      return SDValue();
  }

  LoadSDNode *Ld = dyn_cast<LoadSDNode>(LHS.getOperand(0));
  if (Ld && Ld->hasNUsesOfValue(1, 0)) {
    // Fold the load into a zero-extending vector load.
    SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
    SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
    SDValue ZExtLoad =
        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
                                Ld->getMemoryVT(),
                                Ld->getPointerInfo(),
                                Ld->getAlignment(),
                                false /*isVolatile*/, true /*ReadMem*/,
                                false /*WriteMem*/);

    // If the old load's chain result has users, route them through a token
    // factor joining the old and new chains.
    if (Ld->hasAnyUseOfValue(1)) {
      SDValue OldChain = SDValue(Ld, 1);
      SDValue NewLoadChain = SDValue(ZExtLoad.getNode(), 1);
      SDValue Joined = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                   OldChain, NewLoadChain);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Joined);
      // The replacement above also rewrote the token factor's own operand;
      // restore it.
      DAG.UpdateNodeOperands(Joined.getNode(), SDValue(Ld, 1),
                             SDValue(ZExtLoad.getNode(), 1));
    }

    return DAG.getNode(ISD::BITCAST, dl, VT, ZExtLoad);
  }

  // Otherwise insert X into the low 128 bits of an all-zero vector.
  SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
  SDValue Res = Insert128BitVector(Zeros, LHS.getOperand(0), 0, DAG, dl);
  return DCI.CombineTo(N, Res);
}
/// Try to replace \p Root with a single, cheaper shuffle of the input of
/// \p Op, given the combined shuffle \p Mask for the whole chain.
///
/// \param Op        innermost shuffle of the chain; its operand 0 (looking
///                  through bitcasts) is the value actually being shuffled.
/// \param Root      outermost node of the chain; it is what gets replaced.
/// \param Mask      combined mask of the chain; must be non-empty.
/// \param Depth     number of shuffles folded into \p Mask so far.
/// \param HasPSHUFB whether a PSHUFB was already part of the chain.
///
/// On success the root is replaced via DCI.CombineTo and true is returned;
/// newly created nodes are added to the combiner worklist. Returns false if
/// no replacement was made.
static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
int Depth, bool HasPSHUFB, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
// Find the real input by looking through any bitcasts.
SDValue Input = Op.getOperand(0);
while (Input.getOpcode() == ISD::BITCAST)
Input = Input.getOperand(0);
MVT VT = Input.getSimpleValueType();
MVT RootVT = Root.getSimpleValueType();
SDLoc DL(Root);
// A single-element mask means the chain is a no-op: the root becomes a plain
// bitcast of the input.
if (Mask.size() == 1) {
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Input),
true);
return true;
}
// Floating-point inputs get the FP-domain instructions below; everything
// else is handled by the integer unpack case further down.
bool FloatDomain = VT.isFloatingPoint();
if (FloatDomain) {
// Two-element masks duplicating the low or the high half.
if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
bool Lo = Mask.equals(0, 0);
unsigned Shuffle;
MVT ShuffleVT;
// MOVDDUP is used for the low-half case when SSE3 is available; otherwise
// MOVLHPS / MOVHLPS with the input as both operands.
if (Lo && Subtarget->hasSSE3()) {
Shuffle = X86ISD::MOVDDUP;
ShuffleVT = MVT::v2f64;
} else {
Shuffle = Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
}
// Nothing to do when the root alone already is this very instruction;
// otherwise bitcast the input to the shuffle's type.
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
if (Shuffle == X86ISD::MOVDDUP)
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
else
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
true);
return true;
}
// Four-element masks duplicating the even or the odd elements (SSE3 only).
if (Subtarget->hasSSE3() &&
(Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
bool Lo = Mask.equals(0, 0, 2, 2);
unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
MVT ShuffleVT = MVT::v4f32;
// Same "root already is this instruction" bail-out as above.
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
true);
return true;
}
// Four-element masks that interleave the low or high half with itself:
// a unary UNPCKL / UNPCKH on v4f32.
if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
bool Lo = Mask.equals(0, 0, 1, 1);
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
MVT ShuffleVT = MVT::v4f32;
// Same "root already is this instruction" bail-out as above.
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
true);
return true;
}
}
// Non-FP inputs: 8- or 16-element masks that interleave the low or high half
// with itself become a unary UNPCKL / UNPCKH on v8i16 / v16i8.
if (!FloatDomain &&
(Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
15))) {
bool Lo = Mask[0] == 0;
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
// Same "root already is this instruction" bail-out as above.
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; MVT ShuffleVT;
switch (Mask.size()) {
case 8:
ShuffleVT = MVT::v8i16;
break;
case 16:
ShuffleVT = MVT::v16i8;
break;
default:
llvm_unreachable("Impossible mask size!");
};
Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
true);
return true;
}
// The remaining strategy is not attempted for a single shuffle.
if (Depth < 2)
return false;
// With SSSE3, chains of at least three shuffles (or ones that already
// contain a PSHUFB) are replaced by one PSHUFB with a constant byte mask.
if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
SmallVector<SDValue, 16> PSHUFBMask;
assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
// Number of bytes covered by one element of Mask.
int Ratio = 16 / Mask.size();
for (unsigned i = 0; i < 16; ++i) {
// Undef mask elements produce undef control bytes.
if (Mask[i / Ratio] == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
}
// Zeroed elements use control byte 255; otherwise the control byte is the
// source byte index derived from the element index.
int M = Mask[i / Ratio] != SM_SentinelZero
? Ratio * Mask[i / Ratio] + i % Ratio
: 255;
PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
}
Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
DCI.AddToWorklist(Op.getNode());
SDValue PSHUFBMaskOp =
DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
true);
return true;
}
// No profitable replacement found.
return false;
}
/// Walk down a chain of single-use, unary x86 target shuffles starting at
/// \p Op, composing their masks with \p RootMask, and try to replace \p Root
/// with one equivalent shuffle.
///
/// \param Op        current node in the chain (bitcasts are skipped).
/// \param Root      node that will be replaced on success.
/// \param RootMask  mask accumulated from \p Root down to, but excluding,
///                  \p Op.
/// \param Depth     recursion depth; the walk gives up beyond 8.
/// \param HasPSHUFB whether a PSHUFB has been seen in the chain so far.
///
/// The recursion first tries to fold deeper into the operand of \p Op; if
/// that does not succeed, the composed mask for the chain ending at \p Op is
/// handed to combineX86ShuffleChain.
///
/// \returns true if \p Root was replaced.
static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
ArrayRef<int> RootMask,
int Depth, bool HasPSHUFB,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
// Bound the depth of the search.
if (Depth > 8)
return false;
// Skip bitcasts whose operand has no other users.
while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse())
Op = Op.getOperand(0);
// Only 128-bit vector types are handled.
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; if (VT.getSizeInBits() != 128)
return false;
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
assert(VT.getSizeInBits() == Root.getSimpleValueType().getSizeInBits() &&
"Can only combine shuffles of the same vector register size.");
if (!isTargetShuffle(Op.getOpcode()))
return false;
// Decode this node's mask; only unary shuffles with a known mask qualify.
SmallVector<int, 16> OpMask;
bool IsUnary;
bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
if (!HaveMask || !IsUnary)
return false;
assert(VT.getVectorNumElements() == OpMask.size() &&
"Different mask size from vector size!");
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
(OpMask.size() > RootMask.size() &&
OpMask.size() % RootMask.size() == 0) ||
OpMask.size() == RootMask.size()) &&
"The smaller number of elements must divide the larger.");
// The two masks may have different element counts. Each ratio says how many
// elements of the finer mask correspond to one element of that mask; at most
// one of the two is greater than 1.
int RootRatio = std::max<int>(1, OpMask.size() / RootMask.size());
int OpRatio = std::max<int>(1, RootMask.size() / OpMask.size());
assert(((RootRatio == 1 && OpRatio == 1) ||
(RootRatio == 1) != (OpRatio == 1)) &&
"Must not have a ratio for both incoming and op masks!");
// Compose the masks at the finer of the two granularities.
SmallVector<int, 16> Mask;
Mask.reserve(std::max(OpMask.size(), RootMask.size()));
for (int i = 0, e = std::max(OpMask.size(), RootMask.size()); i < e; ++i) {
int RootIdx = i / RootRatio;
// Negative (sentinel) entries of the root mask are propagated unchanged.
if (RootMask[RootIdx] < 0) {
Mask.push_back(RootMask[RootIdx]);
continue;
}
// Fine-grained index selected by the root mask, then the op-mask element
// that covers it.
int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
int OpIdx = RootMaskedIdx / OpRatio;
// Negative (sentinel) entries of the op mask are propagated unchanged too.
if (OpMask[OpIdx] < 0) {
Mask.push_back(OpMask[OpIdx]);
continue;
}
// Map through the op mask, keeping the offset within the coarse element.
Mask.push_back(OpMask[OpIdx] * OpRatio +
RootMaskedIdx % OpRatio);
}
// First try to fold further down the chain through this node's operand.
switch (Op.getOpcode()) {
case X86ISD::PSHUFB:
HasPSHUFB = true;
// Deliberate fallthrough: PSHUFB is otherwise treated like the PSHUF* nodes.
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
if (Op.getOperand(0).hasOneUse() &&
combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
HasPSHUFB, DAG, DCI, Subtarget))
return true;
break;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!");
// The input is used twice by this node, so check that this node is its only
// user instead of checking for a single use.
if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
HasPSHUFB, DAG, DCI, Subtarget))
return true;
break;
}
// Shrink the mask to the coarsest granularity that still expresses it.
SmallVector<int, 16> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
WidenedMask.clear();
}
// Finally try to emit a single shuffle for the chain ending at Op.
return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
Subtarget);
}
/// Return the 4-element mask of a PSHUFD, PSHUFLW or PSHUFHW node.
///
/// The full target shuffle mask is decoded first and then reduced to the four
/// elements the instruction actually controls:
///   - PSHUFD:  the decoded mask is returned as-is.
///   - PSHUFLW: only the first four entries are kept.
///   - PSHUFHW: the first four entries are dropped and the rest are rebased
///              by subtracting 4.
///
/// Any other opcode is a programming error (llvm_unreachable).
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
  SmallVector<int, 4> Mask;
  bool IsUnary;
  bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(),
                                       Mask, IsUnary);
  (void)HaveMask;
  assert(HaveMask);

  const unsigned Opcode = N.getOpcode();
  if (Opcode == X86ISD::PSHUFD)
    return Mask;

  if (Opcode == X86ISD::PSHUFLW) {
    // Keep only the low-half entries.
    Mask.resize(4);
    return Mask;
  }

  if (Opcode == X86ISD::PSHUFHW) {
    // Keep only the high-half entries, expressed relative to the high half.
    Mask.erase(Mask.begin(), Mask.begin() + 4);
    for (unsigned Idx = 0, End = Mask.size(); Idx != End; ++Idx)
      Mask[Idx] -= 4;
    return Mask;
  }

  llvm_unreachable("No valid shuffle instruction found!");
}
/// Try to merge the PSHUFD node \p N into an earlier shuffle found by walking
/// up its single-use operand chain.
///
/// The walk looks through bitcasts and, under the conditions checked below,
/// through PSHUFLW / PSHUFHW and unary UNPCKL / UNPCKH nodes, remembering the
/// skipped shuffles in `Chain`. If a suitable shuffle is reached, \p Mask is
/// composed with that shuffle's mask (and is modified in place), a new
/// shuffle with the merged immediate is built, and the skipped nodes are
/// re-created on top of it.
///
/// \returns the rebuilt value (bitcast to N's type if needed), or an empty
///          SDValue if no merge was possible. \p DCI is not used.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
// Shuffles that were walked through and must be rebuilt afterwards.
SmallVector<SDValue, 8> Chain;
SDValue V = N.getOperand(0);
// Walk up single-use operands. `continue` moves on to the next operand;
// `break` out of the switch falls to the `break` after it and ends the walk
// with V at the shuffle to merge with.
for (; V.hasOneUse(); V = V.getOperand(0)) {
switch (V.getOpcode()) {
default:
// Anything else blocks the combine.
return SDValue();
case ISD::BITCAST:
// Look through bitcasts.
continue;
case X86ISD::PSHUFD:
// Found another dword shuffle to merge with.
break;
case X86ISD::PSHUFLW:
// Can only be walked through if our mask keeps dwords 0 and 1 in place and
// draws dwords 2 and 3 from {2, 3}.
if (Mask[0] != 0 || Mask[1] != 1 ||
!(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::PSHUFHW:
// Mirror image: dwords 2 and 3 stay in place, dwords 0 and 1 come from
// {0, 1}.
if (Mask[2] != 2 || Mask[3] != 3 ||
!(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
return SDValue();
Chain.push_back(V);
continue;
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// Only byte and word unpacks are considered.
if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
return SDValue();
// Below an UNPCKL we look for a PSHUFLW, below an UNPCKH for a PSHUFHW.
unsigned CombineOp =
V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
// The unpack must be unary and the only user of its input.
if (V.getOperand(0) != V.getOperand(1) ||
!V->isOnlyUserOf(V.getOperand(0).getNode()))
return SDValue();
Chain.push_back(V);
V = V.getOperand(0);
// Inner walk: search below the unpack for the half shuffle to merge with.
do {
switch (V.getOpcode()) {
default:
return SDValue();
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
// The half shuffle we are looking for ends the search.
if (V.getOpcode() == CombineOp)
break;
// The other half shuffle is remembered and skipped.
Chain.push_back(V);
// Deliberate fallthrough to step to the operand.
case ISD::BITCAST:
V = V.getOperand(0);
continue;
}
break;
} while (V.hasOneUse());
break;
}
break;
}
// The walk may also have stopped because a multi-use node was reached.
if (!V.hasOneUse())
return SDValue();
// Compose our mask with the mask of the shuffle we found.
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
for (int &M : Mask)
M = VMask[M];
// Re-create that shuffle with the merged mask.
V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
getV4X86ShuffleImm8ForMask(Mask, DAG));
// Rebuild the skipped nodes on top of it, innermost first, inserting
// bitcasts wherever the types differ.
while (!Chain.empty()) {
SDValue W = Chain.pop_back_val();
if (V.getValueType() != W.getOperand(0).getValueType())
V = DAG.getNode(ISD::BITCAST, DL, W.getOperand(0).getValueType(), V);
switch (W.getOpcode()) {
default:
llvm_unreachable("Only PSHUF and UNPCK instructions get here!");
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
// Unary unpack: the new value is used for both operands.
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
break;
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
// Keep the original immediate (operand 1).
V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
break;
}
}
if (V.getValueType() != N.getValueType())
V = DAG.getNode(ISD::BITCAST, DL, N.getValueType(), V);
return V;
}
/// Try to merge the PSHUFLW / PSHUFHW node \p N into an earlier shuffle of
/// the same opcode.
///
/// The single-use operand chain of \p N is walked through bitcasts and
/// through the *other* half shuffle until a node with the same opcode as
/// \p N is found. On success \p N is replaced by its own operand, \p Mask is
/// composed with the found shuffle's mask (modified in place), and the found
/// shuffle is replaced by a v8i16 shuffle using the merged immediate.
///
/// \returns true if the combine was performed, false otherwise.
static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
                                        SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI) {
  assert(
      (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
      "Called with something other than an x86 128-bit half shuffle!");
  SDLoc DL(N);
  const unsigned CombineOpcode = N.getOpcode();

  // Search upwards for a shuffle with the same opcode.
  SDValue Cur = N.getOperand(0);
  while (Cur.hasOneUse()) {
    const unsigned CurOpcode = Cur.getOpcode();
    if (CurOpcode != ISD::BITCAST) {
      // Only half shuffles may appear between N and its match.
      if (CurOpcode != X86ISD::PSHUFLW && CurOpcode != X86ISD::PSHUFHW)
        return false;
      // Same half as N: this is the node to merge with.
      if (CurOpcode == CombineOpcode)
        break;
    }
    // Bitcast or the other half shuffle: keep walking.
    Cur = Cur.getOperand(0);
  }

  // The walk may also have stopped at a node with multiple uses.
  if (!Cur.hasOneUse())
    return false;

  // N itself becomes redundant once its mask is folded into the match.
  DCI.CombineTo(N.getNode(), N.getOperand(0), true);

  SDValue Matched = Cur;

  // Compose N's mask with the matched shuffle's mask.
  SmallVector<int, 4> MatchedMask = getPSHUFShuffleMask(Cur);
  for (unsigned Idx = 0, End = Mask.size(); Idx != End; ++Idx)
    Mask[Idx] = MatchedMask[Mask[Idx]];

  Cur = DAG.getNode(Cur.getOpcode(), DL, MVT::v8i16, Cur.getOperand(0),
                    getV4X86ShuffleImm8ForMask(Mask, DAG));

  // Replace the matched shuffle unless the merged node turned out identical.
  if (Matched != Cur)
    DCI.CombineTo(Matched.getNode(), Cur, true);

  return true;
}
// Target-specific combines for the PSHUFD / PSHUFLW / PSHUFHW shuffle nodes.
//
// N is the shuffle node to combine. Any other opcode yields an empty SDValue.
// Returns a replacement value when one is produced. An empty SDValue means
// either that nothing matched or that a helper already updated the DAG
// through DCI (see the combineRedundantHalfShuffle call below).
// Subtarget is not used by this function.
static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
// Only the three PSHUF* opcodes are handled; each is expected to decode to a
// 4-element mask.
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
default:
return SDValue();
}
// A no-op mask means the shuffle can be replaced by its input.
if (isNoopShuffleMask(Mask))
return DCI.CombineTo(N.getNode(), N.getOperand(0), true);
SDValue V = N.getOperand(0);
switch (N.getOpcode()) {
default:
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
assert(VT == MVT::v8i16);
(void)VT;
// If the helper merged this shuffle into an earlier one, the DAG has already
// been updated through DCI, so there is nothing to return.
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue();
// Mask {2,3,0,1} swaps the two 32-bit pairs of the shuffled half; rewrite it
// as a PSHUFD on v4i32 that swaps dwords 0/1 (PSHUFLW) or 2/3 (PSHUFHW).
if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
DCI.AddToWorklist(V.getNode());
V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
getV4X86ShuffleImm8ForMask(DMask, DAG));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
}
// Look for: this half shuffle duplicating words pairwise, fed by a single-use
// shuffle of the *other* half, fed (through single-use bitcasts) by a
// single-use PSHUFD. If the three masks compose to an unpack pattern, emit
// an UNPCKL/UNPCKH of the PSHUFD's input with itself instead.
if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
(V.getOpcode() == X86ISD::PSHUFLW ||
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse()) {
SDValue D = V.getOperand(0);
while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
D = D.getOperand(0);
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
// Build one 8-word mask from the two half shuffles: N covers one half of
// the words, V the other (the opcodes were checked to differ above).
int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
int WordMask[8];
for (int i = 0; i < 4; ++i) {
WordMask[i + NOffset] = Mask[i] + NOffset;
WordMask[i + VOffset] = VMask[i] + VOffset;
}
// Map every word through the dword shuffle: word w lives in dword w/2 at
// position w%2 within it.
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
if (std::equal(std::begin(MappedMask), std::end(MappedMask),
std::begin(UnpackLoMask)) ||
std::equal(std::begin(MappedMask), std::end(MappedMask),
std::begin(UnpackHiMask))) {
V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
// MappedMask[0] is 0 for the low pattern and 4 for the high pattern.
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
DL, MVT::v8i16, V, V);
}
}
}
break;
case X86ISD::PSHUFD:
if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI))
return NewN;
break;
}
return SDValue();
}
/// Try to turn a VECTOR_SHUFFLE that interleaves the lanes of an FSUB and an
/// FADD of the same two operands into a single X86ISD::ADDSUB node.
///
/// \returns the ADDSUB node on success, or an empty SDValue when \p N does
///          not match the pattern.
static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();

  // The first shuffle input must be the subtraction, the second the addition,
  // and neither may have another user.
  SDValue SubNode = N->getOperand(0);
  SDValue AddNode = N->getOperand(1);
  if (!(SubNode->getOpcode() == ISD::FSUB && AddNode->getOpcode() == ISD::FADD))
    return SDValue();
  if (!(SubNode->hasOneUse() && AddNode->hasOneUse()))
    return SDValue();

  // Both nodes must use the same operand pair; the FADD may have them in
  // either order.
  SDValue LHS = SubNode->getOperand(0);
  SDValue RHS = SubNode->getOperand(1);
  const bool SameOrder =
      AddNode->getOperand(0) == LHS && AddNode->getOperand(1) == RHS;
  const bool SwappedOrder =
      AddNode->getOperand(0) == RHS && AddNode->getOperand(1) == LHS;
  if (!SameOrder && !SwappedOrder)
    return SDValue();

  // Even result lanes come from the FSUB, odd lanes from the FADD.
  const bool MaskMatches =
      isShuffleEquivalent(Mask, 0, 3) ||
      isShuffleEquivalent(Mask, 0, 5, 2, 7) ||
      isShuffleEquivalent(Mask, 0, 9, 2, 11, 4, 13, 6, 15);
  if (!MaskMatches)
    return SDValue();

  assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
          VT == MVT::v4f64) &&
         "Unknown vector type encountered!");
  return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
}
/// DAG combine entry point for shuffles. \p N may be a generic
/// ISD::VECTOR_SHUFFLE or an X86 target shuffle (see the isTargetShuffle
/// check near the end).
///
/// \returns a replacement value, or an empty SDValue when nothing is returned
///          to the caller (either no combine applied, or a helper already
///          updated the DAG through \p DCI).
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget *Subtarget) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // After type legalization, leave shuffles of illegal element types alone.
  if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
    return SDValue();

  // FSUB/FADD lane interleave -> ADDSUB.
  if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
    if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
      return AddSub;

  // 256-bit generic shuffles have their own combine.
  if (Subtarget->hasFp256() && VT.is256BitVector() &&
      N->getOpcode() == ISD::VECTOR_SHUFFLE)
    return PerformShuffleCombine256(N, DAG, DCI, Subtarget);

  // Fold shuffle(bitcast(binop A, B), undef) into
  // shuffle(binop(bitcast A, bitcast B), undef) when the shuffle only keeps
  // the even elements in its low half and leaves the rest undefined.
  //
  // The block below reads the mask through cast<ShuffleVectorSDNode>, which
  // is only valid for ISD::VECTOR_SHUFFLE. This function is also reached for
  // target shuffles, so the opcode is checked explicitly here.
  if (N->getOpcode() == ISD::VECTOR_SHUFFLE && !DCI.isBeforeLegalize() &&
      DCI.isBeforeLegalizeOps() && N1.getOpcode() == ISD::UNDEF &&
      N0.hasOneUse() && N0.getOpcode() == ISD::BITCAST) {
    SDValue BC0 = N0.getOperand(0);
    EVT SVT = BC0.getValueType();
    unsigned Opcode = BC0.getOpcode();
    unsigned NumElts = VT.getVectorNumElements();
    if (BC0.hasOneUse() && SVT.isVector() &&
        SVT.getVectorNumElements() * 2 == NumElts &&
        TLI.isOperationLegal(Opcode, VT)) {
      // Only these binary operators are folded.
      bool CanFold = false;
      switch (Opcode) {
      default:
        break;
      case ISD::ADD:
      case ISD::FADD:
      case ISD::SUB:
      case ISD::FSUB:
      case ISD::MUL:
      case ISD::FMUL:
        CanFold = true;
      }

      unsigned SVTNumElts = SVT.getVectorNumElements();
      ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
      // Low half of the mask must be 0, 2, 4, ...
      for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
        CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
      // High half of the mask must be entirely undefined.
      for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
        CanFold = SVOp->getMaskElt(i) < 0;

      if (CanFold) {
        SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
        SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
        SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
        return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
      }
    }
  }

  // The remaining combines only apply to 128-bit vectors.
  if (!VT.is128BitVector())
    return SDValue();

  // Try to express the shuffle result as one load of consecutive elements.
  SmallVector<SDValue, 16> Elts;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
    Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
  SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
  if (LD.getNode())
    return LD;

  if (isTargetShuffle(N->getOpcode())) {
    SDValue Shuffle =
        PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
    if (Shuffle.getNode())
      return Shuffle;

    // Start the recursive combiner with a one-element mask {0}. Whether or
    // not it reports a change, nothing is returned to the caller from here.
    SmallVector<int, 1> NonceMask;
    NonceMask.push_back(0);
    if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask,
                                      1, false, DAG, DCI, Subtarget))
      return SDValue();
  }
  return SDValue();
}
/// Combine hook stub: no transformation is implemented. All parameters are
/// ignored and an empty SDValue is always returned.
static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget *Subtarget) {
  SDValue NoCombine;
  return NoCombine;
}
/// Rewrite an extract of a constant element from a single-use target shuffle
/// whose selected input is a plain load into an extract from a generic
/// VECTOR_SHUFFLE with the same mask.
///
/// \param N An extract node: operand 0 is the vector, operand 1 the index.
/// \returns the new EXTRACT_VECTOR_ELT node, or an empty SDValue when the
///          pattern does not match. Nothing is done before operation
///          legalization.
static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
                                                TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue InVec = N->getOperand(0);
  SDValue EltNo = N->getOperand(1);
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();

  EVT OriginalVT = InVec.getValueType();

  // Look through a single-use bitcast, but only when it keeps the element
  // count. The bitcast source is not guaranteed to be a vector, so check
  // before asking for its number of elements.
  if (InVec.getOpcode() == ISD::BITCAST) {
    if (!InVec.hasOneUse())
      return SDValue();
    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() ||
        BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
      return SDValue();
    InVec = InVec.getOperand(0);
  }

  EVT CurrentVT = InVec.getValueType();
  if (!isTargetShuffle(InVec.getOpcode()))
    return SDValue();
  if (!InVec.hasOneUse())
    return SDValue();

  SmallVector<int, 16> ShuffleMask;
  bool UnaryShuffle;
  if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
                            ShuffleMask, UnaryShuffle))
    return SDValue();

  // Select the shuffle input that provides the extracted element. Valid mask
  // indices are [0, NumElems); anything at or beyond NumElems is treated as
  // undefined (-1) instead of indexing past the end of the mask.
  unsigned NumElems = CurrentVT.getVectorNumElements();
  uint64_t Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  int Idx = (Elt >= NumElems) ? -1 : ShuffleMask[Elt];
  SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
                                         : InVec.getOperand(1);

  // If both shuffle inputs are the same value, that value legitimately has
  // two uses.
  unsigned AllowedUses = InVec.getNumOperands() > 1 &&
                         InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;

  if (LdNode.getOpcode() == ISD::BITCAST) {
    if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
      return SDValue();
    // Below the bitcast the load itself must be single-use.
    AllowedUses = 1;
    LdNode = LdNode.getOperand(0);
  }

  if (!ISD::isNormalLoad(LdNode.getNode()))
    return SDValue();

  // isNormalLoad succeeded above, so the cast cannot fail.
  LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
  if (!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
    return SDValue();

  // Require that a load of the element type is legal/custom and that the
  // existing load's alignment covers the element type's ABI alignment.
  EVT EltVT = N->getValueType(0);
  unsigned Align = LN0->getAlignment();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      EltVT.getTypeForEVT(*DAG.getContext()));
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
    return SDValue();

  // Rebuild the target shuffle as a generic VECTOR_SHUFFLE with the decoded
  // mask, cast it back to the original type and extract from that.
  SDLoc dl(N);
  SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT)
                                   : InVec.getOperand(1);
  Shuffle = DAG.getVectorShuffle(CurrentVT, dl, InVec.getOperand(0), Shuffle,
                                 &ShuffleMask[0]);
  Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
                     EltNo);
}
/// Combine a bitcast to x86mmx of a v2i32 BUILD_VECTOR whose second element
/// is the constant zero and whose first element is an i32 into an
/// X86ISD::MMX_MOVW2D of that first element.
///
/// \returns the MMX_MOVW2D node, or an empty SDValue when the pattern does
///          not match.
static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT ResultVT = N->getValueType(0);
  if (ResultVT != MVT::x86mmx)
    return SDValue();

  SDValue Vec = N->getOperand(0);
  if (Vec->getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();
  if (Vec->getValueType(0) != MVT::v2i32)
    return SDValue();

  // Element 1 has to be a literal zero and element 0 a plain i32.
  ConstantSDNode *HighElt = dyn_cast<ConstantSDNode>(Vec.getOperand(1));
  if (!HighElt || HighElt->getZExtValue() != 0)
    return SDValue();
  if (Vec.getOperand(0).getValueType() != MVT::i32)
    return SDValue();

  return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(Vec.getOperand(0)), ResultVT,
                     Vec.getOperand(0));
}
/// DAG combine for an extract-element node \p N (operand 0 is the vector,
/// operand 1 the index).
///
/// Three things are tried in order:
///  1. XFormVExtractWithShuffleIntoLoad.
///  2. An i32 extract of element 0 from a v2i32 bitcast of an x86mmx value
///     becomes X86ISD::MMX_MOVD2W.
///  3. When every element of a v4i32 vector is extracted with a constant
///     index and each extract only feeds a sign/zero extension, all four
///     extracts are replaced together, either by two i64 extracts plus
///     shifts or by a store to a stack temporary followed by scalar loads.
///
/// \returns a replacement value for cases 1 and 2. Case 3 rewrites the uses
///          directly in the DAG and returns an empty SDValue.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                                TargetLowering::DAGCombinerInfo &DCI) {
  SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
  if (NewOp.getNode())
    return NewOp;

  SDValue InputVector = N->getOperand(0);

  // The MMX_MOVD2W nodes built below carry no element index, so they can only
  // stand in for an extract of element 0; any other (or non-constant) index
  // must not take this path.
  // NOTE(review): presumably MMX_MOVD2W yields the low 32 bits of the MMX
  // value — confirm against the instruction definition.
  ConstantSDNode *EltIdxC = dyn_cast<ConstantSDNode>(N->getOperand(1));
  const bool ExtractsEltZero = EltIdxC && EltIdxC->isNullValue();
  if (ExtractsEltZero && InputVector.getOpcode() == ISD::BITCAST &&
      InputVector.hasOneUse() && N->getValueType(0) == MVT::i32 &&
      InputVector.getValueType() == MVT::v2i32) {
    SDValue MMXSrc = InputVector.getNode()->getOperand(0);

    // Direct form: (v2i32 bitcast (x86mmx ...)).
    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
                         N->getValueType(0), MMXSrc);

    // Indirect form: (i64 extract_elt (v1i64 bitcast (x86mmx ...))). Check
    // the opcode before touching MMXSrc's operands; a node of another kind
    // is not guaranteed to have any.
    if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
        MMXSrc.getValueType() == MVT::i64) {
      SDValue MMXSrcOp = MMXSrc.getOperand(0);
      if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST &&
          MMXSrcOp.getValueType() == MVT::v1i64 &&
          MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
        return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
                           N->getValueType(0), MMXSrcOp.getOperand(0));
    }
  }

  // The all-elements rewrite is only implemented for v4i32.
  if (InputVector.getValueType() != MVT::v4i32)
    return SDValue();

  // Every user of the vector must be a single-use, constant-index i32 extract
  // that feeds a sign or zero extension. Record which elements are read.
  SmallVector<SDNode *, 4> Uses;
  unsigned ExtractedElements = 0;
  for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
                            UE = InputVector.getNode()->use_end();
       UI != UE; ++UI) {
    if (UI.getUse().getResNo() != InputVector.getResNo())
      return SDValue();

    SDNode *Extract = *UI;
    if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (Extract->getValueType(0) != MVT::i32)
      return SDValue();
    if (!Extract->hasOneUse())
      return SDValue();
    if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
        Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
    if (!isa<ConstantSDNode>(Extract->getOperand(1)))
      return SDValue();

    // Reject out-of-range indices up front: they can never contribute to the
    // "all four elements" pattern, and shifting by an unchecked 64-bit value
    // would be undefined for large indices.
    uint64_t EltIdx =
        cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
    if (EltIdx >= 4)
      return SDValue();
    ExtractedElements |= 1u << EltIdx;
    Uses.push_back(Extract);
  }

  // All four elements (bits 0..3) must be extracted.
  if (ExtractedElements != 15)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Vals[4];
  SDLoc dl(InputVector);

  if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
    // View the vector as v2i64, extract both halves, and obtain each i32 by
    // truncating the half or the half shifted right by 32.
    SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
    EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
    SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
                                     DAG.getConstant(0, VecIdxTy));
    SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
                                  DAG.getConstant(1, VecIdxTy));
    SDValue ShAmt = DAG.getConstant(
        32, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
    Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
    Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                          DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
    Vals[2] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, TopHalf);
    Vals[3] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                          DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
  } else {
    // Store the whole vector to a stack temporary and load each element back
    // as a scalar.
    SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
    SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
                              MachinePointerInfo(), false, false, 0);
    EVT ElementType = InputVector.getValueType().getVectorElementType();
    unsigned EltSize = ElementType.getSizeInBits() / 8;
    for (unsigned i = 0; i < 4; ++i) {
      uint64_t Offset = EltSize * i;
      SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
      SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
                                       StackPtr, OffsetVal);
      Vals[i] = DAG.getLoad(ElementType, dl, Ch, ScalarAddr,
                            MachinePointerInfo(), false, false, false, 0);
    }
  }

  // Redirect every recorded extract to its replacement scalar. The indices
  // were range-checked when the uses were collected.
  for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(), UE = Uses.end();
       UI != UE; ++UI) {
    SDNode *Extract = *UI;
    SDValue Idx = Extract->getOperand(1);
    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
    DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
  }

  // The replacement was performed in place; nothing to hand back.
  return SDValue();
}
/// Match a vector select whose condition compares the same two values it
/// selects between, and map it to an integer min/max node.
///
/// \param Cond A node whose operands 0 and 1 are the compared values and
///             whose operand 2 is the condition code.
/// \param VT   The select's value type.
/// \param LHS  The select's true value.
/// \param RHS  The select's false value.
/// \returns a pair (Opcode, NeedSplit). Opcode is one of X86ISD::UMIN, UMAX,
///          SMIN, SMAX, or 0 when nothing matched. NeedSplit is set for the
///          256-bit integer types when AVX2 is not available.
static std::pair<unsigned, bool>
matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
                   SelectionDAG &DAG, const X86Subtarget *Subtarget) {
  const std::pair<unsigned, bool> NoMatch(0u, false);
  if (!VT.isVector())
    return NoMatch;

  // Check that the subtarget has the feature this vector type requires.
  bool SplitNeeded = false;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::v4i64:
  case MVT::v2i64:
    if (!Subtarget->hasVLX())
      return NoMatch;
    break;
  case MVT::v64i8:
  case MVT::v32i16:
    if (!Subtarget->hasBWI())
      return NoMatch;
    break;
  case MVT::v16i32:
  case MVT::v8i64:
    if (!Subtarget->hasAVX512())
      return NoMatch;
    break;
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v8i32:
    if (!Subtarget->hasAVX())
      return NoMatch;
    SplitNeeded = !Subtarget->hasAVX2();
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
    if (!Subtarget->hasSSE2())
      return NoMatch;
    break;
  default:
    return NoMatch;
  }

  // With only SSE2, unsigned min/max is accepted for v16i8 alone and signed
  // min/max for v8i16 alone; SSE4.1 accepts both for every type that got here.
  const bool UnsignedOK =
      Subtarget->hasSSE41() || (Subtarget->hasSSE2() && VT == MVT::v16i8);
  const bool SignedOK =
      Subtarget->hasSSE41() || (Subtarget->hasSSE2() && VT == MVT::v8i16);

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // The select arms must be the compare operands, in the same or in reversed
  // order.
  const bool SameOrder = DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
                         DAG.isEqualTo(RHS, Cond.getOperand(1));
  const bool Reversed = !SameOrder &&
                        DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
                        DAG.isEqualTo(RHS, Cond.getOperand(0));
  if (!SameOrder && !Reversed)
    return std::make_pair(0u, SplitNeeded);

  // Classify the predicate by signedness and direction.
  bool IsUnsignedCmp = false;
  bool IsLessCmp = false;
  switch (CC) {
  default:
    return std::make_pair(0u, SplitNeeded);
  case ISD::SETULT:
  case ISD::SETULE:
    IsUnsignedCmp = true;
    IsLessCmp = true;
    break;
  case ISD::SETUGT:
  case ISD::SETUGE:
    IsUnsignedCmp = true;
    break;
  case ISD::SETLT:
  case ISD::SETLE:
    IsLessCmp = true;
    break;
  case ISD::SETGT:
  case ISD::SETGE:
    break;
  }

  if (!(IsUnsignedCmp ? UnsignedOK : SignedOK))
    return std::make_pair(0u, SplitNeeded);

  // "a < b ? a : b" is a min; with the arms reversed the same predicate
  // selects the max, and vice versa for the greater-than predicates.
  const bool IsMin = (IsLessCmp != Reversed);
  unsigned Opc;
  if (IsUnsignedCmp)
    Opc = IsMin ? X86ISD::UMIN : X86ISD::UMAX;
  else
    Opc = IsMin ? X86ISD::SMIN : X86ISD::SMAX;
  return std::make_pair(Opc, SplitNeeded);
}
/// Turn a vector select whose condition is a build-vector of constants into
/// a VECTOR_SHUFFLE of its two value operands.
///
/// \param N A select-like node: operand 0 is the condition, operands 1 and 2
///          are the values selected between.
/// \returns the shuffle, or an empty SDValue when the condition is not a
///          constant build-vector, both values are constant build-vectors,
///          the blend mask cannot be computed, or the resulting shuffle mask
///          is not legal for the target. \p Subtarget is not consulted.
static SDValue
transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDLoc dl(N);
  SDValue Selector = N->getOperand(0);
  SDValue TrueVec = N->getOperand(1);
  SDValue FalseVec = N->getOperand(2);

  // Look through sign_extend(sign_extend_inreg(X)) to reach X.
  if (Selector.getOpcode() == ISD::SIGN_EXTEND) {
    SDValue Inner = Selector->getOperand(0);
    if (Inner->getOpcode() == ISD::SIGN_EXTEND_INREG)
      Selector = Inner->getOperand(0);
  }

  if (!ISD::isBuildVectorOfConstantSDNodes(Selector.getNode()))
    return SDValue();

  // Nothing is done here when both selected values are constant vectors.
  const bool BothValuesConstant =
      ISD::isBuildVectorOfConstantSDNodes(TrueVec.getNode()) &&
      ISD::isBuildVectorOfConstantSDNodes(FalseVec.getNode());
  if (BothValuesConstant)
    return SDValue();

  unsigned BlendBits = 0;
  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Selector), BlendBits))
    return SDValue();

  // Lane i reads element i of the first input when bit i of the blend mask is
  // clear and element i of the second input (index i + NumElems) when it is
  // set. Undefined condition lanes keep the initial -1.
  MVT VT = N->getSimpleValueType(0);
  const unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> ShuffleMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems; ++i) {
    if (Selector.getOperand(i)->getOpcode() == ISD::UNDEF)
      continue;
    ShuffleMask[i] = i + NumElems * ((BlendBits >> i) & 1);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isShuffleMaskLegal(ShuffleMask, VT))
    return SDValue();
  return DAG.getVectorShuffle(VT, dl, TrueVec, FalseVec, &ShuffleMask[0]);
}
// DAG combine for select-like nodes. The code below tests N's opcode against
// ISD::SELECT, ISD::VSELECT and X86ISD::SHRUNKBLEND. Operand 0 is the
// condition, operand 1 the value chosen when it is true, operand 2 the value
// chosen when it is false.
//
// Returns a replacement value, or an empty SDValue when nothing applies or
// when the DAG was already rewritten in place (see the SHRUNKBLEND section).
//
// The combines are tried in this order:
//   1. FP select on a SETCC of the same two operands -> X86ISD::FMIN/FMAX.
//   2. AVX-512: sign-extend an i1-vector condition for narrow i8/i16 selects.
//   3. Select between two integer constants -> zext/shl/add/mul of the cond.
//   4. Scalar select: SETLT/SETGT on the selected operands -> SETLE/SETGE.
//   5. VSELECT patterns that form X86ISD::SUBUS.
//   6. VSELECT integer min/max (matchIntegerMINMAX).
//   7. VSELECT with an all-ones / all-zeros arm -> OR / AND with the mask.
//   8. VSELECT whose condition simplifies when only its top bit is demanded
//      -> X86ISD::SHRUNKBLEND.
//   9. Constant condition -> blend shuffle.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// (1) FP min/max. Applies to legal FP types (or v2f32) other than f80, with
// SSE2, or with SSE1 when the scalar type is f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
unsigned Opcode = 0;
// Form: (x CC y) ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
// If either operand may be NaN, the operands are swapped; that swap is only
// done under UnsafeFPMath or when one operand is known never to be zero.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
// Skipped unless UnsafeFPMath is set or one operand is known non-zero.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
std::swap(LHS, RHS);
// Intentional fallthrough: after the swap this is handled like SETOLT.
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETOGE:
// Skipped unless UnsafeFPMath is set or one operand is known non-zero.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
// Same NaN / zero reasoning as SETULT above, producing FMAX.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
std::swap(LHS, RHS);
// Intentional fallthrough: after the swap this is handled like SETOGT.
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMAX;
break;
}
// Form: (x CC y) ? y : x, i.e. the select arms are reversed.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
// Without UnsafeFPMath and with both operands possibly zero, the operands
// are swapped, which in turn requires both to be known non-NaN.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
// Skipped when an operand may be NaN, unless UnsafeFPMath is set.
if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
std::swap(LHS, RHS);
// Intentional fallthrough.
case ISD::SETOGT:
case ISD::SETGT:
case ISD::SETGE:
Opcode = X86ISD::FMIN;
break;
case ISD::SETULT:
// Skipped when an operand may be NaN (UnsafeFPMath is not consulted here).
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
// Mirror of the SETOGE case above, producing FMAX.
if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
std::swap(LHS, RHS);
// Intentional fallthrough.
case ISD::SETOLT:
case ISD::SETLT:
case ISD::SETLE:
Opcode = X86ISD::FMAX;
break;
}
}
if (Opcode)
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
EVT CondVT = Cond.getValueType();
// (2) AVX-512 with an i1-element condition vector: for 128/256-bit selects of
// i8/i16 elements, when BWI and VLX are not both available, sign-extend the
// condition to the operand type and rebuild the select.
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
EVT OpVT = LHS.getValueType();
if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
(OpVT.getVectorElementType() == MVT::i8 ||
OpVT.getVectorElementType() == MVT::i16) &&
!(Subtarget->hasBWI() && Subtarget->hasVLX())) {
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
DCI.AddToWorklist(Cond.getNode());
return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
}
}
// (3) Select between two integer constants of a legal type.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
// Arrange for TrueC >= FalseC (unsigned) by inverting the condition; this
// is only done when Cond is a SETCC or an XOR with a constant.
bool NeedsCondInvert = false;
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
(Cond.getOpcode() == ISD::SETCC || (Cond.getOpcode() == ISD::XOR && isa<ConstantSDNode>(Cond.getOperand(1))))) {
NeedsCondInvert = true;
std::swap(TrueC, FalseC);
}
// C ? 2^k : 0  ->  zext(C) << k
if (FalseC->getAPIntValue() == 0 &&
TrueC->getAPIntValue().isPowerOf2()) {
if (NeedsCondInvert) Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
}
// C ? F+1 : F  ->  zext(C) + F
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
if (NeedsCondInvert) Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
}
// For i32/i64: when T - F is one of 1, 2, 3, 4, 5, 8, 9, use
// zext(C) * (T - F) + F.
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: case 2: case 3: case 4: case 5: case 8: case 9: isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
// Note: this APInt Diff shadows the uint64_t Diff declared above.
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
if (NeedsCondInvert) Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
DAG.getConstant(1, Cond.getValueType()));
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
return Cond;
}
}
}
}
// (4) Scalar select (x < y) ? x : y or (x > y) ? x : y: rebuild the
// condition with SETLE / SETGE respectively.
if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGT: {
ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
Cond.getOperand(0), Cond.getOperand(1), NewCC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
}
}
}
// Everything below requires a legal value type.
if (!TLI.isTypeLegal(VT))
return SDValue();
// (5) X86ISD::SUBUS formation for v16i8/v8i16 (SSE2) and v32i8/v16i16 (AVX2):
// a VSELECT on a SETCC where one arm is all zeros and the other arm ("Other")
// is a two-operand node whose first operand equals the compare's first
// operand.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
SDValue Other;
// If the zero vector is the true arm, work with the inverted condition.
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
Other = RHS;
CC = ISD::getSetCCInverse(CC, true);
} else if (ISD::isBuildVectorAllZeros(RHS.getNode())) {
Other = LHS;
}
if (Other.getNode() && Other->getNumOperands() == 2 &&
DAG.isEqualTo(Other->getOperand(0), Cond.getOperand(0))) {
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
// x >=u y ? x - y : 0  (also for >u)  ->  subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
// x >u (-C - 1) ? x + C : 0  ->  subus x, -C   (C is a constant splat)
if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
CondRHSConst->getAPIntValue() ==
(-OpRHSConst->getAPIntValue() - 1))
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(-OpRHSConst->getAPIntValue(), VT));
// x <s 0 ? x ^ SignBit : 0  ->  subus x, SignBit
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
OpRHSConst->getAPIntValue().isSignBit())
return DAG.getNode(
X86ISD::SUBUS, DL, VT, OpLHS,
DAG.getConstant(OpRHSConst->getAPIntValue(), VT));
}
}
}
// (6) Integer min/max. When matchIntegerMINMAX asks for a split, the
// operation is done on the two 128-bit halves and the results concatenated.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
unsigned Opc = ret.first;
bool NeedSplit = ret.second;
if (Opc && NeedSplit) {
unsigned NumElems = VT.getVectorNumElements();
SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
// LHS / RHS are reused here to hold the low-half and high-half results.
LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
} else if (Opc)
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
// (7) VSELECT whose condition has the same type as the result and where one
// arm is all ones or all zeros: replace the select with OR / AND (or with the
// condition itself).
if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// If instead the true arm is all zeros or the false arm is all ones, invert
// the SETCC and swap the arms so the cases below apply.
if (!TValIsAllOnes && !FValIsAllZeros &&
Cond.getOpcode() == ISD::SETCC &&
TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
if (TValIsAllZeros || FValIsAllOnes) {
SDValue CC = Cond.getOperand(2);
ISD::CondCode NewCC =
ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
Cond.getOperand(0).getValueType().isInteger());
Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
std::swap(LHS, RHS);
TValIsAllOnes = FValIsAllOnes;
FValIsAllZeros = TValIsAllZeros;
}
}
if (TValIsAllOnes || FValIsAllZeros) {
SDValue Ret;
// (C ? -1 : 0) -> C;  (C ? -1 : F) -> C | F;  (C ? T : 0) -> C & T
if (TValIsAllOnes && FValIsAllZeros)
Ret = Cond;
else if (TValIsAllOnes)
Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond,
DAG.getNode(ISD::BITCAST, DL, CondVT, RHS));
else if (FValIsAllZeros)
Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
DAG.getNode(ISD::BITCAST, DL, CondVT, LHS));
return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
}
}
// (8) Between type legalization and operation legalization: for VSELECTs
// that are legal or custom for VT (v16i16 and v8i16 excluded) and whose
// condition is not a constant build-vector, try to simplify the condition
// given that only the top bit of each element is demanded.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
(TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
VT != MVT::v8i16) &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
// i1 condition elements are left alone.
if (BitWidth == 1)
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
TLO)) {
// The simplification changed a node below Cond rather than Cond itself.
// That is only committed when every user of Cond is a VSELECT; all of those
// users are then turned into SHRUNKBLEND nodes.
if (Cond != TLO.Old) {
for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
I != E; ++I)
if (I->getOpcode() != ISD::VSELECT)
return SDValue();
for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
I != E; ++I)
DAG.ReplaceAllUsesOfValueWith(
SDValue(*I, 0),
DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
Cond, I->getOperand(1), I->getOperand(2)));
DCI.CommitTargetLoweringOpt(TLO);
return SDValue();
}
// Cond itself was simplified: replace only this node, using the new
// condition.
DAG.ReplaceAllUsesOfValueWith(
SDValue(N, 0),
DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
TLO.New, N->getOperand(1), N->getOperand(2)));
return SDValue();
}
}
// (9) After type legalization, a VSELECT or SHRUNKBLEND with a constant
// condition may become a blend shuffle.
if ((N->getOpcode() == ISD::VSELECT ||
N->getOpcode() == X86ISD::SHRUNKBLEND) &&
!DCI.isBeforeLegalize()) {
SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
if (Shuffle.getNode())
return Shuffle;
}
return SDValue();
}
// Look through a test of a boolean-like value against the constant 0 or 1 to
// find the node that produced the boolean.
//
// Cmp must be an X86ISD::CMP, or an X86ISD::SUB whose result value 0 has no
// uses, and CC must be COND_E or COND_NE on entry. One compare operand must
// be a constant 0 or 1. The other is followed through ZERO_EXTEND, TRUNCATE
// and AND-with-1 nodes down to an X86ISD::SETCC, SETCC_CARRY or CMOV.
//
// On success CC is overwritten with the condition code of that node
// (inverted when the test requires it) and the node's flags operand is
// returned. Otherwise an empty SDValue is returned.
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
if (Cmp.getOpcode() != X86ISD::CMP &&
(Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
return SDValue();
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
SDValue Op1 = Cmp.getOperand(0);
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
const ConstantSDNode* C = nullptr;
// "== 0" tests the opposite of the boolean; "!= 0" tests the boolean itself.
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false;
// Whichever operand is the constant, the other one is the tested value.
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
else if ((C = dyn_cast<ConstantSDNode>(Op2)))
SetCC = Op1;
else return SDValue();
// Comparing against 1 flips the sense; constants other than 0 and 1 are not
// handled.
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
return SDValue();
bool truncatedToBoolWithAnd = false;
// Strip zero-extends, truncates and "and X, 1" wrappers.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
// OpIdx is the index of the non-constant AND operand. An AND without a
// constant-1 operand stops the walk.
int OpIdx = -1;
ConstantSDNode *CS;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
CS->getZExtValue() == 1)
OpIdx = 1;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
CS->getZExtValue() == 1)
OpIdx = 0;
if (OpIdx == -1)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// A SETCC_CARRY compared against 1 is only accepted when it was first masked
// with "and 1".
// NOTE(review): presumably because SETCC_CARRY yields 0 or all-ones rather
// than 0 or 1 — confirm against the node definition.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
// Intentional fallthrough: handled like SETCC from here on.
case X86ISD::SETCC:
// Operand 0 is the condition code, operand 1 the flags.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(1);
case X86ISD::CMOV: {
// Operands: 0 = false value, 1 = true value, 2 = condition code, 3 = flags.
ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
if (!TVal)
return SDValue();
// A non-constant false value is only accepted when it is result 0 of an
// RDRAND / RDSEED node, optionally behind one zero-extend or truncate. It is
// then treated as the "false" (0) value below.
if (!FVal) {
SDValue Op = SetCC.getOperand(0);
if (Op.getOpcode() == ISD::ZERO_EXTEND ||
Op.getOpcode() == ISD::TRUNCATE)
Op = Op.getOperand(0);
if ((Op.getOpcode() != X86ISD::RDRAND &&
Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
return SDValue();
}
// The CMOV must select between 0 and 1; when the false value is 1 and the
// true value is 0 the condition is inverted.
bool FValIsFalse = true;
if (FVal && FVal->getZExtValue() != 0) {
if (FVal->getZExtValue() != 1)
return SDValue();
needOppositeCond = !needOppositeCond;
FValIsFalse = false;
}
if (FValIsFalse && TVal->getZExtValue() != 1)
return SDValue();
if (!FValIsFalse && TVal->getZExtValue() != 0)
return SDValue();
CC = X86::CondCode(SetCC.getConstantOperandVal(2));
if (needOppositeCond)
CC = X86::GetOppositeBranchCondition(CC);
return SetCC.getOperand(3);
}
}
return SDValue();
}
/// Recognise an AND or OR of two X86ISD::SETCC nodes that read the same flags
/// value, optionally wrapped in an X86ISD::CMP against zero.
///
/// \param Cond  The node to inspect.
/// \param CC0   Receives the condition code of the first SETCC.
/// \param CC1   Receives the condition code of the second SETCC.
/// \param Flags Receives the shared flags operand.
/// \param isAnd Set to true for AND, false for OR. It is left untouched when
///              \p Cond is a CMP whose second operand is not constant zero.
/// \returns true when the pattern matched and the outputs were filled in.
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
                                           X86::CondCode &CC1, SDValue &Flags,
                                           bool &isAnd) {
  // Peel off "cmp X, 0" to get at X.
  if (Cond->getOpcode() == X86ISD::CMP) {
    ConstantSDNode *CmpRHS = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
    const bool ComparesWithZero = CmpRHS && CmpRHS->isNullValue();
    if (!ComparesWithZero)
      return false;
    Cond = Cond->getOperand(0);
  }

  // Accept both the generic and the X86-specific AND / OR opcodes.
  const unsigned Opc = Cond->getOpcode();
  isAnd = (Opc == ISD::AND || Opc == X86ISD::AND);
  if (!isAnd && Opc != ISD::OR && Opc != X86ISD::OR)
    return false;

  SDValue First = Cond->getOperand(0);
  SDValue Second = Cond->getOperand(1);

  // Both inputs must be SETCCs of the very same flags value.
  const bool BothSetCCOnSameFlags =
      First.getOpcode() == X86ISD::SETCC &&
      Second.getOpcode() == X86ISD::SETCC &&
      First->getOperand(1) == Second->getOperand(1);
  if (!BothSetCCOnSameFlags)
    return false;

  CC0 = (X86::CondCode)First->getConstantOperandVal(0);
  CC1 = (X86::CondCode)Second->getConstantOperandVal(0);
  Flags = First->getOperand(1);
  return true;
}
/// DAG combine for an X86ISD::CMOV node. The node's operands are read as
/// (value when the condition is false, value when it is true, condition code,
/// flags-producing value). Returns a replacement value, or an empty SDValue
/// when no rewrite applies.
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc DL(N);
// Give up when the node has a second result that still has users.
if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
return SDValue();
SDValue FalseOp = N->getOperand(0);
SDValue TrueOp = N->getOperand(1);
X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
SDValue Cond = N->getOperand(3);
// An equality test on the flags of BSR/BSF whose input is known to be
// non-zero has a fixed outcome, so one of the two operands can be returned
// directly.
if (CC == X86::COND_E || CC == X86::COND_NE) {
switch (Cond.getOpcode()) {
default: break;
case X86ISD::BSR:
case X86ISD::BSF:
if (DAG.isKnownNeverZero(Cond.getOperand(0)))
return (CC == X86::COND_E) ? FalseOp : TrueOp;
}
}
SDValue Flags;
// checkBoolTestSetCCCombine is defined earlier in the file (only its tail is
// visible here). NOTE(review): it appears to return the underlying flags
// value and adjust CC when Cond merely re-tests a boolean SETCC/CMOV result;
// confirm against the full definition.
Flags = checkBoolTestSetCCCombine(Cond, CC);
// Re-emit the CMOV on the returned flags. For f80 operands this is only done
// when hasFPCMov(CC) holds.
if (Flags.getNode() &&
(FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
// Both arms are integer constants: try to replace the CMOV by arithmetic on
// a zero-extended SETCC.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) {
if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) {
// Canonicalize so that TrueC is not (unsigned) smaller than FalseC, by
// inverting the condition and swapping the arms.
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueC, FalseC);
std::swap(TrueOp, FalseOp);
}
// FalseC == 0 and TrueC == 2^k: zext(setcc) << k.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
DAG.getConstant(ShAmt, MVT::i8));
if (N->getNumValues() == 2) return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// TrueC == FalseC + 1: zext(setcc) + FalseC.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
FalseC->getValueType(0), Cond);
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
// For i32/i64 results whose arms differ by 1, 2, 3, 4, 5, 8 or 9:
// zext(setcc) * Diff + FalseC.
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
// Keep only the low 32 bits of the difference for i32 results.
if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
bool isFastMultiplier = false;
if (Diff < 10) {
switch ((unsigned char)Diff) {
default: break;
case 1: case 2: case 3: case 4: case 5: case 8: case 9: isFastMultiplier = true;
break;
}
}
if (isFastMultiplier) {
// Recompute the difference as an APInt (shadows the uint64_t above) so it
// can be used as a constant of the result type.
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(CC, MVT::i8), Cond);
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
// The multiply is skipped for Diff == 1 and the add for FalseC == 0.
if (Diff != 1)
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
DAG.getConstant(Diff, Cond.getValueType()));
if (FalseC->getAPIntValue() != 0)
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
SDValue(FalseC, 0));
if (N->getNumValues() == 2) return DCI.CombineTo(N, Cond, SDValue());
return Cond;
}
}
}
}
// After legalization: the flags come from comparing a non-constant value
// against a constant, and that same constant node is one of the CMOV arms.
if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) {
ConstantSDNode *CmpAgainst = nullptr;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
// Turn the NE form into the E form handled below.
if (CC == X86::COND_NE &&
CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) {
CC = X86::GetOppositeBranchCondition(CC);
std::swap(TrueOp, FalseOp);
}
// On equality the compared value equals the constant, so the constant arm
// is replaced by the compared value itself.
if (CC == X86::COND_E &&
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
DAG.getConstant(CC, MVT::i8), Cond };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops);
}
}
}
// CC == NE on an AND/OR of two SETCCs that share one flags value: emit two
// chained CMOVs on those flags instead.
if (CC == X86::COND_NE) {
SDValue Flags;
X86::CondCode CC0, CC1;
bool isAndSetCC;
if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
// The AND form is handled as the OR form with both conditions inverted and
// the arms swapped.
if (isAndSetCC) {
std::swap(FalseOp, TrueOp);
CC0 = X86::GetOppositeBranchCondition(CC0);
CC1 = X86::GetOppositeBranchCondition(CC1);
}
SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, MVT::i8),
Flags};
SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, MVT::i8), Flags};
SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
// Redirect the old node's second result to the new CMOV's second result.
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
return CMOV;
}
}
return SDValue();
}
/// DAG combine for INTRINSIC_WO_CHAIN nodes carrying selected x86 intrinsics.
///
/// Operand 0 of \p N is the intrinsic ID. Two families are handled:
///  * blend intrinsics (SSE4.1 / AVX / AVX2): folded to one of the inputs when
///    both inputs are identical or the mask is all-zeros / all-ones;
///  * packed arithmetic right shifts (psra / psrai): folded to the input for a
///    zero count, or to a generic ISD::SRA for an in-range constant count.
///
/// Returns the replacement value, or an empty SDValue when nothing applies.
static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
                                                const X86Subtarget *Subtarget) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default: return SDValue();
  case Intrinsic::x86_avx2_pblendvb:
  case Intrinsic::x86_avx2_pblendw:
  case Intrinsic::x86_avx2_pblendd_128:
  case Intrinsic::x86_avx2_pblendd_256:
    if (!Subtarget->hasAVX2())
      return SDValue();
    // Intentional fall-through: the feature checks cascade downwards.
  case Intrinsic::x86_avx_blend_pd_256:
  case Intrinsic::x86_avx_blend_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx_blendv_ps_256:
    if (!Subtarget->hasAVX())
      return SDValue();
    // Intentional fall-through.
  case Intrinsic::x86_sse41_pblendw:
  case Intrinsic::x86_sse41_blendpd:
  case Intrinsic::x86_sse41_blendps:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_sse41_pblendvb: {
    SDValue Op0 = N->getOperand(1);
    SDValue Op1 = N->getOperand(2);
    SDValue Mask = N->getOperand(3);

    if (!Subtarget->hasSSE41())
      return SDValue();

    // Blending a value with itself yields that value.
    if (Op0 == Op1)
      return Op0;
    // An all-zeros mask selects the first input, an all-ones mask the second;
    // the mask may be a build_vector or a scalar constant.
    if (ISD::isBuildVectorAllZeros(Mask.getNode()))
      return Op0;
    if (ISD::isBuildVectorAllOnes(Mask.getNode()))
      return Op1;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
      if (C->isNullValue())
        return Op0;
      if (C->isAllOnesValue())
        return Op1;
    }
    return SDValue();
  }

  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx2_psra_d: {
    SDValue Op0 = N->getOperand(1);
    SDValue Op1 = N->getOperand(2);
    EVT VT = Op0.getValueType();
    assert(VT.isVector() && "Expected a vector type!");

    // When the count is a build_vector, only its first operand is inspected.
    // NOTE(review): whether the first element alone fully determines the
    // hardware shift count for the non-immediate forms should be confirmed.
    if (isa<BuildVectorSDNode>(Op1))
      Op1 = Op1.getOperand(0);

    if (!isa<ConstantSDNode>(Op1))
      return SDValue();

    EVT SVT = VT.getVectorElementType();
    unsigned SVTBits = SVT.getSizeInBits();

    ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
    // Range-check the *untruncated* count. Narrowing it to the element width
    // first would let an out-of-range count (e.g. 0x10001 on 16-bit lanes)
    // wrap around to a small in-range value and be folded to the wrong shift.
    uint64_t ShAmt = CND->getAPIntValue().getZExtValue();

    // A count at or beyond the element size is not converted to ISD::SRA.
    if (ShAmt >= SVTBits)
      return SDValue();

    // A zero count leaves the input unchanged.
    if (ShAmt == 0)
      return Op0;

    // ShAmt < SVTBits here, so it is representable in SVTBits bits.
    SDValue Splat = DAG.getConstant(APInt(SVTBits, ShAmt), VT);
    return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat);
  }
  }
}
/// DAG combine for an i32/i64 multiply by a constant.
///
/// When the constant factors as {3,5,9} * {power of two, 3, 5, 9}, the
/// multiply is replaced (via DCI.CombineTo) by two chained steps, each being
/// either a left shift or an X86ISD::MUL_IMM. The function itself always
/// returns an empty SDValue.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32)
    return SDValue();

  ConstantSDNode *MulC = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!MulC)
    return SDValue();

  auto isThreeFiveOrNine = [](uint64_t V) {
    return V == 3 || V == 5 || V == 9;
  };

  const uint64_t MulAmt = MulC->getZExtValue();
  // These amounts are left alone.
  if (isPowerOf2_64(MulAmt) || isThreeFiveOrNine(MulAmt))
    return SDValue();

  // Peel off the first of 9, 5, 3 (in that priority) that divides MulAmt.
  static const uint64_t Factors[] = {9, 5, 3};
  uint64_t StepA = 0;
  uint64_t StepB = 0;
  for (uint64_t Factor : Factors) {
    if (MulAmt % Factor != 0)
      continue;
    StepA = Factor;
    StepB = MulAmt / Factor;
    break;
  }

  // The remaining quotient must itself be a single-step amount.
  if (StepB == 0 || !(isPowerOf2_64(StepB) || isThreeFiveOrNine(StepB)))
    return SDValue();

  SDLoc DL(N);

  // Put the shift first, unless the multiply's only user is an ADD.
  const bool OnlyUserIsAdd =
      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD;
  if (isPowerOf2_64(StepB) && !OnlyUserIsAdd)
    std::swap(StepA, StepB);

  // Emit one step: a shift for powers of two, MUL_IMM otherwise.
  auto emitStep = [&](SDValue Input, uint64_t Amt) -> SDValue {
    if (isPowerOf2_64(Amt))
      return DAG.getNode(ISD::SHL, DL, VT, Input,
                         DAG.getConstant(Log2_64(Amt), MVT::i8));
    return DAG.getNode(X86ISD::MUL_IMM, DL, VT, Input,
                       DAG.getConstant(Amt, VT));
  };

  SDValue NewMul = emitStep(N->getOperand(0), StepA);
  NewMul = emitStep(NewMul, StepB);

  // Replace N in place; the third argument (false) is passed exactly as in
  // the original call.
  DCI.CombineTo(N, NewMul, false);
  return SDValue();
}
/// DAG combine for ISD::SHL.
///
/// Two rewrites are attempted:
///  * scalar integer (shl (and X, C1), C2) where X is an X86ISD::SETCC_CARRY
///    (possibly any/zero-extended) becomes (and X, C1 << C2), provided the
///    shifted mask is non-zero;
///  * a shift by a constant splat of 1 becomes (add X, X).
static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue AmtOp = N->getOperand(1);
  ConstantSDNode *AmtC = dyn_cast<ConstantSDNode>(AmtOp);
  EVT VT = N0.getValueType();

  const bool IsScalarInt = VT.isInteger() && !VT.isVector();
  if (IsScalarInt && AmtC && N0.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue AndLHS = N0.getOperand(0);
    const unsigned LHSOpc = AndLHS.getOpcode();

    const bool IsCarry = LHSOpc == X86ISD::SETCC_CARRY;
    const bool IsExtendedCarry =
        (LHSOpc == ISD::ANY_EXTEND || LHSOpc == ISD::ZERO_EXTEND) &&
        AndLHS.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY;

    if (IsCarry || IsExtendedCarry) {
      // Shift the mask instead of the value.
      APInt ShiftedMask =
          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
      APInt ShiftBy = AmtC->getAPIntValue();
      ShiftedMask = ShiftedMask.shl(ShiftBy);
      if (ShiftedMask != 0)
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           AndLHS, DAG.getConstant(ShiftedMask, VT));
    }
  }

  // Shift amount given as a build_vector that splats a constant.
  auto *AmtBV = dyn_cast<BuildVectorSDNode>(AmtOp);
  if (!AmtBV)
    return SDValue();
  auto *AmtSplatC = AmtBV->getConstantSplatNode();
  if (!AmtSplatC)
    return SDValue();

  assert(N0.getValueType().isVector() && "Invalid vector shift type");
  if (AmtSplatC->getZExtValue() != 1)
    return SDValue();

  // x << 1 == x + x
  return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
/// Fold a vector shift whose constant splat amount, truncated to 8 bits, is at
/// least the element width into an all-zeros vector.
///
/// Only v2i64/v4i32/v8i16 are handled, plus v4i64/v8i32/v16i16 when the
/// subtarget reports hasInt256(). Returns an empty SDValue otherwise.
static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);

  const bool Is128BitCandidate =
      VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16;
  if (!Is128BitCandidate) {
    const bool Is256BitCandidate =
        VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16;
    if (!Subtarget->hasInt256() || !Is256BitCandidate)
      return SDValue();
  }

  SDValue AmtOp = N->getOperand(1);
  SDLoc DL(N);

  auto *AmtVec = dyn_cast<BuildVectorSDNode>(AmtOp);
  if (!AmtVec)
    return SDValue();
  auto *SplatC = AmtVec->getConstantSplatNode();
  if (!SplatC)
    return SDValue();

  APInt ShiftAmt = SplatC->getAPIntValue();
  unsigned EltBits = VT.getVectorElementType().getSizeInBits();

  // Only the low 8 bits of the amount take part in the comparison.
  if (!ShiftAmt.trunc(8).uge(EltBits))
    return SDValue();

  return getZeroVector(VT, Subtarget, DAG, DL);
}
/// Entry point for shift combines: tries the SHL-specific combine for ISD::SHL
/// nodes, then the fold-to-zero combine for every opcode except ISD::SRA.
/// Returns an empty SDValue when neither produces a result.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
  const unsigned Opc = N->getOpcode();

  if (Opc == ISD::SHL) {
    SDValue Folded = PerformSHLCombine(N, DAG);
    if (Folded.getNode())
      return Folded;
  }

  // The fold-to-zero combine is not attempted for arithmetic right shifts.
  if (Opc == ISD::SRA)
    return SDValue();

  SDValue Zeroed = performShiftToAllZeros(N, DAG, Subtarget);
  if (Zeroed.getNode())
    return Zeroed;
  return SDValue();
}
/// Try to replace an AND/OR of two SETCCs that test the same f32/f64
/// X86ISD::CMP with a single X86ISD::FSETCC, when no user of N needs the
/// result as a branch/select condition. Returns the replacement value, or an
/// empty SDValue when the pattern does not match.
static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
unsigned opcode;
// isAndOrOfSetCCs is defined outside this chunk. NOTE(review): presumably it
// verifies that N is an AND/OR whose operands are both X86ISD::SETCC and
// reports which via `opcode` (unused below) - confirm.
if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
SDValue CMP1 = N1->getOperand(1);
SDLoc DL(N);
// Both SETCCs must read the same X86ISD::CMP node.
if (CMP0.getOpcode() != X86ISD::CMP || CMP0 != CMP1)
return SDValue();
SDValue CMP00 = CMP0->getOperand(0);
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
if (VT == MVT::f32 || VT == MVT::f64) {
// Scan the users of N: only CopyToReg and sign/zero/any extends are
// accepted; any other user (including branches and selects) blocks the
// rewrite.
bool ExpectingFlags = false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
!ExpectingFlags && UI != UE; ++UI)
switch (UI->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
case ISD::SELECT:
ExpectingFlags = true;
break;
case ISD::CopyToReg:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
break;
}
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);
// Normalize so that an E/NE condition, if present in cc1, ends up in cc0.
if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
X86::CondCode tmp = cc0;
cc0 = cc1;
cc1 = tmp;
}
// Only the pairs (E, NP) and (NE, P) are rewritten.
if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
(cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
// Immediate handed to FSETCC: 0 for the E pair, 4 for the NE pair.
// NOTE(review): presumably the SSE compare-predicate encodings for "equal"
// and "not equal" - confirm.
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
// With AVX-512 the FSETCC yields an i1, zero-extended to N's type if needed.
if (Subtarget->hasAVX512()) {
SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
CMP01, DAG.getConstant(x86cc, MVT::i8));
if (N->getValueType(0) != MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0),
FSetCC);
return FSetCC;
}
// Otherwise the FSETCC result has the FP type of the compared operands; it
// is bitcast to an integer below and reduced to its low bit.
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
// f64 on a non-64-bit subtarget: take the low f32 lane of the result
// (via v2f64 -> v4f32) so that an i32 bitcast can be used instead of i64.
if (is64BitFP && !Subtarget->is64Bit()) {
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
OnesOrZeroesF);
SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
Vector64);
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
Vector32, DAG.getIntPtrConstant(0));
IntVT = MVT::i32;
}
// Bitcast to integer, keep bit 0, and truncate to i8.
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
DAG.getConstant(1, IntVT));
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
return OneBitOfTruth;
}
}
}
}
return SDValue();
}
static bool CanFoldXORWithAllOnes(const SDNode *N) {
EVT VT = N->getValueType(0);
if (ISD::isBuildVectorAllOnes(N))
return true;
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (VT.is256BitVector() &&
N->getOpcode() == ISD::INSERT_SUBVECTOR) {
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
if (V1.getOpcode() == ISD::INSERT_SUBVECTOR &&
V1.getOperand(0).getOpcode() == ISD::UNDEF &&
ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) &&
ISD::isBuildVectorAllOnes(V2.getNode()))
return true;
}
return false;
}
/// Given an any/zero/sign extend \p N from a 128-bit vector to a 256-bit
/// vector whose operand is an AND/OR/XOR of truncated wide values, perform the
/// logic operation directly in the wide type and drop the truncates:
///
///   ext(op(trunc(A), trunc(B)))  ->  fixup(op(A, B))
///   ext(op(trunc(A), splat(C)))  ->  fixup(op(A, splat(zext C)))
///
/// where fixup is nothing for ANY_EXTEND, an AND with the narrow-element mask
/// for ZERO_EXTEND, and SIGN_EXTEND_INREG for SIGN_EXTEND. Returns an empty
/// SDValue when the pattern does not match.
static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (!VT.is256BitVector())
    return SDValue();

  assert((N->getOpcode() == ISD::ANY_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");

  SDValue Narrow = N->getOperand(0);
  EVT NarrowVT = Narrow->getValueType(0);
  if (!NarrowVT.is128BitVector())
    return SDValue();

  if (Narrow->getOpcode() != ISD::XOR &&
      Narrow->getOpcode() != ISD::AND &&
      Narrow->getOpcode() != ISD::OR)
    return SDValue();

  SDValue N0 = Narrow->getOperand(0);
  SDValue N1 = Narrow->getOperand(1);
  SDLoc DL(Narrow);

  // The left side has to be a truncate from exactly the result type.
  if (N0.getOpcode() != ISD::TRUNCATE)
    return SDValue();
  EVT WideVT = N0->getOperand(0)->getValueType(0);
  if (WideVT != VT)
    return SDValue();

  // The right side has to be a truncate *from the same wide type* or a
  // constant splat. Without the type check, a truncate from some other vector
  // type would be used as an operand of the WideVT logic op built below,
  // producing a node whose operand types do not match.
  bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
                  N1.getOperand(0).getValueType() == WideVT;
  ConstantSDNode *RHSConstSplat = nullptr;
  if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
    RHSConstSplat = RHSBV->getConstantSplatNode();
  if (!RHSTrunc && !RHSConstSplat)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrPromote(Narrow->getOpcode(), WideVT))
    return SDValue();

  // Strip the truncate on the left; rebuild or strip the right side.
  N0 = N0->getOperand(0);
  if (RHSConstSplat) {
    // Re-create the splat in the wide type from the zero-extended scalar.
    N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
                     SDValue(RHSConstSplat, 0));
    SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
    N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
  } else if (RHSTrunc) {
    N1 = N1->getOperand(0);
  }

  // The logic operation itself, now in the wide type.
  SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, WideVT, N0, N1);

  unsigned Opcode = N->getOpcode();
  switch (Opcode) {
  case ISD::ANY_EXTEND:
    // The high bits are unconstrained; the wide result can be used as is.
    return Op;
  case ISD::ZERO_EXTEND: {
    // Clear everything above the narrow element width.
    unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
    APInt Mask = APInt::getAllOnesValue(InBits);
    Mask = Mask.zext(VT.getScalarType().getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       Op, DAG.getConstant(Mask, VT));
  }
  case ISD::SIGN_EXTEND:
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
                       Op, DAG.getValueType(NarrowVT));
  default:
    llvm_unreachable("Unexpected opcode");
  }
}
/// DAG combine for ISD::AND (run only once operations have been legalized).
///
/// In order:
///  * the shared SETCC-pair combine (CMPEQCombine);
///  * for i32/i64 on subtargets with BMI or TBM: (and (srl/sra X, C), Mask)
///    with a low-bit mask that fits becomes X86ISD::BEXTR;
///  * for v2i64/v4i64: an operand of the form (xor A, all-ones) turns the AND
///    into X86ISD::ANDNP(A, other operand).
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue FromSetCCs = CMPEQCombine(N, DAG, DCI, Subtarget);
  if (FromSetCCs.getNode())
    return FromSetCCs;

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc DL(N);

  // Scalar path: bit-field extract.
  if (VT == MVT::i32 || VT == MVT::i64) {
    if (!Subtarget->hasBMI() && !Subtarget->hasTBM())
      return SDValue();
    if (LHS.getOpcode() != ISD::SRA && LHS.getOpcode() != ISD::SRL)
      return SDValue();

    ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(RHS);
    ConstantSDNode *ShiftC = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
    if (!MaskC || !ShiftC)
      return SDValue();

    const uint64_t MaskBits = MaskC->getZExtValue();
    const uint64_t Start = ShiftC->getZExtValue();
    if (!isMask_64(MaskBits))
      return SDValue();

    // The extracted field must lie entirely inside the value.
    const uint64_t Length = CountPopulation_64(MaskBits);
    if (Start + Length > VT.getSizeInBits())
      return SDValue();

    // Control operand: start position in the low byte, length in the next.
    return DAG.getNode(X86ISD::BEXTR, DL, VT, LHS.getOperand(0),
                       DAG.getConstant(Start | (Length << 8), VT));
  }

  // Vector path: only v2i64 and v4i64 are considered.
  if (VT != MVT::v2i64 && VT != MVT::v4i64)
    return SDValue();

  // (and (xor A, -1), B) -> (andnp A, B)
  const bool LHSIsNot = LHS.getOpcode() == ISD::XOR &&
                        CanFoldXORWithAllOnes(LHS.getOperand(1).getNode());
  if (LHSIsNot)
    return DAG.getNode(X86ISD::ANDNP, DL, VT, LHS.getOperand(0), RHS);

  // (and B, (xor A, -1)) -> (andnp A, B)
  const bool RHSIsNot = RHS.getOpcode() == ISD::XOR &&
                        CanFoldXORWithAllOnes(RHS.getOperand(1).getNode());
  if (RHSIsNot)
    return DAG.getNode(X86ISD::ANDNP, DL, VT, RHS.getOperand(0), LHS);

  return SDValue();
}
/// DAG combine for ISD::OR (run only once operations have been legalized).
/// After the shared SETCC-pair combine, it handles a vector select idiom
/// ((Mask & Y) | andnp(Mask, X)) for v2i64/v4i64 and a scalar double-shift
/// idiom ((X << C) | (Y >> (Bits - C))) for i16/i32/i64. Returns the
/// replacement value or an empty SDValue.
static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
if (R.getNode())
return R;
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
// Vector idiom: or(and(Mask, Y), andnp(Mask, X)).
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
// Requires SSSE3, and additionally hasInt256() for the 256-bit type.
if (!Subtarget->hasSSSE3() ||
(VT == MVT::v4i64 && !Subtarget->hasInt256()))
return SDValue();
// Canonicalize: AND in N0, ANDNP in N1.
if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
// The AND must use the same Mask; Y is its other operand.
if (N0.getOperand(0) == Mask)
Y = N0.getOperand(1);
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
if (!Y.getNode())
return SDValue();
// Look through one bitcast on each of Mask, X and Y.
if (Mask.getOpcode() == ISD::BITCAST)
Mask = Mask.getOperand(0);
if (X.getOpcode() == ISD::BITCAST)
X = X.getOperand(0);
if (Y.getOpcode() == ISD::BITCAST)
Y = Y.getOperand(0);
EVT MaskVT = Mask.getValueType();
// The mask must be an arithmetic right shift by (element bits - 1), i.e.
// each element is a copy of its sign bit. SraAmt stays at ~0 (and fails
// the check below) when no such shift is found.
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
unsigned SraAmt = ~0;
if (Mask.getOpcode() == ISD::SRA) {
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
if (auto *AmtConst = AmtBV->getConstantSplatNode())
SraAmt = AmtConst->getZExtValue();
} else if (Mask.getOpcode() == X86ISD::VSRAI) {
SDValue SraC = Mask.getOperand(1);
SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
}
if ((SraAmt + 1) != EltBits)
return SDValue();
SDLoc DL(N);
// If Y is (0 - X) and all types agree, emit X86ISD::PSIGN on X and the
// value that was shifted to form the mask.
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// Otherwise emit a byte-vector VSELECT (Mask ? Y : X); needs SSE4.1.
if (!Subtarget->hasSSE41())
return SDValue();
EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
// Scalar idiom: double shift, for i16/i32/i64 only.
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
// Skip when the subtarget reports slow SHLD, unless optimizing for size.
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize =
MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
if (!OptForSize && Subtarget->isSHLDSlow())
return SDValue();
// Canonicalize: SHL in N0, SRL in N1; each must have a single use.
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
if (!N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
// Both shift amounts must be i8; a truncate feeding them is looked through.
SDValue ShAmt0 = N0.getOperand(1);
if (ShAmt0.getValueType() != MVT::i8)
return SDValue();
SDValue ShAmt1 = N1.getOperand(1);
if (ShAmt1.getValueType() != MVT::i8)
return SDValue();
if (ShAmt0.getOpcode() == ISD::TRUNCATE)
ShAmt0 = ShAmt0.getOperand(0);
if (ShAmt1.getOpcode() == ISD::TRUNCATE)
ShAmt1 = ShAmt1.getOperand(0);
SDLoc DL(N);
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
// If the SHL amount is the subtraction, this is the SHRD form: swap the
// roles so that ShAmt1 is always the candidate "Bits - amount" side.
if (ShAmt0.getOpcode() == ISD::SUB) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
unsigned Bits = VT.getSizeInBits();
// Variable amounts: ShAmt1 must be (Bits - ShAmt0), looking through a
// truncate on the subtracted value.
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
// Constant amounts: the two amounts must add up to the bit width.
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
if (ShAmt0C &&
ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
}
return SDValue();
}
/// Recognize the integer absolute-value idiom
///   Y = sra(X, size(X) - 1);  xor(add(X, Y), Y)
/// and rewrite it as a negate (X86ISD::SUB 0, X) followed by an X86ISD::CMOV
/// on COND_GE that picks between X and the negated value. 8-bit integers are
/// not transformed. Returns an empty SDValue when the idiom is not matched.
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  const bool IsIntegerVT = VT.isInteger();

  if (IsIntegerVT && VT.getSizeInBits() == 8)
    return SDValue();

  SDValue Sum = N->getOperand(0);
  SDValue Sign = N->getOperand(1);
  SDLoc DL(N);

  if (!IsIntegerVT || N->getOpcode() != ISD::XOR)
    return SDValue();
  // Operand 0 must be add(X, Sign).
  if (Sum.getOpcode() != ISD::ADD || Sum.getOperand(1) != Sign)
    return SDValue();
  // Sign must be sra(X, ...) of the very same X.
  if (Sign.getOpcode() != ISD::SRA ||
      Sign.getOperand(0) != Sum.getOperand(0))
    return SDValue();

  // The shift amount must be the constant size(X) - 1.
  ConstantSDNode *ShiftC = dyn_cast<ConstantSDNode>(Sign.getOperand(1));
  if (!ShiftC)
    return SDValue();
  if (!(ShiftC->getAPIntValue() == VT.getSizeInBits()-1))
    return SDValue();

  SDValue X = Sum.getOperand(0);
  // 0 - X, also producing a second (i32) result that feeds the CMOV.
  SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
                            DAG.getConstant(0, VT), X);
  SDValue Ops[] = { X, Neg,
                    DAG.getConstant(X86::COND_GE, MVT::i8),
                    SDValue(Neg.getNode(), 1) };
  return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
}
/// DAG combine for ISD::XOR: once operations have been legalized, and only on
/// subtargets with CMOV, try the integer-abs idiom combine.
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget *Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();
  if (!Subtarget->hasCMov())
    return SDValue();

  SDValue Abs = performIntegerAbsCombine(N, DAG);
  return Abs.getNode() ? Abs : SDValue();
}
/// DAG combine for ISD::LOAD.
///
/// On subtargets that report slow unaligned 32-byte memory accesses, a
/// non-extending, under-aligned 256-bit vector load is split (after operation
/// legalization) into two half-width loads at offsets 0 and 16 whose results
/// are re-assembled with Insert128BitVector. The original node is replaced
/// through DCI.CombineTo with the new vector and a TokenFactor of both load
/// chains. Returns an empty SDValue when no split is performed.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  EVT RegVT = Ld->getValueType(0);
  EVT MemVT = Ld->getMemoryVT();
  SDLoc dl(Ld);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  ISD::LoadExtType Ext = Ld->getExtensionType();
  unsigned Alignment = Ld->getAlignment();
  // An alignment of 0 is treated as "aligned"; otherwise the load is aligned
  // when its alignment covers the whole memory type.
  bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
  if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
      !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
    unsigned NumElems = RegVT.getVectorNumElements();
    if (NumElems < 2)
      return SDValue();

    SDValue Ptr = Ld->getBasePtr();
    SDValue Increment = DAG.getConstant(16, TLI.getPointerTy());

    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
                                  NumElems/2);
    // First half: same address and pointer info as the original load.
    SDValue Load1 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
                                Ld->getPointerInfo(), Ld->isVolatile(),
                                Ld->isNonTemporal(), Ld->isInvariant(),
                                Alignment);
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
    // Second half: 16 bytes further on. The pointer info must be advanced by
    // the same 16 bytes so that it describes the memory this load actually
    // reads; reusing the unshifted info would describe the first half again.
    SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
                                Ld->getPointerInfo().getWithOffset(16),
                                Ld->isVolatile(),
                                Ld->isNonTemporal(), Ld->isInvariant(),
                                std::min(16U, Alignment));
    // Both new loads hang off the original chain; merge their output chains.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                             Load1.getValue(1),
                             Load2.getValue(1));

    SDValue NewVec = DAG.getUNDEF(RegVT);
    NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
    NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
    return DCI.CombineTo(N, NewVec, TF, true);
  }

  return SDValue();
}
/// DAG combine for sign-extending masked loads.
///
/// The extending masked load is replaced by a non-extending masked load of a
/// same-sized vector with memory-typed elements (WideVecVT), followed by an
/// X86ISD::VSEXT to the original result type. The pass-through value and the
/// mask are reshaped to WideVecVT's element count first. The original node is
/// replaced through DCI.CombineTo with the extended value and the new chain.
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
  MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
  if (Mld->getExtensionType() != ISD::SEXTLOAD)
    return SDValue();

  EVT VT = Mld->getValueType(0);
  EVT LdVT = Mld->getMemoryVT();
  unsigned NumElems = VT.getVectorNumElements();
  SDLoc dl(Mld);

  assert(LdVT != VT && "Cannot extend to the same type");
  unsigned ToSz = VT.getVectorElementType().getSizeInBits();
  unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
          "Unexpected size for extending masked load");

  unsigned SizeRatio = ToSz / FromSz;
  assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());

  // Same total width as VT, but built from memory-typed elements.
  const unsigned WideNumElems = NumElems * SizeRatio;
  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   LdVT.getScalarType(), WideNumElems);
  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());

  // Reshape the pass-through value: element i of the wide vector takes
  // sub-element i*SizeRatio of the bitcast source; the rest stay undefined.
  SDValue Src0 = Mld->getSrc0();
  SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Src0);
  if (Src0.getOpcode() != ISD::UNDEF) {
    SmallVector<int, 16> SrcShuffle(WideNumElems, -1);
    for (unsigned Idx = 0; Idx != NumElems; ++Idx)
      SrcShuffle[Idx] = Idx * SizeRatio;

    assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
            && "WideVecVT should be legal");
    WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
                                    DAG.getUNDEF(WideVecVT), &SrcShuffle[0]);
  }

  // Reshape the mask to WideNumElems entries.
  SDValue Mask = Mld->getMask();
  SDValue NewMask;
  if (Mask.getValueType() == VT) {
    // Mask has the result type: compact it like the source, and fill the
    // remaining lanes from the first lane of an all-zeros second operand.
    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
    SmallVector<int, 16> MaskShuffle(WideNumElems, -1);
    for (unsigned Idx = 0; Idx != WideNumElems; ++Idx)
      MaskShuffle[Idx] = Idx < NumElems ? Idx * SizeRatio : WideNumElems;
    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
                                   DAG.getConstant(0, WideVecVT),
                                   &MaskShuffle[0]);
  } else {
    // i1 mask: pad it with zero vectors up to WideNumElems entries.
    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
    unsigned MaskNumElts = VT.getVectorNumElements();
    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                     WideNumElems);

    unsigned NumConcat = WideNumElems / MaskNumElts;
    SmallVector<SDValue, 16> Pieces(NumConcat);
    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
    Pieces[0] = Mask;
    for (unsigned Idx = 1; Idx != NumConcat; ++Idx)
      Pieces[Idx] = ZeroVal;

    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Pieces);
  }

  SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
                                     Mld->getBasePtr(), NewMask, WideSrc0,
                                     Mld->getMemoryVT(), Mld->getMemOperand(),
                                     ISD::NON_EXTLOAD);
  SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
  return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
/// DAG combine for truncating masked stores.
///
/// The stored value is bitcast to a same-sized vector with memory-typed
/// elements (WideVecVT) and shuffled so that the kept sub-elements are packed
/// at the front; the mask is reshaped to the same element count. The result
/// is a non-truncating masked store of the packed value.
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
  MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
  if (!Mst->isTruncatingStore())
    return SDValue();

  EVT VT = Mst->getValue().getValueType();
  EVT StVT = Mst->getMemoryVT();
  unsigned NumElems = VT.getVectorNumElements();
  SDLoc dl(Mst);

  assert(StVT != VT && "Cannot truncate to the same type");
  unsigned FromSz = VT.getVectorElementType().getSizeInBits();
  unsigned ToSz = StVT.getVectorElementType().getSizeInBits();

  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
          "Unexpected size for truncating masked store");
  assert (((NumElems * FromSz) % ToSz) == 0 &&
          "Unexpected ratio for truncating masked store");

  unsigned SizeRatio = FromSz / ToSz;
  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());

  // Same total width as VT, but built from memory-typed elements.
  const unsigned WideNumElems = NumElems * SizeRatio;
  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT.getScalarType(), WideNumElems);
  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());

  // Pack sub-element i*SizeRatio of every source element into lane i; the
  // remaining lanes stay undefined.
  SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
  SmallVector<int, 16> PackShuffle(WideNumElems, -1);
  for (unsigned Idx = 0; Idx != NumElems; ++Idx)
    PackShuffle[Idx] = Idx * SizeRatio;

  assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
          && "WideVecVT should be legal");
  SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
                                              DAG.getUNDEF(WideVecVT),
                                              &PackShuffle[0]);

  // Reshape the mask to WideNumElems entries.
  SDValue Mask = Mst->getMask();
  SDValue NewMask;
  if (Mask.getValueType() == VT) {
    // Mask has the value type: reuse the packing indices for the first
    // NumElems lanes and fill the rest from the first lane of an all-zeros
    // second operand.
    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
    for (unsigned Idx = NumElems; Idx != WideNumElems; ++Idx)
      PackShuffle[Idx] = WideNumElems;
    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
                                   DAG.getConstant(0, WideVecVT),
                                   &PackShuffle[0]);
  } else {
    // i1 mask: pad it with zero vectors up to WideNumElems entries.
    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
    unsigned MaskNumElts = VT.getVectorNumElements();
    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                     WideNumElems);

    unsigned NumConcat = WideNumElems / MaskNumElts;
    SmallVector<SDValue, 16> Pieces(NumConcat);
    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
    Pieces[0] = Mask;
    for (unsigned Idx = 1; Idx != NumConcat; ++Idx)
      Pieces[Idx] = ZeroVal;

    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Pieces);
  }

  // The final `false` marks the new store as non-truncating.
  return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
                            NewMask, StVT, Mst->getMemOperand(), false);
}
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();
SDLoc dl(St);
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Alignment = St->getAlignment();
bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
StVT == VT && !IsAligned) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
SDValue Stride = DAG.getConstant(16, TLI.getPointerTy());
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
SDValue Ch0 = DAG.getStore(St->getChain(), dl, Value0, Ptr0,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), Alignment);
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
std::min(16U, Alignment));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}
if (St->isTruncatingStore() && VT.isVector()) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
if (0 != (NumElems * FromSz) % ToSz) return SDValue();
unsigned SizeRatio = FromSz / ToSz;
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
StVT.getScalarType(), NumElems*SizeRatio);
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
if (!TLI.isTypeLegal(WideVecVT))
return SDValue();
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
DAG.getUNDEF(WideVecVT),
&ShuffleVec[0]);
MVT StoreType = MVT::i8;
for (MVT Tp : MVT::integer_valuetypes()) {
if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToSz)
StoreType = Tp;
}
if (TLI.isTypeLegal(MVT::f64) && StoreType.getSizeInBits() < 64 &&
(64 <= NumElems * ToSz))
StoreType = MVT::f64;
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
StoreType, VT.getSizeInBits()/StoreType.getSizeInBits());
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
TLI.getPointerTy());
SDValue Ptr = St->getBasePtr();
for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) {
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
StoreType, ShuffWide,
DAG.getIntPtrConstant(i));
SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
Chains.push_back(Ch);
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
if (VT.getSizeInBits() != 64)
return SDValue();
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat);
bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().getNode();
LoadSDNode *Ld = nullptr;
int TokenFactorIndex = -1;
SmallVector<SDValue, 8> Ops;
SDNode* ChainVal = St->getChain().getNode();
if (ChainVal == LdVal)
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
} else
Ops.push_back(ChainVal->getOperand(i));
}
}
if (!Ld || !ISD::isNormalLoad(Ld))
return SDValue();
if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
return SDValue();
SDLoc LdDL(Ld);
SDLoc StDL(N);
if (Subtarget->is64Bit() || F64IsLegal) {
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
}
SDValue LoAddr = Ld->getBasePtr();
SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(), Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->isInvariant(),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
LoAddr = St->getBasePtr();
HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
DAG.getConstant(4, MVT::i32));
SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
St->getPointerInfo(),
St->isVolatile(), St->isNonTemporal(),
St->getAlignment());
SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
St->getPointerInfo().getWithOffset(4),
St->isVolatile(),
St->isNonTemporal(),
MinAlign(St->getAlignment(), 4));
return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
}
return SDValue();
}
/// Return true if 'LHS op RHS' has the shape of a horizontal binary operation
/// over two source vectors A and B, e.g. for a 4-element vector:
///   LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
///   RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
/// On success LHS and RHS are overwritten with the two source vectors (A and
/// B); if one of them is undefined the other one is used in its place.
/// When IsCommutative is true, a mask pair with the even/odd roles swapped
/// between LHS and RHS is accepted as well.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// At least one of the two operands has to be a vector shuffle.
if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
// The mask is checked independently for every 128-bit lane of the vector.
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts / NumLanes;
assert((NumLaneElts % 2 == 0) &&
"Vector type should have an even number of elements in each lane");
unsigned HalfLaneElts = NumLaneElts/2;
// View LHS as 'VECTOR_SHUFFLE A, B, LMask'. A default-constructed SDValue
// stands for an undefined input. A non-shuffle LHS is treated as an identity
// shuffle of itself.
SDValue A, B;
SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
B = LHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise view RHS as 'VECTOR_SHUFFLE C, D, RMask'.
SDValue C, D;
SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
D = RHS.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
// Both sides must shuffle the same pair of vectors (in either order).
if (!(A == C && B == D) && !(A == D && B == C))
return false;
// Nothing to do when every input is undefined.
if (!A.getNode() && !B.getNode())
return false;
// If RHS uses the sources in the opposite order, rewrite its mask so that
// both masks index into (A, B).
if (A != C)
CommuteVectorShuffleMask(RMask, NumElts);
// Check, lane by lane, that LMask/RMask pick successive element pairs.
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
for (unsigned i = 0; i != NumLaneElts; ++i) {
int LIdx = LMask[i+l], RIdx = RMask[i+l];
// Skip undefined mask entries and entries that read from an undefined
// source vector.
if (LIdx < 0 || RIdx < 0 ||
(!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
(!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// The first half of each lane reads from A (Src == 0), the second half
// from B (Src == 1); Index is the even element of the expected pair.
unsigned Src = (i/HalfLaneElts); int Index = 2*(i%HalfLaneElts) + NumElts*Src + l;
if (!(LIdx == Index && RIdx == Index + 1) &&
!(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
}
// Hand the shuffle sources back to the caller, substituting the defined
// vector for an undefined one.
LHS = A.getNode() ? A : B; RHS = B.getNode() ? B : A; return true;
}
/// Try to turn an FADD whose operands form a horizontal pattern into a single
/// X86ISD::FHADD node. Returns an empty SDValue when the type/subtarget pair
/// is not handled or the operands do not match.
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // 128-bit vectors are gated on hasSSE3(), 256-bit vectors on hasFp256().
  bool TypeIsSupported =
      Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64);
  if (!TypeIsSupported)
    TypeIsSupported =
        Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64);
  if (!TypeIsSupported)
    return SDValue();

  // Addition commutes, so either operand may supply the odd elements.
  if (!isHorizontalBinOp(Op0, Op1, true))
    return SDValue();

  return DAG.getNode(X86ISD::FHADD, SDLoc(N), VT, Op0, Op1);
}
/// Try to turn an FSUB whose operands form a horizontal pattern into a single
/// X86ISD::FHSUB node. Returns an empty SDValue when the type/subtarget pair
/// is not handled or the operands do not match.
static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // 128-bit vectors are gated on hasSSE3(), 256-bit vectors on hasFp256().
  bool TypeIsSupported =
      Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64);
  if (!TypeIsSupported)
    TypeIsSupported =
        Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64);
  if (!TypeIsSupported)
    return SDValue();

  // Subtraction does not commute: only the exact even/odd order is accepted.
  if (!isHorizontalBinOp(Op0, Op1, false))
    return SDValue();

  return DAG.getNode(X86ISD::FHSUB, SDLoc(N), VT, Op0, Op1);
}
/// Simplify X86ISD::FOR / X86ISD::FXOR when one operand is the constant +0.0:
/// the result is simply the other operand.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);

  // Operand 0 is inspected first, then operand 1.
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(Idx));
    if (CFP && CFP->getValueAPF().isPosZero())
      return N->getOperand(1 - Idx);
  }
  return SDValue();
}
/// Under UnsafeFPMath, replace X86ISD::FMIN / X86ISD::FMAX with their
/// X86ISD::FMINC / X86ISD::FMAXC counterparts. Without UnsafeFPMath nothing
/// is changed.
static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);

  if (!DAG.getTarget().Options.UnsafeFPMath)
    return SDValue();

  unsigned CommutableOpc = 0;
  const unsigned Opc = N->getOpcode();
  if (Opc == X86ISD::FMIN)
    CommutableOpc = X86ISD::FMINC;
  else if (Opc == X86ISD::FMAX)
    CommutableOpc = X86ISD::FMAXC;
  else
    llvm_unreachable("unknown opcode");

  return DAG.getNode(CommutableOpc, SDLoc(N), N->getValueType(0),
                     N->getOperand(0), N->getOperand(1));
}
/// Simplify X86ISD::FAND when an operand is the constant +0.0: the result is
/// that zero operand itself.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // Operand 0 is inspected first, then operand 1.
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(Idx));
    if (CFP && CFP->getValueAPF().isPosZero())
      return N->getOperand(Idx);
  }
  return SDValue();
}
/// Simplify X86ISD::FANDN when an operand is the constant +0.0. In both cases
/// the result is operand 1:
///   FANDN(0.0, x) -> x
///   FANDN(x, 0.0) -> 0.0
static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
  for (unsigned Idx = 0; Idx != 2; ++Idx) {
    ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(Idx));
    if (CFP && CFP->getValueAPF().isPosZero())
      return N->getOperand(1);
  }
  return SDValue();
}
/// Simplify the second operand of an X86ISD::BT node given that only its low
/// Log2(BitWidth) bits are demanded. The BT node itself is never replaced, so
/// the function always returns an empty SDValue.
static SDValue PerformBTCombine(SDNode *N,
                                SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  SDValue BitNo = N->getOperand(1);
  if (!BitNo.hasOneUse())
    return SDValue();

  unsigned BitWidth = BitNo.getValueSizeInBits();
  APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
  APInt KnownZero, KnownOne;
  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                        !DCI.isBeforeLegalizeOps());
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Try shrinking a constant first; only if that does nothing fall back to
  // the general demanded-bits simplification.
  bool Changed = TLO.ShrinkDemandedConstant(BitNo, DemandedMask);
  if (!Changed)
    Changed = TLI.SimplifyDemandedBits(BitNo, DemandedMask, KnownZero,
                                       KnownOne, TLO);
  if (Changed)
    DCI.CommitTargetLoweringOpt(TLO);

  return SDValue();
}
/// Fold a VZEXT_MOVL of an X86ISD::VZEXT_LOAD (optionally seen through one
/// bitcast) into a plain bitcast of the load, provided the element sizes of
/// the result and of the load agree.
static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue Src = N->getOperand(0);
  // Look through a single bitcast.
  if (Src.getOpcode() == ISD::BITCAST)
    Src = Src.getOperand(0);

  EVT ResVT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();
  if (Src.getOpcode() != X86ISD::VZEXT_LOAD)
    return SDValue();

  unsigned ResEltBits = ResVT.getVectorElementType().getSizeInBits();
  unsigned SrcEltBits = SrcVT.getVectorElementType().getSizeInBits();
  if (ResEltBits != SrcEltBits)
    return SDValue();

  return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, Src);
}
/// For a v4i64 SIGN_EXTEND_INREG whose input is an any/sign extension of a
/// v4i32 value, perform the in-register extension on the v4i32 value and then
/// sign-extend the result to v4i64.
static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
                                               const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (!VT.isVector())
    return SDValue();

  SDValue Extended = N->getOperand(0);
  SDValue ExtTypeOp = N->getOperand(1);
  EVT ExtraVT = cast<VTSDNode>(ExtTypeOp)->getVT();
  SDLoc dl(N);

  if (VT != MVT::v4i64)
    return SDValue();
  const unsigned ExtOpc = Extended.getOpcode();
  if (ExtOpc != ISD::ANY_EXTEND && ExtOpc != ISD::SIGN_EXTEND)
    return SDValue();

  SDValue Narrow = Extended.getOperand(0);

  // With hasInt256(), loads that are not "normal" loads are left alone.
  if (Narrow.getOpcode() == ISD::LOAD && Subtarget->hasInt256() &&
      !ISD::isNormalLoad(Narrow.getNode()))
    return SDValue();

  if (Narrow.getValueType() != MVT::v4i32 || ExtraVT.getSizeInBits() >= 128)
    return SDValue();

  SDValue InReg = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
                              Narrow, ExtTypeOp);
  return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, InReg);
}
/// Target combine for ISD::SIGN_EXTEND.
///
///  * An i8 -> i32 sign extension of the second result of an i8 SDIVREM is
///    replaced by the second result of an X86ISD::SDIVREM8_SEXT_HREG node;
///    users of the first SDIVREM result are redirected to the new node.
///  * Before operation legalization, with hasFp256(), 256-bit vector
///    extensions are offered to WidenMaskArithmetic.
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  const bool IsI8RemainderToI32 =
      Src.getOpcode() == ISD::SDIVREM && Src.getResNo() == 1 &&
      Src.getValueType() == MVT::i8 && VT == MVT::i32;
  if (IsI8RemainderToI32) {
    SDLoc dl(N);
    SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
    SDValue DivRem = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
                                 Src.getOperand(0), Src.getOperand(1));
    DAG.ReplaceAllUsesOfValueWith(Src.getValue(0), DivRem.getValue(0));
    return DivRem.getValue(1);
  }

  if (!DCI.isBeforeLegalizeOps() || !Subtarget->hasFp256())
    return SDValue();

  if (!VT.isVector() || VT.getSizeInBits() != 256)
    return SDValue();

  SDValue Widened = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
  if (Widened.getNode())
    return Widened;
  return SDValue();
}
/// Lower ISD::FMA to one of X86ISD::FMADD / FMSUB / FNMADD / FNMSUB, folding
/// FNEG nodes on the three operands into the choice of opcode. Only legal
/// f32/f64 (scalar or vector) types on subtargets with FMA or FMA4 are
/// handled.
static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget* Subtarget) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  EVT EltVT = VT.getScalarType();
  if ((EltVT != MVT::f32 && EltVT != MVT::f64) ||
      (!Subtarget->hasFMA() && !Subtarget->hasFMA4()))
    return SDValue();

  SDValue MulLHS = N->getOperand(0);
  SDValue MulRHS = N->getOperand(1);
  SDValue Addend = N->getOperand(2);

  const bool NegLHS = MulLHS.getOpcode() == ISD::FNEG;
  const bool NegRHS = MulRHS.getOpcode() == ISD::FNEG;
  const bool NegAddend = Addend.getOpcode() == ISD::FNEG;

  // Strip the negations; their effect is encoded in the opcode below.
  if (NegLHS)
    MulLHS = MulLHS.getOperand(0);
  if (NegRHS)
    MulRHS = MulRHS.getOperand(0);
  if (NegAddend)
    Addend = Addend.getOperand(0);

  // The product is negated exactly when one (not both) factor was negated.
  unsigned Opcode;
  if (NegLHS == NegRHS) {
    if (NegAddend)
      Opcode = X86ISD::FMSUB;
    else
      Opcode = X86ISD::FMADD;
  } else {
    if (NegAddend)
      Opcode = X86ISD::FNMSUB;
    else
      Opcode = X86ISD::FNMADD;
  }

  return DAG.getNode(Opcode, dl, VT, MulLHS, MulRHS, Addend);
}
/// Target combine for ISD::ZERO_EXTEND / ISD::ANY_EXTEND.
///
///  * ext(and(SETCC_CARRY, 1)) and ext(trunc(SETCC_CARRY)) are rebuilt as
///    and(SETCC_CARRY, 1) directly in the wide type.
///  * 256-bit vector extensions are offered to WidenMaskArithmetic.
///  * An extension to i32/i64 of the second result of an i8 UDIVREM is
///    replaced by the second result of X86ISD::UDIVREM8_ZEXT_HREG.
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
  SDLoc dl(N);
  SDValue Src = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Re-create the given SETCC_CARRY in type VT and mask it down to bit 0.
  auto BuildWideCarryBit = [&](SDValue Carry) -> SDValue {
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
                                   Carry.getOperand(0), Carry.getOperand(1)),
                       DAG.getConstant(1, VT));
  };

  if (Src.getOpcode() == ISD::AND && Src.hasOneUse() &&
      Src.getOperand(0).hasOneUse()) {
    SDValue Inner = Src.getOperand(0);
    if (Inner.getOpcode() == X86ISD::SETCC_CARRY) {
      // The AND mask has to be the constant 1; otherwise give up entirely.
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Src.getOperand(1));
      if (!Mask || Mask->getZExtValue() != 1)
        return SDValue();
      return BuildWideCarryBit(Inner);
    }
  }

  if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
      Src.getOperand(0).hasOneUse()) {
    SDValue Inner = Src.getOperand(0);
    if (Inner.getOpcode() == X86ISD::SETCC_CARRY)
      return BuildWideCarryBit(Inner);
  }

  if (VT.is256BitVector()) {
    SDValue Widened = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
    if (Widened.getNode())
      return Widened;
  }

  const bool IsI8Remainder = Src.getOpcode() == ISD::UDIVREM &&
                             Src.getResNo() == 1 &&
                             Src.getValueType() == MVT::i8;
  if (IsI8Remainder && (VT == MVT::i32 || VT == MVT::i64)) {
    SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
    SDValue DivRem = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
                                 Src.getOperand(0), Src.getOperand(1));
    DAG.ReplaceAllUsesOfValueWith(Src.getValue(0), DivRem.getValue(0));
    return DivRem.getValue(1);
  }

  return SDValue();
}
/// Target combine for ISD::SETCC.
///
///  * For SETEQ/SETNE, a single-use (0 - X) on either side is folded away:
///    setcc((0 - X), Y) becomes setcc((Y + X), 0).
///  * For an i1 (scalar or vector element) result, a comparison of
///    sign_extend(i1 value) against an all-zeros build vector is reduced to a
///    constant, the i1 value, or its complement.
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget* Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const bool IsEqualityCC = (CC == ISD::SETNE || CC == ISD::SETEQ);

  // Fold setcc((0 - X), Other) into setcc((Other + X), 0). Returns an empty
  // SDValue when 'Neg' is not a single-use negation or CC is not EQ/NE.
  auto FoldNegatedSide = [&](SDValue Neg, SDValue Other) -> SDValue {
    if (!IsEqualityCC || Neg.getOpcode() != ISD::SUB)
      return SDValue();
    ConstantSDNode *Minuend = dyn_cast<ConstantSDNode>(Neg.getOperand(0));
    if (!Minuend)
      return SDValue();
    if (!(Minuend->getAPIntValue() == 0) || !Neg.hasOneUse())
      return SDValue();
    SDValue Sum = DAG.getNode(ISD::ADD, SDLoc(N), Neg.getValueType(), Other,
                              Neg.getOperand(1));
    return DAG.getSetCC(SDLoc(N), N->getValueType(0), Sum,
                        DAG.getConstant(0, Sum.getValueType()), CC);
  };

  SDValue Folded = FoldNegatedSide(LHS, RHS);
  if (Folded.getNode())
    return Folded;
  Folded = FoldNegatedSide(RHS, LHS);
  if (Folded.getNode())
    return Folded;

  if (VT.getScalarType() == MVT::i1 &&
      (IsEqualityCC || ISD::isSignedIntSetCC(CC))) {
    auto IsSExtFromI1 = [](SDValue V) -> bool {
      return (V.getOpcode() == ISD::SIGN_EXTEND) &&
             (V.getOperand(0).getValueType().getScalarType() == MVT::i1);
    };

    bool LHSIsSExt = IsSExtFromI1(LHS);
    bool RHSIsZero = ISD::isBuildVectorAllZeros(RHS.getNode());

    // If the pattern is not in canonical order, retry with the operands (and
    // the condition code) swapped.
    if (!LHSIsSExt || !RHSIsZero) {
      std::swap(LHS, RHS);
      CC = ISD::getSetCCSwappedOperands(CC);
      LHSIsSExt = IsSExtFromI1(LHS);
      RHSIsZero = ISD::isBuildVectorAllZeros(RHS.getNode());
    }

    if (LHSIsSExt && RHSIsZero) {
      assert(VT == LHS.getOperand(0).getValueType() &&
             "Uexpected operand type");
      switch (CC) {
      case ISD::SETGT:
        return DAG.getConstant(0, VT);
      case ISD::SETLE:
        return DAG.getConstant(1, VT);
      case ISD::SETEQ:
      case ISD::SETGE:
        return DAG.getNOT(DL, LHS.getOperand(0), VT);
      default:
        assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
               "Unexpected condition code!");
        return LHS.getOperand(0);
      }
    }
  }

  return SDValue();
}
/// When the inserted operand of an X86ISD::INSERTPS is a foldable load,
/// narrow that vector load to the single element that is actually used and
/// rebuild the node on top of a SCALAR_TO_VECTOR of the narrowed load.
static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDLoc dl(N);
  MVT VT = N->getOperand(1)->getSimpleValueType(0);
  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
         "X86insertps is only defined for v4x32");

  SDValue Inserted = N->getOperand(1);
  if (!MayFoldLoad(Inserted))
    return SDValue();

  // The element index handed to the narrowing helper is taken from the bits
  // of the immediate above bit 5.
  unsigned DestIndex =
      cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
  SDValue NarrowLd =
      NarrowVectorLoadToElement(cast<LoadSDNode>(Inserted), DestIndex, DAG);

  SDValue ScalarAsVector =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, NarrowLd);
  return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
                     ScalarAsVector, N->getOperand(2));
}
/// Build the value of a COND_B test on EFLAGS from an i8 X86ISD::SETCC_CARRY
/// node: masked with 1 for an i8 result, truncated for an i1 result.
static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG,
                               MVT VT) {
  if (VT == MVT::i8) {
    SDValue Carry = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
                                DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS);
    return DAG.getNode(ISD::AND, DL, VT, Carry, DAG.getConstant(1, VT));
  }

  assert (VT == MVT::i1 && "Unexpected type for SECCC node");
  SDValue Carry = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
                              DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS);
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Carry);
}
/// Target combine for X86ISD::SETCC.
///
///  * COND_A on a single-use integer X86ISD::SUB whose second operand is not
///    a constant is rewritten as COND_B on the SUB with swapped operands.
///  * COND_B is materialized through MaterializeSETB.
///  * Otherwise checkBoolTestSetCCCombine is given a chance to supply
///    different flags (and possibly a different condition code).
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
  SDLoc DL(N);
  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
  SDValue EFLAGS = N->getOperand(1);

  const bool CanSwapSubOperands =
      CC == X86::COND_A && EFLAGS.getOpcode() == X86ISD::SUB &&
      EFLAGS.hasOneUse() && EFLAGS.getValueType().isInteger() &&
      !isa<ConstantSDNode>(EFLAGS.getOperand(1));
  if (CanSwapSubOperands) {
    SDValue SwappedSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
                                     EFLAGS.getNode()->getVTList(),
                                     EFLAGS.getOperand(1),
                                     EFLAGS.getOperand(0));
    // Pick the same result number of the new node as EFLAGS referred to.
    SDValue SwappedFlags = SDValue(SwappedSub.getNode(), EFLAGS.getResNo());
    return MaterializeSETB(DL, SwappedFlags, DAG, N->getSimpleValueType(0));
  }

  if (CC == X86::COND_B)
    return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));

  SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
  if (!Flags.getNode())
    return SDValue();

  SDValue Cond = DAG.getConstant(CC, MVT::i8);
  return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
/// Target combine for X86ISD::BRCOND: if checkBoolTestSetCCCombine can supply
/// different flags for the branch condition, rebuild the branch on top of
/// them (using the condition code as left by that helper).
static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
  SDLoc DL(N);
  SDValue Chain = N->getOperand(0);
  SDValue Dest = N->getOperand(1);
  SDValue EFLAGS = N->getOperand(3);
  X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));

  SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC);
  if (!Flags.getNode())
    return SDValue();

  SDValue Cond = DAG.getConstant(CC, MVT::i8);
  return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
                     Flags);
}
/// For a vector unary operation N applied to (and (setcc ...), ConstantBV) of
/// the same total bit width, apply N to the constant instead and perform the
/// AND afterwards:
///   N(and(setcc, C)) -> bitcast(and(setcc, bitcast(N(C))))
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
                                                         SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  if (!VT.isVector())
    return SDValue();

  SDValue Masked = N->getOperand(0);
  if (Masked->getOpcode() != ISD::AND ||
      Masked->getOperand(0)->getOpcode() != ISD::SETCC ||
      VT.getSizeInBits() != Masked->getValueType(0).getSizeInBits())
    return SDValue();

  // The other AND operand has to be a constant build vector.
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Masked->getOperand(1));
  if (!BV)
    return SDValue();
  if (!BV->isConstant())
    return SDValue();

  SDLoc DL(N);
  EVT IntVT = BV->getValueType(0);
  // Apply the unary operation to the constant itself.
  SDValue ConvertedConst =
      DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
  // The AND is done in the integer type, with bitcasts on either side.
  SDValue ConstAsInt = DAG.getNode(ISD::BITCAST, DL, IntVT, ConvertedConst);
  SDValue MaskedInt =
      DAG.getNode(ISD::AND, DL, IntVT, Masked->getOperand(0), ConstAsInt);
  return DAG.getNode(ISD::BITCAST, DL, VT, MaskedInt);
}
/// Target combine for ISD::SINT_TO_FP.
///
///  * First try the compare-and-mask constant fold.
///  * v4i8 / v8i8 inputs are sign-extended to v4i32 / v8i32 before the
///    conversion.
///  * On non-64-bit subtargets, a scalar conversion of a single-use,
///    non-volatile, non-extending i64 load is built through BuildFILD.
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
                                        const X86Subtarget *Subtarget) {
  SDValue Folded = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
  if (Folded != SDValue())
    return Folded;

  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src->getValueType(0);

  if (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i8) {
    SDLoc dl(N);
    MVT WideVT = SrcVT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
    SDValue Wide = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Src);
    return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), Wide);
  }

  if (Src.getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *Ld = cast<LoadSDNode>(Src.getNode());
  EVT LdVT = Ld->getValueType(0);
  if (Ld->isVolatile() || N->getValueType(0).isVector() ||
      !ISD::isNON_EXTLoad(Src.getNode()) || !Src.hasOneUse() ||
      Subtarget->is64Bit() || LdVT != MVT::i64)
    return SDValue();

  SDValue FILDChain = Subtarget->getTargetLowering()->BuildFILD(
      SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Src, DAG);
  // Users of the load's chain result now hang off the FILD's chain result.
  DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), FILDChain.getValue(1));
  return FILDChain;
}
/// Simplify an X86ISD::ADC whose two addends are both zero and whose second
/// result is unused: the sum is just the incoming carry, built as
/// (SETCC_CARRY COND_B, flags) & 1, and the second result becomes constant 0.
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
                                 X86TargetLowering::DAGCombinerInfo &DCI) {
  if (!X86::isZeroNode(N->getOperand(0)) ||
      !X86::isZeroNode(N->getOperand(1)) ||
      !SDValue(N, 1).use_empty())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue CarryOut = DAG.getConstant(0, N->getValueType(1));
  SDValue CarryIn = DAG.getNode(ISD::AND, DL, VT,
                                DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                                            DAG.getConstant(X86::COND_B,MVT::i8),
                                            N->getOperand(2)),
                                DAG.getConstant(1, VT));
  return DCI.CombineTo(N, CarryIn, CarryOut);
}
/// Fold an add/sub of a zero-extended "X == 0" / "X != 0" test into an
/// ADC/SBB that consumes the flags of "cmp X, 1":
///   add Y, (zext (sete  X, 0)) -> adc Y,  0
///   add Y, (zext (setne X, 0)) -> sbb Y, -1
///   sub Y, (zext (sete  X, 0)) -> sbb Y,  0
///   sub Y, (zext (setne X, 0)) -> adc Y, -1
/// For ADD the zext is looked for in operand 0, for SUB in operand 1.
static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  const bool IsSub = N->getOpcode() == ISD::SUB;

  SDValue Ext = N->getOperand(IsSub ? 1 : 0);
  if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
    return SDValue();

  SDValue SetCC = Ext.getOperand(0);
  if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
    return SDValue();

  X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  // The flags have to come from a single-use integer compare against zero.
  SDValue Cmp = SetCC.getOperand(1);
  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
      !X86::isZeroNode(Cmp.getOperand(1)) ||
      !Cmp.getOperand(0).getValueType().isInteger())
    return SDValue();

  SDValue Tested = Cmp.getOperand(0);
  SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Tested,
                               DAG.getConstant(1, Tested.getValueType()));

  SDValue OtherVal = N->getOperand(IsSub ? 0 : 1);
  EVT OtherVT = OtherVal.getValueType();

  unsigned NewOpc;
  SDValue Imm;
  if (CC == X86::COND_NE) {
    NewOpc = IsSub ? X86ISD::ADC : X86ISD::SBB;
    Imm = DAG.getConstant(-1ULL, OtherVT);
  } else {
    NewOpc = IsSub ? X86ISD::SBB : X86ISD::ADC;
    Imm = DAG.getConstant(0, OtherVT);
  }
  return DAG.getNode(NewOpc, DL, OtherVT, OtherVal, Imm, NewCmp);
}
/// Target combine for ISD::ADD: form an X86ISD::HADD when the operands make
/// up a horizontal add on a supported type; otherwise defer to
/// OptimizeConditionalInDecrement.
static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // 128-bit vectors are gated on hasSSSE3(), 256-bit vectors on hasInt256().
  bool TypeIsSupported =
      Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32);
  if (!TypeIsSupported)
    TypeIsSupported =
        Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32);

  // Addition commutes, so either operand may supply the odd elements.
  if (TypeIsSupported && isHorizontalBinOp(LHS, RHS, true))
    return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, LHS, RHS);

  return OptimizeConditionalInDecrement(N, DAG);
}
/// Target combine for ISD::SUB.
///
///  * (C - (X ^ XorC)), with a single-use XOR and constant C / XorC, becomes
///    ((X ^ ~XorC) + (C + 1)), which avoids a constant on the left-hand side
///    of the subtraction (uses -Y == ~Y + 1 and ~(X ^ K) == X ^ ~K).
///  * A subtraction whose operands make up a horizontal pattern on a
///    supported type becomes X86ISD::HSUB.
///  * Otherwise defer to OptimizeConditionalInDecrement.
static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget *Subtarget) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
    if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
        isa<ConstantSDNode>(Op1.getOperand(1))) {
      APInt XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getAPIntValue();
      EVT VT = Op0.getValueType();
      SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT,
                                   Op1.getOperand(0),
                                   DAG.getConstant(~XorC, VT));
      return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor,
                         DAG.getConstant(C->getAPIntValue()+1, VT));
    }
  }

  // Subtraction is not commutative: a pattern in which Op0 supplies the odd
  // elements and Op1 the even ones computes (odd - even), which is the
  // negation of what HSUB produces. Only the exact even/odd order may be
  // matched, so IsCommutative must be false here (as in PerformFSUBCombine).
  EVT VT = N->getValueType(0);
  if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
      isHorizontalBinOp(Op0, Op1, false))
    return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);

  return OptimizeConditionalInDecrement(N, DAG);
}
/// Target combine for X86ISD::VZEXT.
///
///  * vzext(bitcast*(vzext(x))) is collapsed into a single vzext where the
///    element types allow it.
///  * vzext(bitcast*(scalar_to_vector(extract_vector_elt(x, 0)))) skips the
///    extract/insert round trip and extends a bitcast of x (or of its low
///    subvector when x is wider than the operand type).
static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
  SDLoc DL(N);
  MVT VT = N->getSimpleValueType(0);
  SDValue Op = N->getOperand(0);
  MVT OpVT = Op.getSimpleValueType();
  MVT OpEltVT = OpVT.getVectorElementType();
  // Number of input bits that are actually read by this extension.
  unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();

  // Look through any chain of bitcasts on the operand.
  SDValue Stripped = Op;
  while (Stripped.getOpcode() == ISD::BITCAST)
    Stripped = Stripped.getOperand(0);

  if (Stripped != Op && Stripped.getOpcode() == X86ISD::VZEXT) {
    MVT InnerVT = Stripped.getSimpleValueType();
    MVT InnerEltVT = InnerVT.getVectorElementType();

    // Same element type: one extension straight from the innermost input.
    if (OpEltVT == InnerEltVT) {
      assert(OpVT == InnerVT && "Types must match for vzext!");
      return DAG.getNode(X86ISD::VZEXT, DL, VT, Stripped.getOperand(0));
    }

    // Otherwise all demanded input bits must lie within a single element of
    // the inner extension.
    if (InnerEltVT.getSizeInBits() < InputBits)
      return SDValue();

    return DAG.getNode(X86ISD::VZEXT, DL, VT,
                       DAG.getNode(ISD::BITCAST, DL, OpVT, Stripped));
  }

  if (Stripped.getOpcode() != ISD::SCALAR_TO_VECTOR)
    return SDValue();

  SDValue Extract = Stripped.getOperand(0);
  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Extract.getSimpleValueType().getSizeInBits() != InputBits)
    return SDValue();

  SDValue OrigV = Extract.getOperand(0);
  auto *ExtractIdx = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  if (!ExtractIdx || ExtractIdx->getZExtValue() != 0)
    return SDValue();

  MVT OrigVT = OrigV.getSimpleValueType();
  // If the source vector is wider than the operand type, take its low part.
  if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
    int Ratio = OrigVT.getSizeInBits() / OpVT.getSizeInBits();
    OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(),
                              OrigVT.getVectorNumElements() / Ratio);
    OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV,
                        DAG.getIntPtrConstant(0));
  }
  Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV);
  return DAG.getNode(X86ISD::VZEXT, DL, VT, Op);
}
/// Entry point for the X86-specific DAG combines: dispatch on the opcode of N
/// to the matching Perform*Combine helper and return whatever it returns.
/// An empty SDValue is returned for opcodes without a case, and for
/// X86ISD::INSERTPS when the optimization level is CodeGenOpt::None.
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
// All select-like nodes share one combine.
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
// Integer arithmetic, shifts and logic.
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
// Loads and stores, including the masked variants.
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
// Floating point.
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
// Extensions and truncation. ANY_EXTEND shares the ZERO_EXTEND combine.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG:
return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
// Comparisons and conditional branches.
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget);
case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget);
// The target-specific shuffle nodes below and the generic VECTOR_SHUFFLE all
// go through the same shuffle combine.
case X86ISD::SHUFP: case X86ISD::PALIGNR:
case X86ISD::UNPCKH:
case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
// The INSERTPS combine is only run when optimizing.
case X86ISD::INSERTPS: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
return PerformINSERTPSCombine(N, DAG, Subtarget);
break;
}
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget);
}
return SDValue();
}
/// Return whether VT is a desirable type for opcode Opc. Illegal types are
/// never desirable; every legal type other than i16 always is. For i16 the
/// opcodes listed below are reported as not desirable.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
  if (!isTypeLegal(VT))
    return false;
  if (VT != MVT::i16)
    return true;

  bool Desirable = true;
  switch (Opc) {
  case ISD::LOAD:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SUB:
  case ISD::ADD:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    Desirable = false;
    break;
  default:
    break;
  }
  return Desirable;
}
/// Decide whether the i16 operation Op should be promoted. PVT is set to
/// MVT::i32 whenever the function runs to its end (including when it then
/// returns false); it is left untouched on the early 'return false' paths.
/// Promotion is declined where it could get in the way of folding a load
/// into the operation or the operation into a store.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
  if (Op.getValueType() != MVT::i16)
    return false;

  bool ShouldPromote = false;
  bool IsCommutative = false;
  switch (Op.getOpcode()) {
  default:
    break;

  case ISD::LOAD: {
    LoadSDNode *Load = cast<LoadSDNode>(Op);
    if (Load->getExtensionType() == ISD::NON_EXTLOAD) {
      // A non-extending load is only promoted when every use is a CopyToReg.
      SDNode *LoadNode = Op.getNode();
      for (SDNode::use_iterator Use = LoadNode->use_begin(),
                                UseEnd = LoadNode->use_end();
           Use != UseEnd; ++Use) {
        if (Use->getOpcode() != ISD::CopyToReg)
          return false;
      }
    }
    ShouldPromote = true;
    break;
  }

  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    ShouldPromote = true;
    break;

  case ISD::SHL:
  case ISD::SRL: {
    // Keep a (store (shift (load), x)) chain at 16 bits.
    SDValue Shifted = Op.getOperand(0);
    if (MayFoldLoad(Shifted) && MayFoldIntoStore(Op))
      return false;
    ShouldPromote = true;
    break;
  }

  case ISD::ADD:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    IsCommutative = true;
    // Intentional fall through: the operand checks are shared with SUB.
  case ISD::SUB: {
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    if (!IsCommutative && MayFoldLoad(RHS))
      return false;
    if (MayFoldLoad(LHS) &&
        (!isa<ConstantSDNode>(RHS) || MayFoldIntoStore(Op)))
      return false;
    if (MayFoldLoad(RHS) &&
        (!isa<ConstantSDNode>(LHS) || MayFoldIntoStore(Op)))
      return false;
    ShouldPromote = true;
    break;
  }
  }

  PVT = MVT::i32;
  return ShouldPromote;
}
namespace {
/// Return true if 's' consists of exactly the given pieces, in order, where
/// each piece is followed by at least one space/tab or by the end of the
/// string. Leading blanks in 's' are ignored.
bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
  // Drop leading blanks.
  s = s.substr(s.find_first_not_of(" \t"));

  for (const StringRef *Arg : args) {
    StringRef Piece(*Arg);
    if (!s.startswith(Piece))
      return false;
    s = s.substr(Piece.size());

    // A non-blank character right after the piece means only a prefix of a
    // longer token was matched.
    StringRef::size_type NextTokenPos = s.find_first_not_of(" \t");
    if (NextTokenPos == 0)
      return false;
    s = s.substr(NextTokenPos);
  }

  // Nothing may be left over once every piece has been consumed.
  return s.empty();
}

/// Variadic front end for matchAsmImpl.
const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
}
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
return true;
}
}
return false;
}
/// Try to recognize an inline asm call that implements a byte swap and
/// replace it through IntrinsicLowering::LowerToByteSwap. Returns the result
/// of that call when one of the patterns below matches, and false otherwise.
/// Only calls returning an integer whose width is a multiple of 16 bits are
/// considered.
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty || Ty->getBitWidth() % 16 != 0)
return false;
// Split the asm string into statements at ';' and newline.
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
switch (AsmPieces.size()) {
default: return false;
case 1:
// A single bswap / bswapl / bswapq on operand 0.
if (matchAsm(AsmPieces[0], "bswap", "$0") ||
matchAsm(AsmPieces[0], "bswapl", "$0") ||
matchAsm(AsmPieces[0], "bswapq", "$0") ||
matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
return IntrinsicLowering::LowerToByteSwap(CI);
}
// A 16-bit rotate by 8 ("rorw"/"rolw $$8, ${0:w}") of an i16 value. The
// constraint string has to start with "=r,0," and the remaining entries
// have to satisfy clobbersFlagRegisters.
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
(matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
// AsmPieces is reused to hold the constraints after the "=r,0," prefix.
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
// i32: "rorw $$8, ${0:w}; rorl $$16, $0; rorw $$8, ${0:w}", with the same
// constraint requirements as the i16 case above.
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
// AsmPieces is reused to hold the constraints after the "=r,0," prefix.
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
// i64 whose first constraint has the single code "A" and whose second has
// the single code "0": "bswap %eax; bswap %edx; xchgl %eax, %edx".
if (CI->getType()->isIntegerTy(64)) {
InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
if (Constraints.size() >= 2 &&
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
matchAsm(AsmPieces[1], "bswap", "%edx") &&
matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
}
return false;
}
/// Classify an inline-asm constraint string.
///
/// Single-letter constraints listed below are sorted into C_RegisterClass,
/// C_Register or C_Other; every other constraint is classified by
/// TargetLowering::getConstraintType.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  // Only one-character constraints get X86-specific treatment.
  if (Constraint.size() != 1)
    return TargetLowering::getConstraintType(Constraint);

  switch (Constraint[0]) {
  case 'R': case 'q': case 'Q': case 'f': case 't':
  case 'u': case 'y': case 'x': case 'Y': case 'l':
    return C_RegisterClass;

  case 'a': case 'b': case 'c': case 'd':
  case 'S': case 'D': case 'A':
    return C_Register;

  case 'I': case 'J': case 'K': case 'L': case 'M':
  case 'N': case 'G': case 'C': case 'e': case 'Z':
    return C_Other;

  default:
    break;
  }

  // Unlisted letters fall back to the generic classification.
  return TargetLowering::getConstraintType(Constraint);
}
/// Rate how well the call operand recorded in \p info satisfies the
/// constraint letter that \p constraint points at.
///
/// \returns CW_Default when \p info carries no call operand. For the letters
/// handled here the result is CW_Invalid unless the letter's operand test
/// passes, in which case it is CW_SpecificReg, CW_Register or CW_Constant as
/// noted per case. Any other letter gets the weight computed by
/// TargetLowering::getSingleConstraintMatchWeight, overridden with
/// CW_SpecificReg when the operand has integer type.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  Value *Operand = info.CallOperandVal;
  if (!Operand)
    return CW_Default;

  Type *OperandTy = Operand->getType();

  // True when the operand is a ConstantInt whose zero-extended value does not
  // exceed Max.
  auto IsUImmAtMost = [Operand](uint64_t Max) -> bool {
    ConstantInt *C = dyn_cast<ConstantInt>(Operand);
    return C && C->getZExtValue() <= Max;
  };
  // True when the operand is a ConstantInt whose sign-extended value lies in
  // the closed interval [Min, Max].
  auto IsSImmWithin = [Operand](int64_t Min, int64_t Max) -> bool {
    ConstantInt *C = dyn_cast<ConstantInt>(Operand);
    if (!C)
      return false;
    const int64_t V = C->getSExtValue();
    return V >= Min && V <= Max;
  };

  switch (*constraint) {
  default: {
    // Letters not listed below start from the generic weight, but an
    // integer-typed operand is still reported as CW_SpecificReg.
    ConstraintWeight Generic =
        TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    return OperandTy->isIntegerTy() ? CW_SpecificReg : Generic;
  }

  case 'R': case 'q': case 'Q':
  case 'a': case 'b': case 'c': case 'd':
  case 'S': case 'D': case 'A':
    return OperandTy->isIntegerTy() ? CW_SpecificReg : CW_Invalid;

  case 'f': case 't': case 'u':
    return OperandTy->isFloatingPointTy() ? CW_SpecificReg : CW_Invalid;

  case 'y':
    return (OperandTy->isX86_MMXTy() && Subtarget->hasMMX()) ? CW_SpecificReg
                                                             : CW_Invalid;

  case 'x': case 'Y':
    // 128-bit operands need SSE1, 256-bit operands need hasFp256().
    if (((OperandTy->getPrimitiveSizeInBits() == 128) &&
         Subtarget->hasSSE1()) ||
        ((OperandTy->getPrimitiveSizeInBits() == 256) &&
         Subtarget->hasFp256()))
      return CW_Register;
    return CW_Invalid;

  case 'I': // constant in 0..31
    return IsUImmAtMost(31) ? CW_Constant : CW_Invalid;

  case 'J': // constant in 0..63
    return IsUImmAtMost(63) ? CW_Constant : CW_Invalid;

  case 'K': // signed constant in -0x80..0x7f
    return IsSImmWithin(-0x80, 0x7f) ? CW_Constant : CW_Invalid;

  case 'L': // exactly 0xff or 0xffff
    if (ConstantInt *C = dyn_cast<ConstantInt>(Operand)) {
      const uint64_t V = C->getZExtValue();
      if (V == 0xff || V == 0xffff)
        return CW_Constant;
    }
    return CW_Invalid;

  case 'M': // constant in 0..3
    return IsUImmAtMost(3) ? CW_Constant : CW_Invalid;

  case 'N': // constant in 0..0xff
    return IsUImmAtMost(0xff) ? CW_Constant : CW_Invalid;

  case 'G': case 'C': // any floating-point constant
    return isa<ConstantFP>(Operand) ? CW_Constant : CW_Invalid;

  case 'e': // signed constant in -0x80000000..0x7fffffff
    return IsSImmWithin(-0x80000000LL, 0x7fffffffLL) ? CW_Constant
                                                     : CW_Invalid;

  case 'Z': // constant in 0..0xffffffff
    return IsUImmAtMost(0xffffffff) ? CW_Constant : CW_Invalid;
  }
}
/// Pick the constraint string to use for an operand of type \p ConstraintVT.
///
/// Floating-point types map to "Y" when the subtarget has SSE2 and to "x"
/// when it only has SSE1; everything else is decided by
/// TargetLowering::LowerXConstraint.
const char *X86TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  const bool IsFP = ConstraintVT.isFloatingPoint();
  if (IsFP && Subtarget->hasSSE2())
    return "Y";
  if (IsFP && Subtarget->hasSSE1())
    return "x";
  return TargetLowering::LowerXConstraint(ConstraintVT);
}
/// Lower the inline-asm operand \p Op for the constraint \p Constraint.
///
/// For the single-letter constraints handled in the switch below, a matching
/// operand is turned into one target node that is appended to \p Ops. If the
/// letter is handled here but the operand does not qualify, the function
/// returns without appending anything. Letters not handled here are forwarded
/// to TargetLowering::LowerAsmOperandForConstraint. Constraints longer than
/// one character are ignored (nothing is appended).
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Multi-letter constraints are not processed at all.
if (Constraint.length() > 1) return;
char ConstraintLetter = Constraint[0];
// Inside the switch, `break` means "Result is set (or the letter is unknown),
// continue to the common tail", while `return` means "operand rejected".
switch (ConstraintLetter) {
default: break;
// 'I': constant whose zero-extended value is at most 31.
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 31) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'J': constant whose zero-extended value is at most 63.
case 'J':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 63) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'K': constant whose sign-extended value passes isInt<8>. Note that the
// emitted target constant is built from the zero-extended value.
case 'K':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<8>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'L': constant equal to 0xff or 0xffff, or to 0xffffffff when the subtarget
// is 64-bit. The emitted target constant uses the sign-extended value.
case 'L':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
(Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) {
Result = DAG.getTargetConstant(C->getSExtValue(), Op.getValueType());
break;
}
}
return;
// 'M': constant whose zero-extended value is at most 3.
case 'M':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 3) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'N': constant whose zero-extended value is at most 255.
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'O': constant whose zero-extended value is at most 127.
case 'O':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 127) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
// 'e': constant whose sign-extended value is reported valid for i32 by
// ConstantInt::isValueValidForType. The result is emitted as an MVT::i64
// target constant holding the sign-extended value.
case 'e': {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
}
}
return;
}
// 'Z': constant whose zero-extended value is reported valid for i32 by
// ConstantInt::isValueValidForType; emitted with the operand's own type.
case 'Z': {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
}
return;
}
// 'i': any constant, or (outside the two PIC styles tested below) a global
// address with optional constant displacements.
case 'i': {
// A plain constant is always accepted and emitted as a sign-extended
// MVT::i64 target constant.
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
break;
}
// Non-constant operands are rejected for GOT-style and stub-style PIC.
if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
return;
GlobalAddressSDNode *GA = nullptr;
int64_t Offset = 0;
// Peel ADD/SUB nodes whose second operand is a constant, accumulating the
// displacement in Offset, until a GlobalAddressSDNode is reached. Op is
// rebound to the first operand on every step. Any other shape is rejected.
while (1) {
if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
Offset += GA->getOffset();
break;
} else if (Op.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
} else if (Op.getOpcode() == ISD::SUB) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Offset += -C->getZExtValue();
Op = Op.getOperand(0);
continue;
}
}
return;
}
const GlobalValue *GV = GA->getGlobal();
// Reject globals whose reference classification is a stub reference.
if (isGlobalStubReference(
Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
return;
// Op now refers to the global-address node found by the loop above.
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
GA->getValueType(0), Offset);
break;
}
}
// Common tail: emit the node built above, if any.
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
// Unknown letter: let the target-independent implementation handle it.
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
/// Map an inline-asm constraint and value type to a (register, register class)
/// pair.
///
/// Single-letter constraints are resolved in the switch below and return
/// register number 0 together with a class chosen from \p VT. Everything else
/// (and any letter that breaks out of the switch) is resolved by
/// TargetLowering::getRegForInlineAsmConstraint, after which the result is
/// adjusted: a few names the generic lookup did not resolve are handled by
/// hand, and a register whose class does not contain \p VT is remapped to a
/// register/class of matching width where a mapping is listed.
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
// 'q': on a 64-bit subtarget pick GR8/GR16/GR32/GR64 by VT (break if VT is
// none of the listed types). On other subtargets the if-body is skipped and
// control falls through into the 'Q' case.
case 'q': if (Subtarget->is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i64 || VT == MVT::f64)
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
// 'Q': the *_ABCD register classes, selected by VT.
case 'Q': if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_ABCDRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_ABCDRegClass);
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_ABCD_LRegClass);
if (VT == MVT::i64)
return std::make_pair(0U, &X86::GR64_ABCDRegClass);
break;
// 'r' / 'l': GR8 or GR16 for the narrow integer types; otherwise GR32 for
// i32/f32 or whenever the subtarget is not 64-bit, and GR64 for the rest.
case 'r': case 'l': if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32 || !Subtarget->is64Bit())
return std::make_pair(0U, &X86::GR32RegClass);
return std::make_pair(0U, &X86::GR64RegClass);
// 'R': same selection scheme as 'r', using the *_NOREX classes.
case 'R': if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREXRegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREXRegClass);
if (VT == MVT::i32 || !Subtarget->is64Bit())
return std::make_pair(0U, &X86::GR32_NOREXRegClass);
return std::make_pair(0U, &X86::GR64_NOREXRegClass);
// 'f': RFP32/RFP64 for f32/f64 when isScalarFPTypeInSSEReg(VT) is false;
// RFP80 in every other situation.
case 'f': if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP32RegClass);
if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
return std::make_pair(0U, &X86::RFP64RegClass);
return std::make_pair(0U, &X86::RFP80RegClass);
// 'y': VR64, only when the subtarget has MMX.
case 'y': if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, &X86::VR64RegClass);
// 'Y': requires SSE2, then falls through to share the 'x' handling.
case 'Y': if (!Subtarget->hasSSE2()) break;
// 'x': requires SSE1; the class is picked from the exact simple value type.
case 'x': if (!Subtarget->hasSSE1()) break;
switch (VT.SimpleTy) {
default: break;
case MVT::f32:
case MVT::i32:
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, &X86::FR64RegClass);
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(0U, &X86::VR128RegClass);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v8f64:
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
return std::make_pair(0U, &X86::VR512RegClass);
}
break;
}
}
// Not resolved above: ask the target-independent implementation.
std::pair<unsigned, const TargetRegisterClass*> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// The generic lookup produced no register class: try a few names by hand.
if (!Res.second) {
// "{st(N)}" with N in 0..7 (the "st" part compared case-insensitively)
// maps to X86::FP0 + N in RFP80.
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' &&
tolower(Constraint[2]) == 't' &&
Constraint[3] == '(' &&
(Constraint[4] >= '0' && Constraint[4] <= '7') &&
Constraint[5] == ')' &&
Constraint[6] == '}') {
Res.first = X86::FP0+Constraint[4]-'0';
Res.second = &X86::RFP80RegClass;
return Res;
}
// "{st}" (case-insensitive) is treated like "{st(0)}".
if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::FP0;
Res.second = &X86::RFP80RegClass;
return Res;
}
// "{flags}" (case-insensitive) maps to EFLAGS in the CCR class.
if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = &X86::CCRRegClass;
return Res;
}
// "A" maps to EAX with the GR32_AD register class.
if (Constraint == "A") {
Res.first = X86::EAX;
Res.second = &X86::GR32_ADRegClass;
return Res;
}
// Still unresolved: hand back the empty result.
return Res;
}
// The class found already contains VT: nothing to adjust.
if (Res.second->hasType(VT))
return Res;
// The lookup returned a 16-bit GPR but VT has a different width: switch to
// the 8-, 32- or 64-bit register listed for it. Registers without a listed
// counterpart leave Res unchanged.
if (Res.second == &X86::GR16RegClass) {
if (VT == MVT::i8 || VT == MVT::i1) {
unsigned DestReg = 0;
// Only AX, DX, CX and BX have an 8-bit mapping here.
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::AL; break;
case X86::DX: DestReg = X86::DL; break;
case X86::CX: DestReg = X86::CL; break;
case X86::BX: DestReg = X86::BL; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR8RegClass;
}
} else if (VT == MVT::i32 || VT == MVT::f32) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::EAX; break;
case X86::DX: DestReg = X86::EDX; break;
case X86::CX: DestReg = X86::ECX; break;
case X86::BX: DestReg = X86::EBX; break;
case X86::SI: DestReg = X86::ESI; break;
case X86::DI: DestReg = X86::EDI; break;
case X86::BP: DestReg = X86::EBP; break;
case X86::SP: DestReg = X86::ESP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR32RegClass;
}
} else if (VT == MVT::i64 || VT == MVT::f64) {
unsigned DestReg = 0;
switch (Res.first) {
default: break;
case X86::AX: DestReg = X86::RAX; break;
case X86::DX: DestReg = X86::RDX; break;
case X86::CX: DestReg = X86::RCX; break;
case X86::BX: DestReg = X86::RBX; break;
case X86::SI: DestReg = X86::RSI; break;
case X86::DI: DestReg = X86::RDI; break;
case X86::BP: DestReg = X86::RBP; break;
case X86::SP: DestReg = X86::RSP; break;
}
if (DestReg) {
Res.first = DestReg;
Res.second = &X86::GR64RegClass;
}
}
// The lookup returned one of the FR*/VR* classes that does not contain VT:
// keep the register but replace the class with the one matching VT, if any.
} else if (Res.second == &X86::FR32RegClass ||
Res.second == &X86::FR64RegClass ||
Res.second == &X86::VR128RegClass ||
Res.second == &X86::VR256RegClass ||
Res.second == &X86::FR32XRegClass ||
Res.second == &X86::FR64XRegClass ||
Res.second == &X86::VR128XRegClass ||
Res.second == &X86::VR256XRegClass ||
Res.second == &X86::VR512RegClass) {
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
else if (X86::VR128RegClass.hasType(VT))
Res.second = &X86::VR128RegClass;
else if (X86::VR256RegClass.hasType(VT))
Res.second = &X86::VR256RegClass;
else if (X86::VR512RegClass.hasType(VT))
Res.second = &X86::VR512RegClass;
}
return Res;
}
/// Cost of the scaling factor in addressing mode \p AM for type \p Ty.
///
/// \returns -1 when isLegalAddressingMode rejects the mode; otherwise 1 if
/// AM.Scale is non-zero and 0 if it is zero.
int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
                                            Type *Ty) const {
  if (!isLegalAddressingMode(AM, Ty))
    return -1;
  return AM.Scale != 0 ? 1 : 0;
}
/// True only when the subtarget reports isTargetKnownWindowsMSVC() and is not
/// 64-bit.
bool X86TargetLowering::isTargetFTOL() const {
  if (!Subtarget->isTargetKnownWindowsMSVC())
    return false;
  return !Subtarget->is64Bit();
}