X86ISelDAGToDAG.cpp [plain text]
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
namespace {
struct X86ISelAddressMode {
enum {
RegBase,
FrameIndexBase
} BaseType;
SDValue Base_Reg;
int Base_FrameIndex;
unsigned Scale;
SDValue IndexReg;
int32_t Disp;
SDValue Segment;
const GlobalValue *GV;
const Constant *CP;
const BlockAddress *BlockAddr;
const char *ES;
int JT;
unsigned Align; unsigned char SymbolFlags;
X86ISelAddressMode()
: BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
SymbolFlags(X86II::MO_NO_FLAG) {
}
bool hasSymbolicDisplacement() const {
return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
}
bool hasBaseOrIndexReg() const {
return BaseType == FrameIndexBase ||
IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
if (RegisterSDNode *RegNode =
dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
return RegNode->getReg() == X86::RIP;
return false;
}
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
if (Base_Reg.getNode() != 0)
Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
if (IndexReg.getNode() != 0)
IndexReg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Disp " << Disp << '\n'
<< "GV ";
if (GV)
GV->dump();
else
dbgs() << "nul";
dbgs() << " CP ";
if (CP)
CP->dump();
else
dbgs() << "nul";
dbgs() << '\n'
<< "ES ";
if (ES)
dbgs() << ES;
else
dbgs() << "nul";
dbgs() << " JT" << JT << " Align" << Align << '\n';
}
#endif
};
}
namespace {
class X86DAGToDAGISel : public SelectionDAGISel {
const X86Subtarget *Subtarget;
bool OptForSize;
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel),
Subtarget(&tm.getSubtarget<X86Subtarget>()),
OptForSize(false) {}
virtual const char *getPassName() const {
return "X86 DAG->DAG Instruction Selection";
}
virtual void EmitFunctionEntryCode();
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
virtual void PreprocessISelDAG();
inline bool immSext8(SDNode *N) const {
return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
}
inline bool i64immSExt32(SDNode *N) const {
uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
return (int64_t)v == (int32_t)v;
}
#include "X86GenDAGISel.inc"
private:
SDNode *Select(SDNode *N);
SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
bool SelectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);
inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
getTargetLowering()->getPointerTy()) :
AM.Base_Reg;
Scale = getI8Imm(AM.Scale);
Index = AM.IndexReg;
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
MVT::i32, AM.Disp,
AM.SymbolFlags);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
AM.Align, AM.Disp, AM.SymbolFlags);
else if (AM.ES) {
assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
} else if (AM.JT != -1) {
assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
} else if (AM.BlockAddr)
Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
AM.SymbolFlags);
else
Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
if (AM.Segment.getNode())
Segment = AM.Segment;
else
Segment = CurDAG->getRegister(0, MVT::i32);
}
inline SDValue getI8Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i8);
}
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
SDNode *getGlobalBaseReg();
const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine().getInstrInfo();
}
};
}
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
if (OptLevel == CodeGenOpt::None) return false;
if (!N.hasOneUse())
return false;
if (N.getOpcode() != ISD::LOAD)
return true;
if (U == Root) {
switch (U->getOpcode()) {
default: break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::AND:
case X86ISD::XOR:
case X86ISD::OR:
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
SDValue Op1 = U->getOperand(1);
if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
if (Imm->getAPIntValue().isSignedIntN(8))
return false;
if (Op1.getOpcode() == X86ISD::Wrapper) {
SDValue Val = Op1.getOperand(0);
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
return false;
}
}
}
}
return true;
}
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = OrigChain.getOperand(0);
if (Chain.getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
else {
assert(Chain.getOpcode() == ISD::TokenFactor &&
"Unexpected chain operand");
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
if (Chain.getOperand(i).getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
else
Ops.push_back(Chain.getOperand(i));
SDValue NewChain =
CurDAG->getNode(ISD::TokenFactor, SDLoc(Load),
MVT::Other, &Ops[0], Ops.size());
Ops.clear();
Ops.push_back(NewChain);
}
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
Ops.push_back(OrigChain.getOperand(i));
CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
unsigned NumOps = Call.getNode()->getNumOperands();
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
for (unsigned i = 1, e = NumOps; i != e; ++i)
Ops.push_back(Call.getOperand(i));
CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
}
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
if (!LD ||
LD->isVolatile() ||
LD->getAddressingMode() != ISD::UNINDEXED ||
LD->getExtensionType() != ISD::NON_EXTLOAD)
return false;
while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
if (!Chain.hasOneUse())
return false;
Chain = Chain.getOperand(0);
}
if (!Chain.getNumOperands())
return false;
if (isa<MemSDNode>(Chain.getNode()) &&
cast<MemSDNode>(Chain.getNode())->writeMem())
return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
Callee.getValue(1).hasOneUse())
return true;
return false;
}
void X86DAGToDAGISel::PreprocessISelDAG() {
OptForSize = MF->getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++;
if (OptLevel != CodeGenOpt::None &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||
getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
SDValue Chain = N->getOperand(0);
SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain, HasCallSeq))
continue;
MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
MVT SrcVT = N->getOperand(0).getSimpleValueType();
MVT DstVT = N->getSimpleValueType(0);
if (SrcVT.isVector() || DstVT.isVector())
continue;
const X86TargetLowering *X86Lowering =
static_cast<const X86TargetLowering *>(getTargetLowering());
bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
if (SrcIsSSE && DstIsSSE)
continue;
if (!SrcIsSSE && !DstIsSSE) {
if (N->getOpcode() == ISD::FP_EXTEND)
continue;
if (N->getConstantOperandVal(1))
continue;
}
MVT MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; else
MemVT = SrcIsSSE ? SrcVT : DstVT;
SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
SDLoc dl(N);
SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
N->getOperand(0),
MemTmp, MachinePointerInfo(), MemVT,
false, false, 0);
SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
MachinePointerInfo(),
MemVT, false, false, 0);
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
++I;
CurDAG->DeleteNode(N);
}
}
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
MachineFrameInfo *MFI) {
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
TII->get(CallOp)).addExternalSymbol("__main");
}
}
void X86DAGToDAGISel::EmitFunctionEntryCode() {
if (const Function *Fn = MF->getFunction())
if (Fn->hasExternalLinkage() && Fn->getName() == "main")
EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}
static bool isDispSafeForFrameIndex(int64_t Val) {
return isInt<31>(Val);
}
bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
int64_t Val = AM.Disp + Offset;
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit()) {
if (!X86::isOffsetSuitableForCodeModel(Val, M,
AM.hasSymbolicDisplacement()))
return true;
if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
!isDispSafeForFrameIndex(Val))
return true;
}
AM.Disp = Val;
return false;
}
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
return false;
case 257:
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
return false;
}
return true;
}
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
if (AM.hasSymbolicDisplacement())
return true;
SDValue N0 = N.getOperand(0);
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
(M == CodeModel::Small || M == CodeModel::Kernel)) {
if (AM.hasBaseOrIndexReg())
return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
AM.SymbolFlags = G->getTargetFlags();
if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
AM = Backup;
return true;
}
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.SymbolFlags = CP->getTargetFlags();
if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
AM = Backup;
return true;
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.BlockAddr = BA->getBlockAddress();
AM.SymbolFlags = BA->getTargetFlags();
if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
AM = Backup;
return true;
}
} else
llvm_unreachable("Unhandled symbol reference node.");
if (N.getOpcode() == X86ISD::WrapperRIP)
AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
return false;
}
if (!Subtarget->is64Bit() ||
M == CodeModel::Small || M == CodeModel::Kernel) {
assert(N.getOpcode() != X86ISD::WrapperRIP &&
"RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
AM.SymbolFlags = G->getTargetFlags();
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.Disp += CP->getOffset();
AM.SymbolFlags = CP->getTargetFlags();
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
AM.JT = J->getIndex();
AM.SymbolFlags = J->getTargetFlags();
} else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
AM.BlockAddr = BA->getBlockAddress();
AM.Disp += BA->getOffset();
AM.SymbolFlags = BA->getTargetFlags();
} else
llvm_unreachable("Unhandled symbol reference node.");
return false;
}
return true;
}
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
if (MatchAddressRecursively(N, AM, 0))
return true;
if (AM.Scale == 2 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base_Reg.getNode() == 0) {
AM.Base_Reg = AM.IndexReg;
AM.Scale = 1;
}
if (TM.getCodeModel() == CodeModel::Small &&
Subtarget->is64Bit() &&
AM.Scale == 1 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0 &&
AM.SymbolFlags == X86II::MO_NO_FLAG &&
AM.hasSymbolicDisplacement())
AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
return false;
}
static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
if (N.getNode()->getNodeId() == -1 ||
N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
DAG.RepositionNode(Pos.getNode(), N.getNode());
N.getNode()->setNodeId(Pos.getNode()->getNodeId());
}
}
static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
if (Shift.getOpcode() != ISD::SRL ||
!isa<ConstantSDNode>(Shift.getOperand(1)) ||
!Shift.hasOneUse())
return true;
int ScaleLog = 8 - Shift.getConstantOperandVal(1);
if (ScaleLog <= 0 || ScaleLog >= 4 ||
Mask != (0xffu << ScaleLog))
return true;
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue Eight = DAG.getConstant(8, MVT::i8);
SDValue NewMask = DAG.getConstant(0xff, VT);
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
InsertDAGNode(DAG, N, Eight);
InsertDAGNode(DAG, N, Srl);
InsertDAGNode(DAG, N, NewMask);
InsertDAGNode(DAG, N, And);
InsertDAGNode(DAG, N, ShlCount);
InsertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
return false;
}
static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
if (Shift.getOpcode() != ISD::SHL ||
!isa<ConstantSDNode>(Shift.getOperand(1)))
return true;
if (!N.hasOneUse() || !Shift.hasOneUse())
return true;
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
return true;
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
InsertDAGNode(DAG, N, NewMask);
InsertDAGNode(DAG, N, NewAnd);
InsertDAGNode(DAG, N, NewShift);
DAG.ReplaceAllUsesWith(N, NewShift);
AM.Scale = 1 << ShiftAmt;
AM.IndexReg = NewAnd;
return false;
}
static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
!isa<ConstantSDNode>(Shift.getOperand(1)))
return true;
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
unsigned MaskLZ = countLeadingZeros(Mask);
unsigned MaskTZ = countTrailingZeros(Mask);
unsigned AMShiftAmt = MaskTZ;
if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
bool ReplacingAnyExtend = false;
if (X.getOpcode() == ISD::ANY_EXTEND) {
unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
X.getOperand(0).getSimpleValueType().getSizeInBits();
X = X.getOperand(0);
MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
ReplacingAnyExtend = true;
}
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
MVT VT = N.getSimpleValueType();
if (ReplacingAnyExtend) {
assert(X.getValueType() != VT);
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
InsertDAGNode(DAG, N, NewX);
X = NewX;
}
SDLoc DL(N);
SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
InsertDAGNode(DAG, N, NewSRLAmt);
InsertDAGNode(DAG, N, NewSRL);
InsertDAGNode(DAG, N, NewSHLAmt);
InsertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
AM.Scale = 1 << AMShiftAmt;
AM.IndexReg = NewSRL;
return false;
}
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
DEBUG({
dbgs() << "MatchAddress: ";
AM.dump();
});
if (Depth > 5)
return MatchAddressBase(N, AM);
if (AM.isRIPRelative()) {
if (!AM.ES && AM.JT != -1) return true;
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
return false;
return true;
}
switch (N.getOpcode()) {
default: break;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (!FoldOffsetIntoAddress(Val, AM))
return false;
break;
}
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
if (!MatchWrapper(N, AM))
return false;
break;
case ISD::LOAD:
if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base_Reg.getNode() == 0 &&
(!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
}
break;
case ISD::SHL:
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
break;
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
unsigned Val = CN->getZExtValue();
if (Val == 1 || Val == 2 || Val == 3) {
AM.Scale = 1 << Val;
SDValue ShVal = N.getNode()->getOperand(0);
if (CurDAG->isBaseWithConstantOffset(ShVal)) {
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
if (!FoldOffsetIntoAddress(Disp, AM))
return false;
}
AM.IndexReg = ShVal;
return false;
}
}
break;
case ISD::SRL: {
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
SDValue And = N.getOperand(0);
if (And.getOpcode() != ISD::AND) break;
SDValue X = And.getOperand(0);
if (X.getSimpleValueType().getSizeInBits() > 64) break;
if (!isa<ConstantSDNode>(N.getOperand(1)) ||
!isa<ConstantSDNode>(And.getOperand(1)))
break;
uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
return false;
break;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
if (N.getResNo() != 0) break;
case ISD::MUL:
case X86ISD::MUL_IMM:
if (AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base_Reg.getNode() == 0 &&
AM.IndexReg.getNode() == 0) {
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
CN->getZExtValue() == 9) {
AM.Scale = unsigned(CN->getZExtValue())-1;
SDValue MulVal = N.getNode()->getOperand(0);
SDValue Reg;
if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
Reg = MulVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
if (FoldOffsetIntoAddress(Disp, AM))
Reg = N.getNode()->getOperand(0);
} else {
Reg = N.getNode()->getOperand(0);
}
AM.IndexReg = AM.Base_Reg = Reg;
return false;
}
}
break;
case ISD::SUB: {
HandleSDNode Handle(N);
X86ISelAddressMode Backup = AM;
if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
AM = Backup;
break;
}
int Cost = 0;
SDValue RHS = Handle.getValue().getNode()->getOperand(1);
if (!RHS.getNode()->hasOneUse() ||
RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
(RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
++Cost;
if ((AM.BaseType == X86ISelAddressMode::RegBase &&
AM.Base_Reg.getNode() &&
!AM.Base_Reg.getNode()->hasOneUse()) ||
AM.BaseType == X86ISelAddressMode::FrameIndexBase)
--Cost;
if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
((AM.Disp != 0) && (Backup.Disp == 0)) +
(AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
--Cost;
if (Cost >= 0) {
AM = Backup;
break;
}
SDValue Zero = CurDAG->getConstant(0, N.getValueType());
SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
AM.IndexReg = Neg;
AM.Scale = 1;
InsertDAGNode(*CurDAG, N, Zero);
InsertDAGNode(*CurDAG, N, Neg);
return false;
}
case ISD::ADD: {
HandleSDNode Handle(N);
X86ISelAddressMode Backup = AM;
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
return false;
AM = Backup;
if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
!MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
return false;
AM = Backup;
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
N = Handle.getValue();
AM.Base_Reg = N.getOperand(0);
AM.IndexReg = N.getOperand(1);
AM.Scale = 1;
return false;
}
N = Handle.getValue();
break;
}
case ISD::OR:
if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
!FoldOffsetIntoAddress(CN->getSExtValue(), AM))
return false;
AM = Backup;
}
break;
case ISD::AND: {
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
SDValue Shift = N.getOperand(0);
if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
SDValue X = Shift.getOperand(0);
if (X.getSimpleValueType().getSizeInBits() > 64) break;
if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
uint64_t Mask = N.getConstantOperandVal(1);
if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
return false;
if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
return false;
if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
break;
}
}
return MatchAddressBase(N, AM);
}
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
if (AM.IndexReg.getNode() == 0) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
}
return true;
}
AM.BaseType = X86ISelAddressMode::RegBase;
AM.Base_Reg = N;
return false;
}
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
if (Parent &&
Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && Parent->getOpcode() != ISD::INTRINSIC_VOID && Parent->getOpcode() != X86ISD::TLSCALL && Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { unsigned AddrSpace =
cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
if (AddrSpace == 256)
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
if (AddrSpace == 257)
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
if (MatchAddress(N, AM))
return false;
MVT VT = N.getSimpleValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base_Reg.getNode())
AM.Base_Reg = CurDAG->getRegister(0, VT);
}
if (!AM.IndexReg.getNode())
AM.IndexReg = CurDAG->getRegister(0, VT);
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
SDValue &PatternNodeWithChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
PatternNodeWithChain = N.getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
PatternNodeWithChain.hasOneUse() &&
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
return true;
}
}
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
N.getOperand(0).getOperand(0).hasOneUse() &&
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
PatternNodeWithChain = SDValue(LD, 0);
return true;
}
return false;
}
bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CN->getZExtValue();
if ((uint32_t)ImmVal != (uint64_t)ImmVal)
return false;
Imm = CurDAG->getTargetConstant(ImmVal, MVT::i64);
return true;
}
assert(N->getOpcode() == X86ISD::Wrapper &&
"Unexpected node type for MOV32ri64");
N = N.getOperand(0);
if (N->getOpcode() != ISD::TargetConstantPool &&
N->getOpcode() != ISD::TargetJumpTable &&
N->getOpcode() != ISD::TargetGlobalAddress &&
N->getOpcode() != ISD::TargetExternalSymbol &&
N->getOpcode() != ISD::TargetBlockAddress)
return false;
Imm = N;
return TM.getCodeModel() == CodeModel::Small;
}
bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
return false;
SDLoc DL(N);
RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
if (RN && RN->getReg() == 0)
Base = CurDAG->getRegister(0, MVT::i64);
else if (Base.getValueType() == MVT::i32 && !dyn_cast<FrameIndexSDNode>(N)) {
Base = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
CurDAG->getTargetConstant(0, MVT::i64),
Base,
CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
0);
}
RN = dyn_cast<RegisterSDNode>(Index);
if (RN && RN->getReg() == 0)
Index = CurDAG->getRegister(0, MVT::i64);
else {
assert(Index.getValueType() == MVT::i32 &&
"Expect to be extending 32-bit registers for use in LEA");
Index = SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
CurDAG->getTargetConstant(0, MVT::i64),
Index,
CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
0);
}
return true;
}
bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
X86ISelAddressMode AM;
SDValue Copy = AM.Segment;
SDValue T = CurDAG->getRegister(0, MVT::i32);
AM.Segment = T;
if (MatchAddress(N, AM))
return false;
assert (T == AM.Segment);
AM.Segment = Copy;
MVT VT = N.getSimpleValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base_Reg.getNode())
Complexity = 1;
else
AM.Base_Reg = CurDAG->getRegister(0, VT);
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
Complexity = 4;
if (AM.IndexReg.getNode())
Complexity++;
else
AM.IndexReg = CurDAG->getRegister(0, VT);
if (AM.Scale > 1)
Complexity++;
if (AM.hasSymbolicDisplacement()) {
if (Subtarget->is64Bit())
Complexity = 4;
else
Complexity += 2;
}
if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
Complexity++;
if (Complexity <= 2)
return false;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
AM.SymbolFlags = GA->getTargetFlags();
if (N.getValueType() == MVT::i32) {
AM.Scale = 1;
AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
} else {
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
if (!ISD::isNON_EXTLoad(N.getNode()) ||
!IsProfitableToFold(N, P, P) ||
!IsLegalToFold(N, P, P, OptLevel))
return false;
return SelectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg,
getTargetLowering()->getPointerTy()).getNode();
}
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
SDValue In2L = Node->getOperand(2);
SDValue In2H = Node->getOperand(3);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return NULL;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
MVT::i32, MVT::i32, MVT::Other, Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
enum AtomicOpc {
ADD,
SUB,
INC,
DEC,
OR,
AND,
XOR,
AtomicOpcEnd
};
enum AtomicSz {
ConstantI8,
I8,
SextConstantI16,
ConstantI16,
I16,
SextConstantI32,
ConstantI32,
I32,
SextConstantI64,
ConstantI64,
I64,
AtomicSzEnd
};
static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_ADD8mi,
X86::LOCK_ADD8mr,
X86::LOCK_ADD16mi8,
X86::LOCK_ADD16mi,
X86::LOCK_ADD16mr,
X86::LOCK_ADD32mi8,
X86::LOCK_ADD32mi,
X86::LOCK_ADD32mr,
X86::LOCK_ADD64mi8,
X86::LOCK_ADD64mi32,
X86::LOCK_ADD64mr,
},
{
X86::LOCK_SUB8mi,
X86::LOCK_SUB8mr,
X86::LOCK_SUB16mi8,
X86::LOCK_SUB16mi,
X86::LOCK_SUB16mr,
X86::LOCK_SUB32mi8,
X86::LOCK_SUB32mi,
X86::LOCK_SUB32mr,
X86::LOCK_SUB64mi8,
X86::LOCK_SUB64mi32,
X86::LOCK_SUB64mr,
},
{
0,
X86::LOCK_INC8m,
0,
0,
X86::LOCK_INC16m,
0,
0,
X86::LOCK_INC32m,
0,
0,
X86::LOCK_INC64m,
},
{
0,
X86::LOCK_DEC8m,
0,
0,
X86::LOCK_DEC16m,
0,
0,
X86::LOCK_DEC32m,
0,
0,
X86::LOCK_DEC64m,
},
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
X86::LOCK_OR16mi8,
X86::LOCK_OR16mi,
X86::LOCK_OR16mr,
X86::LOCK_OR32mi8,
X86::LOCK_OR32mi,
X86::LOCK_OR32mr,
X86::LOCK_OR64mi8,
X86::LOCK_OR64mi32,
X86::LOCK_OR64mr,
},
{
X86::LOCK_AND8mi,
X86::LOCK_AND8mr,
X86::LOCK_AND16mi8,
X86::LOCK_AND16mi,
X86::LOCK_AND16mr,
X86::LOCK_AND32mi8,
X86::LOCK_AND32mi,
X86::LOCK_AND32mr,
X86::LOCK_AND64mi8,
X86::LOCK_AND64mi32,
X86::LOCK_AND64mr,
},
{
X86::LOCK_XOR8mi,
X86::LOCK_XOR8mr,
X86::LOCK_XOR16mi8,
X86::LOCK_XOR16mi,
X86::LOCK_XOR16mr,
X86::LOCK_XOR32mi8,
X86::LOCK_XOR32mi,
X86::LOCK_XOR32mr,
X86::LOCK_XOR64mi8,
X86::LOCK_XOR64mi32,
X86::LOCK_XOR64mr,
}
};
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDLoc dl,
enum AtomicOpc &Op, MVT NVT,
SDValue Val) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
int64_t CNVal = CN->getSExtValue();
if ((int32_t)CNVal != CNVal)
return Val;
if (Op == ADD) {
if ((CNVal == 1) || (CNVal == -1)) {
Op = (CNVal == 1) ? INC : DEC;
return SDValue();
}
if (CNVal < 0) {
Op = SUB;
CNVal = -CNVal;
}
}
return CurDAG->getTargetConstant(CNVal, NVT);
}
if (Op == ADD && Val.hasOneUse()) {
if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
Op = SUB;
return Val.getOperand(1);
}
if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
Val.getOperand(0).getOpcode() == ISD::SUB &&
X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
Op = SUB;
Val = Val.getOperand(0);
return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
Val.getOperand(1));
}
}
return Val;
}
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
SDLoc dl(Node);
SDValue Chain = Node->getOperand(0);
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
return 0;
enum AtomicOpc Op;
switch (Node->getOpcode()) {
default:
return 0;
case ISD::ATOMIC_LOAD_OR:
Op = OR;
break;
case ISD::ATOMIC_LOAD_AND:
Op = AND;
break;
case ISD::ATOMIC_LOAD_XOR:
Op = XOR;
break;
case ISD::ATOMIC_LOAD_ADD:
Op = ADD;
break;
}
Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
bool isUnOp = !Val.getNode();
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
unsigned Opc = 0;
switch (NVT.SimpleTy) {
default: return 0;
case MVT::i8:
if (isCN)
Opc = AtomicOpcTbl[Op][ConstantI8];
else
Opc = AtomicOpcTbl[Op][I8];
break;
case MVT::i16:
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI16];
else
Opc = AtomicOpcTbl[Op][ConstantI16];
} else
Opc = AtomicOpcTbl[Op][I16];
break;
case MVT::i32:
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI32];
else
Opc = AtomicOpcTbl[Op][ConstantI32];
} else
Opc = AtomicOpcTbl[Op][I32];
break;
case MVT::i64:
Opc = AtomicOpcTbl[Op][I64];
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI64];
else if (i64immSExt32(Val.getNode()))
Opc = AtomicOpcTbl[Op][ConstantI64];
}
break;
}
assert(Opc != 0 && "Invalid arith lock transform!");
SDValue Ret;
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
if (isUnOp) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
} else {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
}
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
SDValue RetVals[] = { Undef, Ret };
return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
}
static bool HasNoSignedComparisonUses(SDNode *N) {
for (SDNode::use_iterator UI = N->use_begin(),
UE = N->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
X86::EFLAGS)
return false;
for (SDNode::use_iterator FlagUI = UI->use_begin(),
FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
if (FlagUI.getUse().getResNo() != 1) continue;
if (!FlagUI->isMachineOpcode()) return false;
switch (FlagUI->getMachineOpcode()) {
case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
case X86::CMOVA16rr: case X86::CMOVA16rm:
case X86::CMOVA32rr: case X86::CMOVA32rm:
case X86::CMOVA64rr: case X86::CMOVA64rm:
case X86::CMOVAE16rr: case X86::CMOVAE16rm:
case X86::CMOVAE32rr: case X86::CMOVAE32rm:
case X86::CMOVAE64rr: case X86::CMOVAE64rm:
case X86::CMOVB16rr: case X86::CMOVB16rm:
case X86::CMOVB32rr: case X86::CMOVB32rm:
case X86::CMOVB64rr: case X86::CMOVB64rm:
case X86::CMOVBE16rr: case X86::CMOVBE16rm:
case X86::CMOVBE32rr: case X86::CMOVBE32rm:
case X86::CMOVBE64rr: case X86::CMOVBE64rm:
case X86::CMOVE16rr: case X86::CMOVE16rm:
case X86::CMOVE32rr: case X86::CMOVE32rm:
case X86::CMOVE64rr: case X86::CMOVE64rm:
case X86::CMOVNE16rr: case X86::CMOVNE16rm:
case X86::CMOVNE32rr: case X86::CMOVNE32rm:
case X86::CMOVNE64rr: case X86::CMOVNE64rm:
case X86::CMOVNP16rr: case X86::CMOVNP16rm:
case X86::CMOVNP32rr: case X86::CMOVNP32rm:
case X86::CMOVNP64rr: case X86::CMOVNP64rm:
case X86::CMOVP16rr: case X86::CMOVP16rm:
case X86::CMOVP32rr: case X86::CMOVP32rm:
case X86::CMOVP64rr: case X86::CMOVP64rm:
continue;
default: return false;
}
}
}
return true;
}
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
if (StoredVal.getResNo() != 0) return false;
if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
return false;
SDValue Load = StoredVal->getOperand(0);
if (!ISD::isNormalLoad(Load.getNode())) return false;
LoadNode = cast<LoadSDNode>(Load);
EVT LdVT = LoadNode->getMemoryVT();
if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
LdVT != MVT::i8)
return false;
if (!Load.hasOneUse())
return false;
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
LoadNode->getOffset() != StoreNode->getOffset())
return false;
SDValue Chain = StoreNode->getChain();
bool ChainCheck = false;
if (Chain == Load.getValue(1)) {
ChainCheck = true;
InputChain = LoadNode->getChain();
} else if (Chain.getOpcode() == ISD::TokenFactor) {
SmallVector<SDValue, 4> ChainOps;
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
SDValue Op = Chain.getOperand(i);
if (Op == Load.getValue(1)) {
ChainCheck = true;
continue;
}
int LoadId = LoadNode->getNodeId();
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = UI->use_end(); UI != UE; ++UI) {
if (UI.getUse().getResNo() != 0)
continue;
if (UI->getNodeId() > LoadId)
return false;
}
ChainOps.push_back(Op);
}
if (ChainCheck)
InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
MVT::Other, &ChainOps[0], ChainOps.size());
}
if (!ChainCheck)
return false;
return true;
}
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
if (Opc == X86ISD::DEC) {
if (LdVT == MVT::i64) return X86::DEC64m;
if (LdVT == MVT::i32) return X86::DEC32m;
if (LdVT == MVT::i16) return X86::DEC16m;
if (LdVT == MVT::i8) return X86::DEC8m;
} else {
assert(Opc == X86ISD::INC && "unrecognized opcode");
if (LdVT == MVT::i64) return X86::INC64m;
if (LdVT == MVT::i32) return X86::INC32m;
if (LdVT == MVT::i16) return X86::INC16m;
if (LdVT == MVT::i8) return X86::INC8m;
}
llvm_unreachable("unrecognized size for LdVT");
}
SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue VSrc = Node->getOperand(2);
SDValue Base = Node->getOperand(3);
SDValue VIdx = Node->getOperand(4);
SDValue VMask = Node->getOperand(5);
ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
if (!Scale)
return 0;
SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
MVT::Other);
SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
SDValue Segment = CurDAG->getRegister(0, MVT::i32);
const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
Disp, Segment, VMask, Chain};
SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node), VTs, Ops);
ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
return ResNode;
}
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
SDLoc dl(Node);
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
Node->setNodeId(-1);
return NULL; }
switch (Opcode) {
default: break;
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
default: break;
case Intrinsic::x86_avx2_gather_d_pd:
case Intrinsic::x86_avx2_gather_d_pd_256:
case Intrinsic::x86_avx2_gather_q_pd:
case Intrinsic::x86_avx2_gather_q_pd_256:
case Intrinsic::x86_avx2_gather_d_ps:
case Intrinsic::x86_avx2_gather_d_ps_256:
case Intrinsic::x86_avx2_gather_q_ps:
case Intrinsic::x86_avx2_gather_q_ps_256:
case Intrinsic::x86_avx2_gather_d_q:
case Intrinsic::x86_avx2_gather_d_q_256:
case Intrinsic::x86_avx2_gather_q_q:
case Intrinsic::x86_avx2_gather_q_q_256:
case Intrinsic::x86_avx2_gather_d_d:
case Intrinsic::x86_avx2_gather_d_d_256:
case Intrinsic::x86_avx2_gather_q_d:
case Intrinsic::x86_avx2_gather_q_d_256: {
if (!Subtarget->hasAVX2())
break;
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic");
case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
}
SDNode *RetVal = SelectGather(Node, Opc);
if (RetVal)
return NULL;
break;
}
}
break;
}
case X86ISD::GlobalBaseReg:
return getGlobalBaseReg();
case X86ISD::ATOMOR64_DAG:
case X86ISD::ATOMXOR64_DAG:
case X86ISD::ATOMADD64_DAG:
case X86ISD::ATOMSUB64_DAG:
case X86ISD::ATOMNAND64_DAG:
case X86ISD::ATOMAND64_DAG:
case X86ISD::ATOMMAX64_DAG:
case X86ISD::ATOMMIN64_DAG:
case X86ISD::ATOMUMAX64_DAG:
case X86ISD::ATOMUMIN64_DAG:
case X86ISD::ATOMSWAP64_DAG: {
unsigned Opc;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
}
SDNode *RetVal = SelectAtomic64(Node, Opc);
if (RetVal)
return RetVal;
break;
}
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_ADD: {
SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
if (RetVal)
return RetVal;
break;
}
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
break;
if (NVT != MVT::i32 && NVT != MVT::i64)
break;
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (!Cst || !ShlCst)
break;
int64_t Val = Cst->getSExtValue();
uint64_t ShlVal = ShlCst->getZExtValue();
uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
unsigned ShlOp, Op;
MVT CstVT = NVT;
if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
CstVT = MVT::i8;
else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
CstVT = MVT::i32;
if (NVT == CstVT)
break;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
assert(CstVT == MVT::i8);
ShlOp = X86::SHL32ri;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
}
break;
case MVT::i64:
assert(CstVT == MVT::i8 || CstVT == MVT::i32);
ShlOp = X86::SHL64ri;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
}
break;
}
SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
}
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned LoReg;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SMUL_LOHI;
bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
}
}
unsigned SrcReg, LoReg, HiReg;
switch (Opc) {
default: llvm_unreachable("Unknown MUL opcode!");
case X86::IMUL8r:
case X86::MUL8r:
SrcReg = LoReg = X86::AL; HiReg = X86::AH;
break;
case X86::IMUL16r:
case X86::MUL16r:
SrcReg = LoReg = X86::AX; HiReg = X86::DX;
break;
case X86::IMUL32r:
case X86::MUL32r:
SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
break;
case X86::IMUL64r:
case X86::MUL64r:
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
break;
case X86::MULX32rr:
SrcReg = X86::EDX; LoReg = HiReg = 0;
break;
case X86::MULX64rr:
SrcReg = X86::RDX; LoReg = HiReg = 0;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (!foldedLoad) {
foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
N0, SDValue()).getValue(1);
SDValue ResHi, ResLo;
if (foldedLoad) {
SDValue Chain;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
Chain = SDValue(CNode, 2);
InFlag = SDValue(CNode, 3);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
Chain = SDValue(CNode, 0);
InFlag = SDValue(CNode, 1);
}
ReplaceUses(N1.getValue(1), Chain);
} else {
SDValue Ops[] = { N1, InFlag };
if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ResHi = SDValue(CNode, 0);
ResLo = SDValue(CNode, 1);
InFlag = SDValue(CNode, 2);
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
InFlag = SDValue(CNode, 0);
}
}
if (HiReg == X86::AH && Subtarget->is64Bit() &&
!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
}
if (!SDValue(Node, 0).use_empty()) {
if (ResLo.getNode() == 0) {
assert(LoReg && "Register for low half is not defined!");
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
InFlag);
InFlag = ResLo.getValue(2);
}
ReplaceUses(SDValue(Node, 0), ResLo);
DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
}
if (!SDValue(Node, 1).use_empty()) {
if (ResHi.getNode() == 0) {
assert(HiReg && "Register for high half is not defined!");
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
InFlag);
InFlag = ResHi.getValue(2);
}
ReplaceUses(SDValue(Node, 1), ResHi);
DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
}
case ISD::SDIVREM:
case ISD::UDIVREM: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
} else {
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
}
}
unsigned LoReg, HiReg, ClrReg;
unsigned SExtOpcode;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
SExtOpcode = X86::CBW;
break;
case MVT::i16:
LoReg = X86::AX; HiReg = X86::DX;
ClrReg = X86::DX;
SExtOpcode = X86::CWD;
break;
case MVT::i32:
LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
SExtOpcode = X86::CDQ;
break;
case MVT::i64:
LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
SExtOpcode = X86::CQO;
break;
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
MVT::Other, Ops), 0);
Chain = Move.getValue(1);
ReplaceUses(N0.getValue(1), Chain);
} else {
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0),0);
Chain = CurDAG->getEntryNode();
}
Chain = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
InFlag = Chain.getValue(1);
} else {
InFlag =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
LoReg, N0, SDValue()).getValue(1);
if (isSigned && !signBitIsZero) {
InFlag =
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
CurDAG->getTargetConstant(X86::sub_16bit, MVT::i32)),
0);
break;
case MVT::i32:
break;
case MVT::i64:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
CurDAG->getTargetConstant(0, MVT::i64), ClrNode,
CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
0);
break;
default:
llvm_unreachable("Unexpected division source");
}
InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
ClrNode, InFlag).getValue(1);
}
}
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
InFlag };
SDNode *CNode =
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
InFlag = SDValue(CNode, 1);
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}
if (HiReg == X86::AH && Subtarget->is64Bit() &&
!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
X86::AX, MVT::i16, InFlag);
InFlag = Result.getValue(2);
if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 0),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
Result,
CurDAG->getTargetConstant(8, MVT::i8)),
0);
ReplaceUses(SDValue(Node, 1),
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
}
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
if (!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
HiReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
}
case X86ISD::CMP:
case X86ISD::SUB: {
if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
break;
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
HasNoSignedComparisonUses(Node))
N0 = N0.getOperand(0);
if ((N0.getNode()->getOpcode() == ISD::AND ||
(N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8 &&
X86::isZeroNode(N1)) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
if (!C) break;
if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
(!(C->getZExtValue() & 0x80) ||
HasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
if (!Subtarget->is64Bit()) {
const TargetRegisterClass *TRC;
switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
}
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
Reg.getValueType(), Reg, RC), 0);
}
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Reg);
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
Subreg, Imm);
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
return NULL;
}
if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
(!(C->getZExtValue() & 0x8000) ||
HasNoSignedComparisonUses(Node))) {
SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
const TargetRegisterClass *TRC;
switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
}
SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
Reg.getValueType(), Reg, RC), 0);
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
MVT::i32, Subreg, ShiftedImm);
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
return NULL;
}
if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
N0.getValueType() != MVT::i16 &&
(!(C->getZExtValue() & 0x8000) ||
HasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
SDValue Reg = N0.getNode()->getOperand(0);
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
Subreg, Imm);
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
return NULL;
}
if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
N0.getValueType() == MVT::i64 &&
(!(C->getZExtValue() & 0x80000000) ||
HasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
SDValue Reg = N0.getNode()->getOperand(0);
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
Subreg, Imm);
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
return NULL;
}
}
break;
}
case ISD::STORE: {
StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
SDValue StoredVal = StoreNode->getOperand(1);
unsigned Opc = StoredVal->getOpcode();
LoadSDNode *LoadNode = 0;
SDValue InputChain;
if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
LoadNode, InputChain))
break;
SDValue Base, Scale, Index, Disp, Segment;
if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
break;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
MemOp[0] = StoreNode->getMemOperand();
MemOp[1] = LoadNode->getMemOperand();
const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
EVT LdVT = LoadNode->getMemoryVT();
unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
SDLoc(Node),
MVT::i32, MVT::Other, Ops);
Result->setMemRefs(MemOp, MemOp + 2);
ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
return Result;
}
}
SDNode *ResNode = SelectCode(Node);
DEBUG(dbgs() << "=> ";
if (ResNode == NULL || ResNode == Node)
Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
dbgs() << '\n');
return ResNode;
}
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, Op2, Op3, Op4;
switch (ConstraintCode) {
case 'o': case 'v': default: return true;
case 'm': if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(Op2);
OutOps.push_back(Op3);
OutOps.push_back(Op4);
return false;
}
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}