// ARM64FrameLowering.cpp
#define DEBUG_TYPE "frame-info"
#include "ARM64FrameLowering.h"
#include "ARM64InstrInfo.h"
#include "ARM64MachineFunctionInfo.h"
#include "ARM64Subtarget.h"
#include "ARM64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Hidden command-line switch "arm64-redzone" (initialised to false).
// canUseRedZone() returns false whenever this flag is not set.
static cl::opt<bool>
EnableRedZone("arm64-redzone",
cl::desc("enable use of redzone on ARM64"),
cl::init(false), cl::Hidden);
// Statistic counter; emitPrologue() bumps it for a function without a stack
// frame whose non-zero stack size is left unallocated because canUseRedZone()
// returned true.
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// Estimate the number of stack bytes taken by the function's frame objects.
///
/// The estimate starts at the largest negated offset found among the objects
/// with negative frame indices, then adds the size of every non-dead object
/// with a non-negative index, rounding the running total up to that object's
/// alignment after each addition.
///
/// \param MF  Function whose MachineFrameInfo is inspected.
/// \returns   The estimated size in bytes.
static unsigned estimateStackSize(MachineFunction &MF) {
  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  // Objects with negative indices: keep the deepest (largest negated) offset.
  int Estimate = 0;
  for (int Idx = FrameInfo->getObjectIndexBegin(); Idx != 0; ++Idx) {
    const int Depth = -FrameInfo->getObjectOffset(Idx);
    if (Estimate < Depth)
      Estimate = Depth;
  }

  // Objects with non-negative indices: accumulate sizes, skipping dead ones.
  const unsigned NumObjects = FrameInfo->getObjectIndexEnd();
  for (unsigned Idx = 0; Idx != NumObjects; ++Idx) {
    if (!FrameInfo->isDeadObjectIndex(Idx)) {
      Estimate += FrameInfo->getObjectSize(Idx);
      // Round the running total up to this object's alignment.
      const unsigned Alignment = FrameInfo->getObjectAlignment(Idx);
      Estimate = (Estimate + Alignment - 1) / Alignment * Alignment;
    }
  }

  return (unsigned)Estimate;
}
bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
if (MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
return false;
return true;
}
bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
assert(!RegInfo->needsStackRealignment(MF)
&& "No stack realignment on ARM64!");
#endif
return (MFI->hasCalls() ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
/// Replace an ADJCALLSTACKDOWN / ADJCALLSTACKUP pseudo instruction.
///
/// When the call frame is not reserved and the pseudo's first operand is
/// non-zero, that amount is rounded up to the stack alignment and an explicit
/// SP adjustment is emitted in front of the pseudo: a subtraction for
/// ADJCALLSTACKDOWN, an addition for ADJCALLSTACKUP. The pseudo itself is
/// always erased.
///
/// \param MF   Function containing the pseudo.
/// \param MBB  Block containing the pseudo.
/// \param I    Iterator pointing at the pseudo instruction.
void ARM64FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  const ARM64InstrInfo *TII =
      static_cast<const ARM64InstrInfo*>(MF.getTarget().getInstrInfo());

  if (!TFI->hasReservedCallFrame(MF)) {
    DebugLoc Loc = I->getDebugLoc();
    unsigned Bytes = I->getOperand(0).getImm();
    if (Bytes != 0) {
      // Keep SP aligned: round the adjustment up to the stack alignment.
      const unsigned StackAlign = TFI->getStackAlignment();
      Bytes = (Bytes + StackAlign - 1) / StackAlign * StackAlign;

      const unsigned Opcode = I->getOpcode();
      const bool IsSetup = (Opcode == ARM64::ADJCALLSTACKDOWN);
      assert((IsSetup || Opcode == ARM64::ADJCALLSTACKUP) &&
             "expected ADJCALLSTACKUP");
      (void)Opcode;
      // Unsigned negation, converted to the callee's signed offset, exactly
      // as in the two-branch form.
      emitFrameOffset(MBB, I, Loc, ARM64::SP, ARM64::SP,
                      IsSetup ? -Bytes : Bytes, TII);
    }
  }

  MBB.erase(I);
}
/// Record a CFI "offset" directive for each callee-saved register.
///
/// Each directive is attached to \p Label. When the function has a frame
/// pointer, entries for \p FramePtr and for LR are skipped and no directive is
/// emitted for them; each skipped entry shifts the offsets reported for the
/// entries that follow by one pointer size.
///
/// \param MF        Function whose callee-saved info is read.
/// \param Label     Symbol the CFI instructions are attached to.
/// \param FramePtr  Register treated as the frame pointer for the skip test.
void ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                   MCSymbol *Label,
                                                   unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();

  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty())
    return;

  const DataLayout *TD = MF.getTarget().getDataLayout();
  const bool HasFP = hasFP(MF);

  // Negative: one pointer size per slot.
  const int StackGrowth = -TD->getPointerSize(0);
  const int64_t SaveAreaOffset = (HasFP ? 2 : 1) * StackGrowth;

  // Signed on purpose: this accumulates negative StackGrowth steps. An
  // unsigned accumulator would wrap and only yield the right offset through
  // the later narrowing to int.
  int64_t TotalSkipped = 0;

  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    const unsigned Reg = I->getReg();
    const int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()) -
                           getOffsetOfLocalArea() + SaveAreaOffset;

    // No directive for the frame pointer / link register pair; just account
    // for the slot that was passed over.
    if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
      TotalSkipped += StackGrowth;
      continue;
    }

    const unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    MMI.addFrameInst(
        MCCFIInstruction::createOffset(Label, DwarfReg, Offset - TotalSkipped));
  }
}
/// Insert prologue code at the start of the function's first basic block.
///
/// Two paths exist. If the function has no stack frame (per
/// ARM64FunctionInfo), the whole stack size is treated as locals and SP is
/// dropped by that amount unless the red zone can be used. Otherwise the
/// leading STP instructions are stepped over, FP is set up when hasFP() is
/// true, the remaining local bytes are allocated, X19 is loaded from SP when a
/// base pointer is in use, and CFI frame moves are recorded when required.
///
/// \param MF  Function receiving the prologue.
void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
const ARM64InstrInfo *TII = TM.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
// CFI is wanted when there is debug info or the function needs an unwind
// table entry.
bool needsFrameMoves = MMI.hasDebugInfo() ||
Fn->needsUnwindTableEntry();
bool HasFP = hasFP(MF);
DebugLoc DL = MBB.findDebugLoc(MBBI);
int NumBytes = (int)MFI->getStackSize();
// Path 1: no stack frame. Everything on the stack counts as local storage.
if (!AFI->hasStackFrame()) {
assert(!HasFP && "unexpected function without stack frame but with FP");
AFI->setLocalStackSize(NumBytes);
// Symbol shared by the PROLOG_LABEL below and the CFI instruction.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
if (NumBytes && !canUseRedZone(MF)) {
// Allocate the locals and describe the new CFA offset.
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(FrameLabel,
-NumBytes));
} else if (NumBytes) {
// Non-zero stack size left unallocated: count it as a red-zone function.
++NumRedZoneFunctions;
}
// The label is emitted on this path regardless of NumBytes.
BuildMI(MBB, MBBI, DL, TII->get(ARM64::PROLOG_LABEL))
.addSym(FrameLabel)
.setMIFlag(MachineInstr::FrameSetup);
return;
}
// Path 2: the function has a stack frame.
int FPOffset = 0;
if (HasFP) {
// The first instruction must be a pre-indexed STP on SP whose immediate is
// negative and even.
assert((MBBI->getOpcode() == ARM64::STPXpre ||
MBBI->getOpcode() == ARM64::STPDpre) &&
MBBI->getOperand(2).getReg() == ARM64::SP &&
MBBI->getOperand(3).getImm() < 0 &&
(MBBI->getOperand(3).getImm() & 1) == 0);
// NOTE(review): the "* 8" suggests the STP immediate is expressed in 8-byte
// units, and the "+ 2" leaves room for one register pair -- confirm against
// the STP operand encoding.
FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
assert(FPOffset >= 0 && "Bad Framepointer Offset");
}
// Step over the leading STP instructions; each one accounts for 16 bytes of
// the total stack size.
while (MBBI->getOpcode() == ARM64::STPXi ||
MBBI->getOpcode() == ARM64::STPDi ||
MBBI->getOpcode() == ARM64::STPXpre ||
MBBI->getOpcode() == ARM64::STPDpre) {
++MBBI;
NumBytes -= 16;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (HasFP) {
// FP = SP + FPOffset.
emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
MachineInstr::FrameSetup);
}
// What remains after the STPs is recorded as the local stack size; it must be
// set before canUseRedZone() is consulted, because that reads it back.
AFI->setLocalStackSize(NumBytes);
if (NumBytes) {
if (!canUseRedZone(MF))
emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
}
// With a base pointer, X19 receives a copy of SP after the allocation above.
if (RegInfo->hasBasePointer(MF))
TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
// Negative pointer size.
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, TII->get(ARM64::PROLOG_LABEL))
.addSym(FrameLabel)
.setMIFlag(MachineInstr::FrameSetup);
if (HasFP) {
// CFA is defined relative to the frame register; LR is recorded at one
// slot (StackGrowth) and the frame register at two slots (2 * StackGrowth).
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
MMI.addFrameInst(MCCFIInstruction::
createDefCfa(FrameLabel, Reg, 2 * StackGrowth));
unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
MMI.addFrameInst(MCCFIInstruction::
createOffset(FrameLabel, LR, StackGrowth));
MMI.addFrameInst(MCCFIInstruction::
createOffset(FrameLabel, Reg, 2 * StackGrowth));
} else {
// No frame pointer: describe the CFA through the full stack size.
MMI.addFrameInst(MCCFIInstruction::
createDefCfaOffset(FrameLabel, -MFI->getStackSize()));
}
// Finally record where the callee-saved registers were stored.
emitCalleeSavedFrameMoves(MF, FrameLabel, FramePtr);
}
}
/// Check whether a register appears in a zero-terminated register list.
///
/// \param Reg     Register number to look for.
/// \param CSRegs  Array of register numbers terminated by a 0 entry.
/// \returns       true if \p Reg occurs before the terminator; the terminator
///                itself is never matched.
static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
  for (const uint16_t *Cur = CSRegs; *Cur != 0; ++Cur) {
    if (*Cur == Reg)
      return true;
  }
  return false;
}
/// Check whether an instruction is a load-pair that restores callee-saved
/// registers from SP.
///
/// The instruction qualifies when its opcode is LDPXpost, LDPDpost, LDPXi or
/// LDPDi, operands 0 and 1 are both registers listed in \p CSRegs, and
/// operand 2 is SP.
///
/// \param MI      Instruction to classify.
/// \param CSRegs  Zero-terminated list of callee-saved registers.
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
  switch (MI->getOpcode()) {
  case ARM64::LDPXpost:
  case ARM64::LDPDpost:
  case ARM64::LDPXi:
  case ARM64::LDPDi:
    break;
  default:
    return false;
  }

  return isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
         isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) &&
         MI->getOperand(2).getReg() == ARM64::SP;
}
/// Insert epilogue code in front of the return instruction of \p MBB.
///
/// The callee-saved restore instructions directly preceding the return are
/// counted, each accounting for 16 bytes of the stack size. Without a frame
/// pointer, the remaining bytes are added back to SP unless the red zone is
/// in use. With a frame pointer, SP is recomputed from FP whenever there are
/// remaining bytes or variable-sized objects.
///
/// \param MF   Function receiving the epilogue.
/// \param MBB  Returning block; its last non-debug instruction must be a
///             return.
void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI->isReturn() &&
         "Can only insert epilog into returning blocks");
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARM64InstrInfo *TII =
      static_cast<const ARM64InstrInfo*>(MF.getTarget().getInstrInfo());
  const ARM64RegisterInfo *RegInfo =
      static_cast<const ARM64RegisterInfo*>(MF.getTarget().getRegisterInfo());
  DebugLoc DL = MBBI->getDebugLoc();

  // Signed, matching emitPrologue: with an unsigned counter the sanity
  // assertion below could never fire and an over-count would wrap silently.
  int NumBytes = static_cast<int>(MFI->getStackSize());
  unsigned NumRestores = 0;

  // Walk backwards from the return over the callee-saved restores.
  MachineBasicBlock::iterator LastPopI = MBBI;
  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  if (LastPopI != MBB.begin()) {
    do {
      ++NumRestores;
      --LastPopI;
    } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    // The walk stops one step too far when it lands on a non-restore; undo
    // that step so LastPopI is the first restore (or the return itself).
    if (!isCSRestore(LastPopI, CSRegs)) {
      ++LastPopI;
      --NumRestores;
    }
  }

  // Every restore instruction pops one 16-byte register pair.
  NumBytes -= static_cast<int>(NumRestores * 16);
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    // Nothing was allocated when the red zone was used, so nothing to undo.
    if (!canUseRedZone(MF))
      emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
    return;
  }

  // Recompute SP from FP: step down over all restored pairs except one.
  if (NumBytes || MFI->hasVarSizedObjects()) {
    const int SPFromFP = -static_cast<int>(NumRestores - 1) * 16;
    emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
                    SPFromFP,
                    TII, MachineInstr::NoFlags);
  }
}
/// Return the offset of a frame index, discarding the register it is
/// relative to.
///
/// \param MF  Function owning the frame index.
/// \param FI  Frame index to resolve.
/// \returns   The offset reported by getFrameIndexReference().
int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                            int FI) const {
  unsigned IgnoredFrameReg;
  const int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
  return Offset;
}
/// Resolve a frame index to a register and an offset from it.
///
/// Forwards to resolveFrameIndexReference() without passing PreferFP, so that
/// parameter takes the default from its declaration (not visible here).
///
/// \param MF        Function owning the frame index.
/// \param FI        Frame index to resolve.
/// \param FrameReg  Receives the register the offset is relative to.
/// \returns         The offset from \p FrameReg.
int
ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                           unsigned &FrameReg) const {
  const int Offset = resolveFrameIndexReference(MF, FI, FrameReg);
  return Offset;
}
/// Pick the register (FP, base pointer or SP) used to address a frame index
/// and compute the offset from it.
///
/// FP is only considered when the function has a stack frame. Objects for
/// which isFixedObjectIndex() is true use FP whenever hasFP() is true. Other
/// objects use FP when hasFP() is true, no base pointer is in use, and one of
/// these holds: \p PreferFP is set, there are variable-sized objects, the
/// FP-relative offset is non-negative, or that offset is at least -256 and
/// smaller in magnitude than the SP-relative one. Otherwise the base pointer
/// is used if present, else SP; for SP the local stack size is subtracted
/// when the red zone is in use.
///
/// \param MF        Function owning the frame index.
/// \param FI        Frame index to resolve.
/// \param FrameReg  Receives the chosen register.
/// \param PreferFP  Request FP-relative addressing where FP is available.
/// \returns         Offset from \p FrameReg.
int
ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                               int FI, unsigned &FrameReg,
                                               bool PreferFP) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const ARM64RegisterInfo *RegInfo =
      static_cast<const ARM64RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();

  // Candidate offsets relative to FP and to SP / the base pointer.
  int FPOffset = MFI->getObjectOffset(FI) + 16;
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();

  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    if (MFI->isFixedObjectIndex(FI)) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
      UseFP = PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
              (FPOffset >= -256 && Offset > -FPOffset);
    }
  }

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  if (RegInfo->hasBasePointer(MF)) {
    FrameReg = RegInfo->getBaseRegister();
    return Offset;
  }

  FrameReg = ARM64::SP;
  // With the red zone, SP was never moved down for the locals.
  if (canUseRedZone(MF))
    Offset -= AFI->getLocalStackSize();
  return Offset;
}
/// Compute the kill flag to put on a register operand of a prologue store.
///
/// Every register other than LR is marked killed. LR is marked killed unless
/// it is a live-in of the function and the return address is taken.
///
/// \param MF   Function being lowered.
/// \param Reg  Register being stored.
/// \returns    The result of getKillRegState() for the decision above.
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  bool IsKill = true;
  if (Reg == ARM64::LR) {
    const bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
    IsKill = !LRLiveIn || !MF.getFrameInfo()->isReturnAddressTaken();
  }
  return getKillRegState(IsKill);
}
/// Emit store-pair instructions that save the callee-saved registers.
///
/// \p CSI is consumed two entries at a time, from the last pair to the first.
/// The first store emitted is the pre-indexed form (STPXpre / STPDpre) with
/// an immediate of minus the register count; each later store is the plain
/// form (STPXi / STPDi) with the number of registers already stored as its
/// immediate. All stores use SP as the base and carry the FrameSetup flag.
///
/// \param MBB  Block receiving the stores.
/// \param MI   Insertion point; also supplies the debug location when valid.
/// \param CSI  Callee-saved register records; must hold an even count.
/// \param TRI  Register info, used for debug output only.
/// \returns    Always true.
bool ARM64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  const unsigned NumRegs = CSI.size();
  assert((NumRegs & 1) == 0 && "Odd number of callee-saved regs to spill!");

  DebugLoc DL;
  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned Done = 0; Done < NumRegs; Done += 2) {
    assert((Done & 1) == 0 && "Odd index for callee-saved reg spill!");

    // Pairs are taken from the back of CSI towards the front.
    const unsigned Lo = NumRegs - Done - 2;
    const unsigned Hi = Lo + 1;
    const unsigned Reg1 = CSI[Lo].getReg();
    const unsigned Reg2 = CSI[Hi].getReg();
    assert(CSI[Lo].getFrameIdx() + 1 == CSI[Hi].getFrameIdx() &&
           "Out of order callee saved regs!");

    // Only the very first store uses the pre-indexed form.
    const bool IsFirstStore = (Done == 0);
    unsigned StrOpc;
    if (ARM64::GPR64RegClass.contains(Reg1)) {
      assert(ARM64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      StrOpc = IsFirstStore ? ARM64::STPXpre : ARM64::STPXi;
    } else if (ARM64::FPR64RegClass.contains(Reg1)) {
      assert(ARM64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      StrOpc = IsFirstStore ? ARM64::STPDpre : ARM64::STPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");

    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1)
                 << ", " << TRI->getName(Reg2) << ") -> fi#("
                 << CSI[Lo].getFrameIdx() << ", "
                 << CSI[Hi].getFrameIdx() << ")\n");

    const int Offset = IsFirstStore ? -NumRegs : Done;
    assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for STP immediate");
    BuildMI(MBB, MI, DL, TII.get(StrOpc))
      .addReg(Reg2, getPrologueDeath(MF, Reg2))
      .addReg(Reg1, getPrologueDeath(MF, Reg1))
      .addReg(ARM64::SP)
      .addImm(Offset)
      .setMIFlag(MachineInstr::FrameSetup);
  }
  return true;
}
/// Emit load-pair instructions that reload the callee-saved registers.
///
/// \p CSI is consumed two entries at a time, front to back. Every load but
/// the last is the plain form (LDPXi / LDPDi) with an immediate of
/// (count - index - 2); the last load is the post-indexed form (LDPXpost /
/// LDPDpost) with the full register count as its immediate. All loads use SP
/// as the base.
///
/// \param MBB  Block receiving the loads.
/// \param MI   Insertion point; also supplies the debug location when valid.
/// \param CSI  Callee-saved register records; must hold an even count.
/// \param TRI  Register info, used for debug output only.
/// \returns    Always true.
bool ARM64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  const unsigned NumRegs = CSI.size();
  assert((NumRegs & 1) == 0 && "Odd number of callee-saved regs to spill!");

  DebugLoc DL;
  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  for (unsigned Lo = 0; Lo < NumRegs; Lo += 2) {
    assert((Lo & 1) == 0 && "Odd index for callee-saved reg spill!");

    const unsigned Hi = Lo + 1;
    const unsigned Reg1 = CSI[Lo].getReg();
    const unsigned Reg2 = CSI[Hi].getReg();
    assert(CSI[Lo].getFrameIdx() + 1 == CSI[Hi].getFrameIdx() &&
           "Out of order callee saved regs!");

    // Only the final load uses the post-indexed form.
    const bool IsLastLoad = (Lo == NumRegs - 2);
    unsigned LdrOpc;
    if (ARM64::GPR64RegClass.contains(Reg1)) {
      assert(ARM64::GPR64RegClass.contains(Reg2) &&
             "Expected GPR64 callee-saved register pair!");
      LdrOpc = IsLastLoad ? ARM64::LDPXpost : ARM64::LDPXi;
    } else if (ARM64::FPR64RegClass.contains(Reg1)) {
      assert(ARM64::FPR64RegClass.contains(Reg2) &&
             "Expected FPR64 callee-saved register pair!");
      LdrOpc = IsLastLoad ? ARM64::LDPDpost : ARM64::LDPDi;
    } else
      llvm_unreachable("Unexpected callee saved register!");

    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1)
                 << ", " << TRI->getName(Reg2) << ") -> fi#("
                 << CSI[Lo].getFrameIdx() << ", "
                 << CSI[Hi].getFrameIdx() << ")\n");

    const int Offset = IsLastLoad ? NumRegs : NumRegs - Lo - 2;
    assert((Offset >= -64 && Offset <= 63) && "Offset out of bounds for LDP immediate");
    BuildMI(MBB, MI, DL, TII.get(LdrOpc))
      .addReg(Reg2, getDefRegState(true))
      .addReg(Reg1, getDefRegState(true))
      .addReg(ARM64::SP)
      .addImm(Offset);
  }
  return true;
}
/// Decide which callee-saved registers get spilled and whether the function
/// needs a stack frame, before the generic callee-saved scan runs.
///
/// Callee-saved registers are handled in pairs: if either register of a pair
/// is used, both are marked used. The function is flagged as having a stack
/// frame when the estimated frame is at least 256 bytes, when any pair is
/// spilled, or when the register info says the frame cannot be eliminated.
/// For a big frame without an already-available extra GPR spill, up to two
/// unused callee-saved GPRs are marked used, or failing that a scavenging
/// slot is created.
///
/// \param MF  Function being processed.
/// \param RS  Register scavenger that receives the emergency slot, if one is
///            created.
void
ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
const ARM64RegisterInfo *RegInfo =
static_cast<const ARM64RegisterInfo*>(MF.getTarget().getRegisterInfo());
ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
// Callee-saved registers found unused, kept per register class.
// NOTE(review): UnspilledCSFPRs is filled below but never read in this
// function.
SmallVector<unsigned, 4> UnspilledCSGPRs;
SmallVector<unsigned, 4> UnspilledCSFPRs;
// A frame pointer forces FP and LR to be treated as used.
if (hasFP(MF)) {
MRI->setPhysRegUsed(ARM64::FP);
MRI->setPhysRegUsed(ARM64::LR);
}
// Likewise for the base pointer register when one is needed.
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
// Set once a non-reserved GPR is spilled only to complete a pair.
bool ExtraCSSpill = false;
// Cleared as soon as any callee-saved pair has to be spilled.
bool CanEliminateFrame = true;
DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Walk the zero-terminated callee-saved list two registers at a time.
for (unsigned i = 0; CSRegs[i]; i += 2) {
assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
const unsigned OddReg = CSRegs[i];
const unsigned EvenReg = CSRegs[i + 1];
// Both registers of a pair must be GPR64, or both FPR64.
assert(
(ARM64::GPR64RegClass.contains(OddReg)
&& ARM64::GPR64RegClass.contains(EvenReg))
^ (ARM64::FPR64RegClass.contains(OddReg)
&& ARM64::FPR64RegClass.contains(EvenReg))
&& "Register class mismatch!");
const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
// Neither register used: remember the pair as unspilled and move on.
if (!OddRegUsed && !EvenRegUsed) {
if (ARM64::GPR64RegClass.contains(OddReg)) {
UnspilledCSGPRs.push_back(OddReg);
UnspilledCSGPRs.push_back(EvenReg);
} else {
UnspilledCSFPRs.push_back(OddReg);
UnspilledCSFPRs.push_back(EvenReg);
}
continue;
}
// Exactly one register used: mark its partner used too so the pair is
// saved together. Reg remembers which register was added this way.
unsigned Reg = ARM64::NoRegister;
if (OddRegUsed ^ EvenRegUsed) {
Reg = OddRegUsed ? EvenReg : OddReg;
MRI->setPhysRegUsed(Reg);
}
DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
// Apart from the (LR, FP) pair, paired registers must have consecutive
// encodings.
assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
(RegInfo->getEncodingValue(OddReg) + 1 ==
RegInfo->getEncodingValue(EvenReg))) &&
"Register pair of non-adjacent registers!");
if (ARM64::GPR64RegClass.contains(OddReg)) {
NumGPRSpilled += 2;
// The partner that was only added to complete the pair counts as an extra
// spilled register, provided it is not reserved.
if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
ExtraCSSpill = true;
} else
NumFPRSpilled += 2;
CanEliminateFrame = false;
}
MachineFrameInfo *MFI = MF.getFrameInfo();
// Estimated frame size: frame objects plus 8 bytes per spilled register.
unsigned CFSize = estimateStackSize(MF) + 8*(NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
// Frames of 256 bytes or more are treated as big.
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
// A big frame with no extra spilled GPR: obtain one, or fall back to an
// emergency spill slot.
if (BigStack && !ExtraCSSpill) {
assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
unsigned Count = 0;
// Mark up to two (one pair of) unused callee-saved GPRs as used.
while (!UnspilledCSGPRs.empty() && Count < 2) {
unsigned Reg = UnspilledCSGPRs.back();
UnspilledCSGPRs.pop_back();
DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
<< " to get a scratch register.\n");
MRI->setPhysRegUsed(Reg);
ExtraCSSpill = true;
++Count;
}
// No callee-saved GPR was available: create a GPR64-sized stack object and
// hand it to the scavenger.
if (!ExtraCSSpill) {
const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
}
}
}