//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions -----------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/ARM64AddressingModes.h"
#include "ARM64InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace {
/// \brief MachineFunction pass that expands ARM64 pseudo instructions
/// (immediate materialization, GOT loads, address formation, rr ALU
/// pseudos, ...) into real target instructions.
class ARM64ExpandPseudo : public MachineFunctionPass {
public:
static char ID;
ARM64ExpandPseudo() : MachineFunctionPass(ID) {}
// Target instruction info; initialized in runOnMachineFunction.
const ARM64InstrInfo *TII;
virtual bool runOnMachineFunction(MachineFunction &Fn);
virtual const char *getPassName() const {
return "ARM64 pseudo instruction expansion pass";
}
private:
/// Expand all pseudo instructions in \p MBB; returns true on any change.
bool expandMBB(MachineBasicBlock &MBB);
/// Expand the instruction at \p MBBI if it is a known pseudo; returns true
/// if an expansion was performed.
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
/// Expand a MOVi32imm/MOVi64imm pseudo (\p BitSize is 32 or 64).
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
};
char ARM64ExpandPseudo::ID = 0;
}
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion: implicit uses go onto \p UseMI,
/// implicit defs onto \p DefMI.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  // Operands past the descriptor's explicit operand count are the implicit
  // ones added by the register allocator / earlier passes.
  const unsigned FirstImplicit = OldMI.getDesc().getNumOperands();
  const unsigned NumOps = OldMI.getNumOperands();
  for (unsigned Idx = FirstImplicit; Idx != NumOps; ++Idx) {
    const MachineOperand &Op = OldMI.getOperand(Idx);
    assert(Op.isReg() && Op.getReg());
    if (!Op.isUse())
      DefMI.addOperand(Op);
    else
      UseMI.addOperand(Op);
  }
}
/// \brief Extract the 16-bit chunk of \p Imm at position \p ChunkIdx
/// (0 = least significant halfword, 3 = most significant).
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned Shift = ChunkIdx * 16;
  return (Imm >> Shift) & 0xFFFFULL;
}
/// \brief Replace the 16-bit chunk at \p ToIdx with a copy of the chunk at
/// \p FromIdx, leaving all other chunks of \p Imm untouched.
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Use an unsigned literal for the mask: with ToIdx == 3 the old signed
  // 0xFFFFLL << 48 shifted a positive signed value into the sign bit, which
  // is undefined behavior in C++03/11.
  Imm &= ~(0xFFFFULL << ShiftAmt);
  return Imm | Chunk;
}
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII, unsigned ChunkIdx) {
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
const unsigned ShiftAmt = ChunkIdx * 16;
uint64_t Encoding;
if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
const unsigned Imm16 = getChunk(UImm, ChunkIdx);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
return false;
}
/// \brief Check whether \p Chunk, replicated into all four 16-bit slots of a
/// 64-bit value, is encodable as an ORR logical immediate; on success the
/// encoded immediate is stored in \p Encoding.
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  const uint64_t Replicated =
      Chunk | (Chunk << 16) | (Chunk << 32) | (Chunk << 48);
  return ARM64_AM::processLogicalImmediate(Replicated, 64, Encoding);
}
/// \brief Check for identical 16-bit chunks within the constant; if a chunk
/// repeats two or three times and its replication is ORR-encodable, emit one
/// ORR plus one or two MOVK fixups. Returns true and replaces MI on success.
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII) {
typedef DenseMap<uint64_t, unsigned> CountMap;
CountMap Counts;
// Count how often every 16-bit chunk occurs in the immediate.
for (unsigned Idx = 0; Idx < 4; ++Idx)
++Counts[getChunk(UImm, Idx)];
for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
Chunk != End; ++Chunk) {
const uint64_t ChunkVal = Chunk->first;
const unsigned Count = Chunk->second;
uint64_t Encoding = 0;
// Only chunks occurring 2 or 3 times are worth it (1 ORR + 1-2 MOVKs),
// and the replicated chunk must be a valid ORR logical immediate.
if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
continue;
const bool CountThree = Count == 3;
// ORR the replicated chunk into all four slots of the destination.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
unsigned ShiftAmt = 0;
uint64_t Imm16 = 0;
// Find the first chunk that differs from the replicated value; it needs a
// MOVK fixup.
for (; ShiftAmt < 64; ShiftAmt += 16) {
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
if (Imm16 != ChunkVal)
break;
}
// First MOVK: may carry the dead flag only if it is also the last
// instruction of the sequence (i.e. three chunks were identical).
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg,
RegState::Define | getDeadRegState(DstIsDead && CountThree))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
// With three identical chunks a single fixup finishes the expansion.
if (CountThree) {
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
// Otherwise locate the second differing chunk.
for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
if (Imm16 != ChunkVal)
break;
}
// Second (final) MOVK fixup inherits the dead flag of the original def.
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
return true;
}
return false;
}
static bool isStartChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
}
static bool isEndChunk(uint64_t Chunk) {
if (Chunk == 0 || Chunk == UINT64_MAX)
return false;
return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
}
/// \brief Return \p Imm with its 16-bit chunk at index \p Idx forced to
/// all-zeros (\p Clear == true) or all-ones (\p Clear == false).
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t ChunkMask = 0xFFFFULL << (Idx * 16);
  if (Clear)
    return Imm & ~ChunkMask;
  return Imm | ChunkMask;
}
/// \brief Check whether the constant contains a sequence of ones, possibly
/// wrapping around from the most significant to the least significant chunk.
/// If so, materialize it as one ORR (the run of ones is always a valid
/// logical immediate) plus at most two MOVK fixups for chunks that deviate
/// from the run pattern. Returns true and replaces MI on success.
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const ARM64InstrInfo *TII) {
const int NotSet = -1;
const uint64_t Mask = 0xFFFF;
int StartIdx = NotSet;
int EndIdx = NotSet;
// Locate the chunk where the run of ones starts (1...10...0 shape) and the
// chunk where it ends (0...01...1 shape). The chunk is sign-extended first
// so an all-ones chunk is seen as UINT64_MAX by the classifiers.
for (int Idx = 0; Idx < 4; ++Idx) {
int64_t Chunk = getChunk(UImm, Idx);
Chunk = (Chunk << 48) >> 48;
if (isStartChunk(Chunk))
StartIdx = Idx;
else if (isEndChunk(Chunk))
EndIdx = Idx;
}
// Bail out if no start/end of a run of ones was found.
if (StartIdx == NotSet || EndIdx == NotSet)
return false;
// Outside/Inside describe the value expected of chunks strictly outside
// resp. strictly inside the [StartIdx, EndIdx] interval. If the run wraps
// around (start index above end index), the roles are swapped.
uint64_t Outside = 0;
uint64_t Inside = Mask;
if (StartIdx > EndIdx) {
std::swap(StartIdx, EndIdx);
std::swap(Outside, Inside);
}
// Build the ORR-encodable immediate by forcing every non-conforming chunk
// to its expected pattern, recording up to two chunk indices that will need
// a MOVK to restore their real value afterwards.
uint64_t OrrImm = UImm;
int FirstMovkIdx = NotSet;
int SecondMovkIdx = NotSet;
for (int Idx = 0; Idx < 4; ++Idx) {
const uint64_t Chunk = getChunk(UImm, Idx);
if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
OrrImm = updateImm(OrrImm, Idx, Outside == 0);
if (FirstMovkIdx == NotSet)
FirstMovkIdx = Idx;
else
SecondMovkIdx = Idx;
} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
if (FirstMovkIdx == NotSet)
FirstMovkIdx = Idx;
else
SecondMovkIdx = Idx;
}
}
assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
// The massaged immediate is a pure (possibly wrapped) run of ones, so the
// encoding is expected to succeed; the result is used unconditionally.
uint64_t Encoding = 0;
ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
.addOperand(MI.getOperand(0))
.addReg(ARM64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
const bool SingleMovk = SecondMovkIdx == NotSet;
// First MOVK fixup; only the last instruction of the expansion may carry
// the dead flag of the original destination operand.
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define |
getDeadRegState(DstIsDead && SingleMovk))
.addReg(DstReg)
.addImm(getChunk(UImm, FirstMovkIdx))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16));
// If only one chunk deviated, the expansion is complete.
if (SingleMovk) {
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
return true;
}
// Second (final) MOVK fixup for the other deviating chunk.
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(getChunk(UImm, SecondMovkIdx))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
return true;
}
/// \brief Expand a MOVi32imm/MOVi64imm pseudo into real instructions: a
/// single ORR against the zero register when the constant is a logical
/// immediate, an ORR+MOVK pattern for repeated-chunk/run-of-ones constants,
/// or a MOVZ/MOVN seed followed by MOVK fixups otherwise.
bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned BitSize) {
MachineInstr &MI = *MBBI;
uint64_t Imm = MI.getOperand(1).getImm();
const unsigned Mask = 0xFFFF;
// Zero out any bits above BitSize (relevant for the 32-bit case).
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
// Strategy 1: a single ORR from the zero register, if encodable.
uint64_t Encoding;
if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addOperand(MI.getOperand(0))
.addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
// Count all-ones and all-zeros chunks; they determine whether MOVZ or MOVN
// is the cheaper seed, and whether the ORR+MOVK strategies can pay off.
unsigned OneChunks = 0;
unsigned ZeroChunks = 0;
for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
const unsigned Chunk = (Imm >> Shift) & Mask;
if (Chunk == Mask)
OneChunks++;
else if (Chunk == 0)
ZeroChunks++;
}
// Strategy 2 (64-bit only, when MOVZ/MOVN would need 3+ instructions):
// exploit a repeated cross-chunk (0 == 2 or 1 == 3) by replicating one of
// the remaining chunks to form an ORR-encodable value, then fixing up the
// replaced chunk with a single MOVK.
if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
uint64_t OrrImm = replicateChunk(UImm, 3, 1);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
return true;
OrrImm = replicateChunk(UImm, 1, 3);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
return true;
} else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
uint64_t OrrImm = replicateChunk(UImm, 2, 0);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
return true;
OrrImm = replicateChunk(UImm, 0, 2);
if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
return true;
}
}
// Strategy 3: ORR of a replicated chunk + up to two MOVK fixups.
if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
return true;
// Strategy 4: ORR of a (possibly wrapped) run of ones + MOVK fixups.
if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
return true;
// Fallback: MOVZ/MOVN seed plus MOVK per remaining chunk. Prefer MOVN when
// more chunks are all-ones than all-zeros; work on the inverted immediate
// in that case so the skip test below stays uniform.
bool isNeg = false;
if (OneChunks > ZeroChunks) {
isNeg = true;
Imm = ~Imm;
}
unsigned FirstOpc;
if (BitSize == 32) {
Imm &= (1LL << 32) - 1;
FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi);
} else {
FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi);
}
// Shift positions of the highest and lowest chunks that must be written
// explicitly; chunks outside that range are covered by the seed value.
unsigned Shift = 0; unsigned LastShift = 0; if (Imm != 0) {
unsigned LZ = countLeadingZeros(Imm);
unsigned TZ = countTrailingZeros(Imm);
Shift = ((63 - LZ) / 16) * 16;
LastShift = (TZ / 16) * 16;
}
unsigned Imm16 = (Imm >> Shift) & Mask;
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
// Seed instruction; it may carry the dead flag only when it is also the
// last instruction of the sequence.
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
.addReg(DstReg, RegState::Define |
getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
// Undo the inversion: the MOVK fixups insert chunks of the original,
// non-inverted immediate.
if (isNeg)
Imm = ~Imm;
if (Shift == LastShift) {
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
}
// Emit MOVKs from high to low chunk, skipping chunks that already hold the
// value established by the seed (all-ones for MOVN, zero for MOVZ).
MachineInstrBuilder MIB2;
unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi);
while (Shift != LastShift) {
Shift -= 16;
Imm16 = (Imm >> Shift) & Mask;
if (Imm16 == (isNeg ? Mask : 0))
continue; MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addReg(DstReg,
RegState::Define |
getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
}
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
/// \brief If MBBI references a pseudo instruction, expand it into real
/// target instructions and return true; otherwise return false.
bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
default: break;
// Register-register ALU pseudos: rewritten below into the corresponding
// register-shifted (rs) form with an explicit LSL #0 operand.
case ARM64::ADDWrr:
case ARM64::SUBWrr:
case ARM64::ADDXrr:
case ARM64::SUBXrr:
case ARM64::ADDSWrr:
case ARM64::SUBSWrr:
case ARM64::ADDSXrr:
case ARM64::SUBSXrr:
case ARM64::ANDWrr:
case ARM64::ANDXrr:
case ARM64::BICWrr:
case ARM64::BICXrr:
case ARM64::EONWrr:
case ARM64::EONXrr:
case ARM64::EORWrr:
case ARM64::EORXrr:
case ARM64::ORNWrr:
case ARM64::ORNXrr:
case ARM64::ORRWrr:
case ARM64::ORRXrr: {
// Map each rr pseudo onto its rs counterpart.
unsigned Opcode;
switch (MI.getOpcode()) {
default: return false;
case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break;
case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break;
case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break;
case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break;
case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break;
case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break;
case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break;
case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break;
case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break;
case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break;
case ARM64::BICWrr: Opcode = ARM64::BICWrs; break;
case ARM64::BICXrr: Opcode = ARM64::BICXrs; break;
case ARM64::EONWrr: Opcode = ARM64::EONWrs; break;
case ARM64::EONXrr: Opcode = ARM64::EONXrs; break;
case ARM64::EORWrr: Opcode = ARM64::EORWrs; break;
case ARM64::EORXrr: Opcode = ARM64::EORXrs; break;
case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break;
case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break;
case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break;
case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break;
}
// Re-emit with the same operands plus a zero shift amount.
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
.addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
}
case ARM64::FCVTSHpseudo: {
// Half-to-single convert: operate on the H subregister of the source and
// keep the full source register attached as an implicit operand.
MachineOperand Src = MI.getOperand(1);
Src.setImplicit();
unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub);
auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr))
.addOperand(MI.getOperand(0))
.addReg(SrcH, RegState::Undef)
.addOperand(Src);
transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
case ARM64::LOADgot: {
// GOT load: ADRP of the symbol's page followed by an LDRXui of the page
// offset. The operand may be a global, external symbol, or constant pool
// entry; the page/pageoff target flags are added accordingly.
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui))
.addOperand(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE);
MIB2.addGlobalAddress(MO1.getGlobal(), 0,
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
} else if (MO1.isSymbol()) {
MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE);
MIB2.addExternalSymbol(MO1.getSymbolName(),
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
} else {
assert(MO1.isCPI() &&
"Only expect globals, externalsymbols, or constant pools");
MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
Flags | ARM64II::MO_PAGE);
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
}
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
// Address formation pseudos: ADRP of the page (operand 1) plus an ADDXri
// of the page offset (operand 2).
case ARM64::MOVaddr:
case ARM64::MOVaddrJT:
case ARM64::MOVaddrCP:
case ARM64::MOVaddrBA:
case ARM64::MOVaddrTLS:
case ARM64::MOVaddrEXT: {
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg)
.addOperand(MI.getOperand(1));
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri))
.addOperand(MI.getOperand(0))
.addReg(DstReg)
.addOperand(MI.getOperand(2))
.addImm(0);
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
// Immediate materialization pseudos.
case ARM64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
case ARM64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
case ARM64::RET_ReallyLR:
// Expand to a plain RET reading LR explicitly.
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET))
.addReg(ARM64::LR);
MI.eraseFromParent();
return true;
}
return false;
}
/// \brief Expand every pseudo instruction in \p MBB; returns true if any
/// instruction was changed.
bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool AnyChanges = false;
  MachineBasicBlock::iterator I = MBB.begin();
  const MachineBasicBlock::iterator End = MBB.end();
  while (I != End) {
    // Grab the successor up front: expandMI may erase *I from the block.
    const MachineBasicBlock::iterator Next = llvm::next(I);
    AnyChanges |= expandMI(MBB, I);
    I = Next;
  }
  return AnyChanges;
}
/// \brief Pass entry point: cache the target instruction info, then expand
/// pseudos in every basic block of the function.
bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
  bool AnyChanges = false;
  for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
       BB != BBEnd; ++BB)
    AnyChanges |= expandMBB(*BB);
  return AnyChanges;
}
/// \brief Factory: returns a new instance of the pseudo instruction
/// expansion pass. The caller (pass manager) takes ownership.
FunctionPass *llvm::createARM64ExpandPseudoPass() {
return new ARM64ExpandPseudo();
}