LoopStrengthReduce.cpp [plain text]
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "loop-reduce"
static const unsigned MaxIVUsers = 200;
static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
#ifndef NDEBUG
static cl::opt<bool> StressIVChain(
"stress-ivchain", cl::Hidden, cl::init(false),
cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif
namespace {
class RegSortData {
public:
SmallBitVector UsedByIndices;
RegSortData() {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
RegUsesTy RegUsesMap;
SmallVector<const SCEV *, 16> RegSequence;
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
void DropRegister(const SCEV *Reg, size_t LUIdx);
void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
void clear();
typedef SmallVectorImpl<const SCEV *>::iterator iterator;
typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
iterator begin() { return RegSequence.begin(); }
iterator end() { return RegSequence.end(); }
const_iterator begin() const { return RegSequence.begin(); }
const_iterator end() const { return RegSequence.end(); }
};
}
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
if (Pair.second)
RegSequence.push_back(Reg);
RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
RSD.UsedByIndices.set(LUIdx);
}
void
RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
RegUsesTy::iterator It = RegUsesMap.find(Reg);
assert(It != RegUsesMap.end());
RegSortData &RSD = It->second;
assert(RSD.UsedByIndices.size() > LUIdx);
RSD.UsedByIndices.reset(LUIdx);
}
void
RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
assert(LUIdx <= LastLUIdx);
for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
I != E; ++I) {
SmallBitVector &UsedByIndices = I->second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
}
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
if (I == RegUsesMap.end())
return false;
const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
return UsedByIndices.find_next(i) != -1;
}
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
assert(I != RegUsesMap.end() && "Unknown register!");
return I->second.UsedByIndices;
}
void RegUseTracker::clear() {
RegUsesMap.clear();
RegSequence.clear();
}
namespace {
struct Formula {
GlobalValue *BaseGV;
int64_t BaseOffset;
bool HasBaseReg;
int64_t Scale;
SmallVector<const SCEV *, 4> BaseRegs;
const SCEV *ScaledReg;
int64_t UnfoldedOffset;
Formula()
: BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
bool isCanonical() const;
void Canonicalize();
bool Unscale();
size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
ScalarEvolution &SE) {
if (SE.properlyDominates(S, L->getHeader())) {
Good.push_back(S);
return;
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
DoInitialMatch(*I, L, Good, Bad, SE);
return;
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
if (!AR->getStart()->isZero()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop(), SCEV::FlagAnyWrap),
L, Good, Bad, SE);
return;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
if (Mul->getOperand(0)->isAllOnesValue()) {
SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
const SCEV *NewMul = SE.getMulExpr(Ops);
SmallVector<const SCEV *, 4> MyGood;
SmallVector<const SCEV *, 4> MyBad;
DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
SE.getEffectiveSCEVType(NewMul->getType())));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
E = MyGood.end(); I != E; ++I)
Good.push_back(SE.getMulExpr(NegOne, *I));
for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
E = MyBad.end(); I != E; ++I)
Bad.push_back(SE.getMulExpr(NegOne, *I));
return;
}
Bad.push_back(S);
}
void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
SmallVector<const SCEV *, 4> Good;
SmallVector<const SCEV *, 4> Bad;
DoInitialMatch(S, L, Good, Bad, SE);
if (!Good.empty()) {
const SCEV *Sum = SE.getAddExpr(Good);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
if (!Bad.empty()) {
const SCEV *Sum = SE.getAddExpr(Bad);
if (!Sum->isZero())
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
Canonicalize();
}
bool Formula::isCanonical() const {
if (ScaledReg)
return Scale != 1 || !BaseRegs.empty();
return BaseRegs.size() <= 1;
}
void Formula::Canonicalize() {
if (isCanonical())
return;
assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
ScaledReg = BaseRegs.back();
BaseRegs.pop_back();
Scale = 1;
size_t BaseRegsSize = BaseRegs.size();
size_t Try = 0;
while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
std::swap(ScaledReg, BaseRegs[Try++]);
}
bool Formula::Unscale() {
if (Scale != 1)
return false;
Scale = 0;
BaseRegs.push_back(ScaledReg);
ScaledReg = nullptr;
return true;
}
size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
nullptr;
}
void Formula::DeleteBaseReg(const SCEV *&S) {
if (&S != &BaseRegs.back())
std::swap(S, BaseRegs.back());
BaseRegs.pop_back();
}
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const {
if (ScaledReg)
if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
return true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I)
if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
return true;
return false;
}
void Formula::print(raw_ostream &OS) const {
bool First = true;
if (BaseGV) {
if (!First) OS << " + "; else First = false;
BaseGV->printAsOperand(OS, false);
}
if (BaseOffset != 0) {
if (!First) OS << " + "; else First = false;
OS << BaseOffset;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
E = BaseRegs.end(); I != E; ++I) {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
if (HasBaseReg && BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: HasBaseReg**";
} else if (!HasBaseReg && !BaseRegs.empty()) {
if (!First) OS << " + "; else First = false;
OS << "**error: !HasBaseReg**";
}
if (Scale != 0) {
if (!First) OS << " + "; else First = false;
OS << Scale << "*reg(";
if (ScaledReg)
OS << *ScaledReg;
else
OS << "<unknown>";
OS << ')';
}
if (UnfoldedOffset != 0) {
if (!First) OS << " + ";
OS << "imm(" << UnfoldedOffset << ')';
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::dump() const {
print(errs()); errs() << '\n';
}
#endif
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
Type *WideTy =
IntegerType::get(SE.getContext(),
SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
ScalarEvolution &SE,
bool IgnoreSignificantBits = false) {
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (RC) {
const APInt &RA = RC->getValue()->getValue();
if (RA.isAllOnesValue())
return SE.getMulExpr(LHS, RC);
if (RA == 1)
return LHS;
}
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
if (!Start) return nullptr;
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
return nullptr;
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
SmallVector<const SCEV *, 8> Ops;
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
return nullptr;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
const SCEV *S = *I;
if (!Found)
if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {
S = Q;
Found = true;
}
Ops.push_back(S);
}
return Found ? SE.getMulExpr(Ops) : nullptr;
}
return nullptr;
}
return nullptr;
}
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getValue()->getValue().getMinSignedBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
if (Result != 0)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
SCEV::FlagAnyWrap);
return Result;
}
return 0;
}
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
S = SE.getConstant(GV->getType(), 0);
return GV;
}
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
if (Result)
S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
if (Result)
S = SE.getAddRecExpr(NewOps, AR->getLoop(),
SCEV::FlagAnyWrap);
return Result;
}
return nullptr;
}
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (SI->getOperand(1) == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::prefetch:
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
}
return isAddress;
}
static Type *getAccessType(const Instruction *Inst) {
Type *AccessTy = Inst->getType();
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
AccessTy = SI->getOperand(0)->getType();
else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
AccessTy = II->getArgOperand(0)->getType();
break;
}
}
if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace());
return AccessTy;
}
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
return true;
}
return false;
}
static bool isHighCostExpansion(const SCEV *S,
SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution &SE) {
switch (S->getSCEVType()) {
case scUnknown:
case scConstant:
return false;
case scTruncate:
return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
Processed, SE);
case scZeroExtend:
return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
Processed, SE);
case scSignExtend:
return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
Processed, SE);
}
if (!Processed.insert(S))
return false;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
if (isHighCostExpansion(*I, Processed, SE))
return true;
}
return false;
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
if (Mul->getNumOperands() == 2) {
if (isa<SCEVConstant>(Mul->getOperand(0)))
return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
Value *UVal = U->getValue();
for (User *UR : UVal->users()) {
Instruction *UI = dyn_cast<Instruction>(UR);
if (UI && UI->getOpcode() == Instruction::Mul &&
SE.isSCEVable(UI->getType())) {
return SE.getSCEV(UI) == Mul;
}
}
}
}
}
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (isExistingPhi(AR, SE))
return false;
}
return true;
}
static bool
DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
bool Changed = false;
while (!DeadInsts.empty()) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
I->eraseFromParent();
Changed = true;
}
return Changed;
}
namespace {
class LSRUse;
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
namespace {
class Cost {
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
unsigned NumBaseAdds;
unsigned ImmCost;
unsigned SetupCost;
unsigned ScaleCost;
public:
Cost()
: NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
void Lose();
#ifndef NDEBUG
bool isValid() {
return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
| ImmCost | SetupCost | ScaleCost) != ~0u)
|| ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
& ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
bool isLoser() {
assert(isValid() && "invalid cost");
return NumRegs == ~0u;
}
void RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
private:
void RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT);
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
void Cost::RateRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
if (AR->getLoop() != L) {
if (isExistingPhi(AR, SE))
return;
Lose();
return;
}
AddRecCost += 1;
if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
if (!Regs.count(AR->getOperand(1))) {
RateRegister(AR->getOperand(1), Regs, L, SE, DT);
if (isLoser())
return;
}
}
}
++NumRegs;
if (!isa<SCEVUnknown>(Reg) &&
!isa<SCEVConstant>(Reg) &&
!(isa<SCEVAddRecExpr>(Reg) &&
(isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
++SetupCost;
NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE.hasComputableLoopEvolution(Reg, L);
}
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
if (LoserRegs && LoserRegs->count(Reg)) {
Lose();
return;
}
if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
if (LoserRegs && isLoser())
LoserRegs->insert(Reg);
}
}
void Cost::RateFormula(const TargetTransformInfo &TTI,
const Formula &F,
SmallPtrSet<const SCEV *, 16> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
assert(F.isCanonical() && "Cost is accurate only for canonical formula");
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Lose();
return;
}
RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (VisitedRegs.count(BaseReg)) {
Lose();
return;
}
RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
NumBaseAdds +=
NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
NumBaseAdds += (F.UnfoldedOffset != 0);
ScaleCost += getScalingFactorCost(TTI, LU, F);
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
int64_t Offset = (uint64_t)*I + F.BaseOffset;
if (F.BaseGV)
ImmCost += 64; else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
}
assert(isValid() && "invalid cost");
}
void Cost::Lose() {
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
NumBaseAdds = ~0u;
ImmCost = ~0u;
SetupCost = ~0u;
ScaleCost = ~0u;
}
bool Cost::operator<(const Cost &Other) const {
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
Other.SetupCost);
}
void Cost::print(raw_ostream &OS) const {
OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
if (AddRecCost != 0)
OS << ", with addrec cost " << AddRecCost;
if (NumIVMuls != 0)
OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
if (NumBaseAdds != 0)
OS << ", plus " << NumBaseAdds << " base add"
<< (NumBaseAdds == 1 ? "" : "s");
if (ScaleCost != 0)
OS << ", plus " << ScaleCost << " scale cost";
if (ImmCost != 0)
OS << ", plus " << ImmCost << " imm cost";
if (SetupCost != 0)
OS << ", plus " << SetupCost << " setup cost";
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Cost::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
struct LSRFixup {
Instruction *UserInst;
Value *OperandValToReplace;
PostIncLoopSet PostIncLoops;
size_t LUIdx;
int64_t Offset;
bool isUseFullyOutsideLoop(const Loop *L) const;
LSRFixup();
void print(raw_ostream &OS) const;
void dump() const;
};
}
LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
Offset(0) {}
bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == OperandValToReplace &&
L->contains(PN->getIncomingBlock(i)))
return false;
return true;
}
return !L->contains(UserInst);
}
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
OS << "store ";
Store->getOperand(0)->printAsOperand(OS, false);
} else if (UserInst->getType()->isVoidTy())
OS << UserInst->getOpcodeName();
else
UserInst->printAsOperand(OS, false);
OS << ", OperandValToReplace=";
OperandValToReplace->printAsOperand(OS, false);
for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
(*I)->getHeader()->printAsOperand(OS, false);
}
if (LUIdx != ~size_t(0))
OS << ", LUIdx=" << LUIdx;
if (Offset != 0)
OS << ", Offset=" << Offset;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
struct UniquifierDenseMapInfo {
static SmallVector<const SCEV *, 4> getEmptyKey() {
SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-1));
return V;
}
static SmallVector<const SCEV *, 4> getTombstoneKey() {
SmallVector<const SCEV *, 4> V;
V.push_back(reinterpret_cast<const SCEV *>(-2));
return V;
}
static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
const SmallVector<const SCEV *, 4> &RHS) {
return LHS == RHS;
}
};
class LSRUse {
DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
public:
enum KindType {
Basic, Special, Address, ICmpZero };
typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
KindType Kind;
Type *AccessTy;
SmallVector<int64_t, 8> Offsets;
int64_t MinOffset;
int64_t MaxOffset;
bool AllFixupsOutsideLoop;
bool RigidFormula;
Type *WidestFixupType;
SmallVector<Formula, 12> Formulae;
SmallPtrSet<const SCEV *, 4> Regs;
LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
void print(raw_ostream &OS) const;
void dump() const;
};
}
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
std::sort(Key.begin(), Key.end());
return Uniquifier.count(Key);
}
bool LSRUse::InsertFormula(const Formula &F) {
assert(F.isCanonical() && "Invalid canonical representation");
if (!Formulae.empty() && RigidFormula)
return false;
SmallVector<const SCEV *, 4> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
std::sort(Key.begin(), Key.end());
if (!Uniquifier.insert(Key).second)
return false;
assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
"Zero allocated in a scaled register!");
#ifndef NDEBUG
for (SmallVectorImpl<const SCEV *>::const_iterator I =
F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I)
assert(!(*I)->isZero() && "Zero allocated in a base register!");
#endif
Formulae.push_back(F);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
if (F.ScaledReg)
Regs.insert(F.ScaledReg);
return true;
}
void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
}
void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
Regs.clear();
for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
E = Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
E = OldRegs.end(); I != E; ++I)
if (!Regs.count(*I))
RegUses.DropRegister(*I, LUIdx);
}
void LSRUse::print(raw_ostream &OS) const {
OS << "LSR Use: Kind=";
switch (Kind) {
case Basic: OS << "Basic"; break;
case Special: OS << "Special"; break;
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
if (AccessTy->isPointerTy())
OS << "pointer"; else
OS << *AccessTy;
}
OS << ", Offsets={";
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
if (std::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
if (WidestFixupType)
OS << ", widest fixup type: " << *WidestFixupType;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
case LSRUse::ICmpZero:
if (BaseGV)
return false;
if (Scale != 0 && HasBaseReg && BaseOffset != 0)
return false;
if (Scale != 0 && Scale != -1)
return false;
if (BaseOffset != 0) {
if (Scale == 0)
BaseOffset = -(uint64_t)BaseOffset;
return TTI.isLegalICmpImmediate(BaseOffset);
}
return true;
case LSRUse::Basic:
return !BaseGV && Scale == 0 && BaseOffset == 0;
case LSRUse::Special:
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale) {
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
return false;
MinOffset = (uint64_t)BaseOffset + MinOffset;
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
(MaxOffset > 0))
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
HasBaseReg, Scale) &&
isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
HasBaseReg, Scale);
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
const Formula &F) {
assert((F.isCanonical() || F.Scale != 0));
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
int64_t Scale) {
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale) ||
(Scale == 1 &&
isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
BaseGV, BaseOffset, true, 0));
}
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
const Formula &F) {
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
F.Scale);
}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F))
return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
int ScaleCostMinOffset =
TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
F.BaseOffset + LU.MinOffset,
F.HasBaseReg, F.Scale);
int ScaleCostMaxOffset =
TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV,
F.BaseOffset + LU.MaxOffset,
F.HasBaseReg, F.Scale);
assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 &&
"Legal addressing mode has an illegal cost!");
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
case LSRUse::Basic:
case LSRUse::Special:
return 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, Type *AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg) {
if (BaseOffset == 0 && !BaseGV) return true;
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
if (!HasBaseReg && Scale == 1) {
Scale = 0;
HasBaseReg = true;
}
return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
ScalarEvolution &SE, int64_t MinOffset,
int64_t MaxOffset, LSRUse::KindType Kind,
Type *AccessTy, const SCEV *S, bool HasBaseReg) {
if (S->isZero()) return true;
int64_t BaseOffset = ExtractImmediate(S, SE);
GlobalValue *BaseGV = ExtractSymbol(S, SE);
if (!S->isZero()) return false;
if (BaseOffset == 0 && !BaseGV) return true;
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
BaseOffset, HasBaseReg, Scale);
}
namespace {
struct IVInc {
Instruction *UserInst;
Value* IVOperand;
const SCEV *IncExpr;
IVInc(Instruction *U, Value *O, const SCEV *E):
UserInst(U), IVOperand(O), IncExpr(E) {}
};
struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
typedef SmallVectorImpl<IVInc>::const_iterator const_iterator;
const_iterator begin() const {
assert(!Incs.empty());
return std::next(Incs.begin());
}
const_iterator end() const {
return Incs.end();
}
bool hasIncs() const { return Incs.size() >= 2; }
void add(const IVInc &X) { Incs.push_back(X); }
Instruction *tailUserInst() const { return Incs.back().UserInst; }
bool isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution&);
};
struct ChainUsers {
SmallPtrSet<Instruction*, 4> FarUsers;
SmallPtrSet<Instruction*, 4> NearUsers;
};
class LSRInstance {
IVUsers &IU;
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
const TargetTransformInfo &TTI;
Loop *const L;
bool Changed;
Instruction *IVIncInsertPos;
SmallSetVector<int64_t, 8> Factors;
SmallSetVector<Type *, 4> Types;
SmallVector<LSRFixup, 16> Fixups;
SmallVector<LSRUse, 16> Uses;
RegUseTracker RegUses;
static const unsigned MaxChains = 8;
SmallVector<IVChain, MaxChains> IVChainVec;
SmallPtrSet<Use*, MaxChains> IVIncSet;
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec);
void FinalizeChain(IVChain &Chain);
void CollectChains();
void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts);
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
LSRFixup &getNewFixup() {
Fixups.push_back(LSRFixup());
return Fixups.back();
}
typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
Type *AccessTy);
void DeleteUse(LSRUse &LU, size_t LUIdx);
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
void CollectLoopInvariantFixupsAndFormulae();
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, unsigned Depth,
size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, size_t Idx,
bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist,
size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateCrossUseConstantOffsets();
void GenerateAllReuseFormulae();
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const;
void Solve(SmallVectorImpl<const Formula *> &Solution) const;
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
BasicBlock::iterator
AdjustInsertPositionForExpand(BasicBlock::iterator IP,
const LSRFixup &LF,
const LSRUse &LU,
SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
public:
LSRInstance(Loop *L, Pass *P);
bool getChanged() const { return Changed; }
void print_factors_and_types(raw_ostream &OS) const;
void print_fixups(raw_ostream &OS) const;
void print_uses(raw_ostream &OS) const;
void print(raw_ostream &OS) const;
void dump() const;
};
}
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
UI != E; ) {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
Type *DestTy = nullptr;
bool IsSigned = false;
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
IsSigned = false;
DestTy = UCast->getDestTy();
}
else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
IsSigned = true;
DestTy = SCast->getDestTy();
}
if (!DestTy) continue;
if (!TTI.isTypeLegal(DestTy)) continue;
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
if (!PH) continue;
if (PH->getNumIncomingValues() != 2) continue;
Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
if (Mantissa == -1) continue;
if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
continue;
unsigned Entry, Latch;
if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
Entry = 0;
Latch = 1;
} else {
Entry = 1;
Latch = 0;
}
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
(double)Init->getSExtValue() :
(double)Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
&& Incr->getOpcode() != Instruction::Sub)
continue;
ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(0));
else
continue;
if (!C) continue;
if (!C->getValue().isStrictlyPositive()) continue;
PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH);
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
Changed = true;
break;
}
}
bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
CondUse = UI;
return true;
}
return false;
}
ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
return Cond;
SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
if (!Sel || !Sel->hasOneUse()) return Cond;
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return Cond;
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
} else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_SLT;
Max = S;
} else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
Pred = ICmpInst::ICMP_ULT;
Max = U;
} else {
return Cond;
}
if (Max->getNumOperands() != 2)
return Cond;
const SCEV *MaxLHS = Max->getOperand(0);
const SCEV *MaxRHS = Max->getOperand(1);
if (!MaxLHS ||
(ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
return Cond;
const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
if (!AR || !AR->isAffine() ||
AR->getStart() != One ||
AR->getStepRecurrence(SE) != One)
return Cond;
assert(AR->getLoop() == L &&
"Loop condition operand is an addrec in a different loop!");
Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
NewRHS = BO->getOperand(0);
if (!NewRHS)
return Cond;
} else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
NewRHS = SU->getValue();
else
return Cond;
if (Cond->getPredicate() == CmpInst::ICMP_EQ)
Pred = CmpInst::getInversePredicate(Pred);
ICmpInst *NewCond =
new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");
Cond->replaceAllUsesWith(NewCond);
CondUse->setUser(NewCond);
Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
Cond->eraseFromParent();
Sel->eraseFromParent();
if (Cmp->use_empty())
Cmp->eraseFromParent();
return NewCond;
}
void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
BasicBlock *LatchBlock = L->getLoopLatch();
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
continue;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
continue;
IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
Cond = OptimizeMax(Cond, CondUse);
if (!DT.dominates(ExitingBlock, LatchBlock))
continue;
if (LatchBlock != ExitingBlock)
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (&*UI != CondUse &&
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
const SCEV *A = IU.getStride(*CondUse, L);
const SCEV *B = IU.getStride(*UI, L);
if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
SE.getTypeSizeInBits(B->getType()))
B = SE.getSignExtendExpr(B, A->getType());
else
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
const ConstantInt *C = D->getValue();
if (C->isOne() || C->isAllOnesValue())
goto decline_post_inc;
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy, nullptr,
0,
false, Scale))
goto decline_post_inc;
Scale = -Scale;
if (TTI.isLegalAddressingMode(AccessTy, nullptr,
0,
false, Scale))
goto decline_post_inc;
}
}
DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
<< *Cond << '\n');
if (&*++BasicBlock::iterator(Cond) != TermBr) {
if (Cond->hasOneUse()) {
Cond->moveBefore(TermBr);
} else {
ICmpInst *OldCond = Cond;
Cond = cast<ICmpInst>(Cond->clone());
Cond->setName(L->getHeader()->getName() + ".termcond");
ExitingBlock->getInstList().insert(TermBr, Cond);
CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
CondUse->transformToPostInc(L);
Changed = true;
PostIncs.insert(Cond);
decline_post_inc:;
}
IVIncInsertPos = L->getLoopLatch()->getTerminator();
for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(),
E = PostIncs.end(); I != E; ++I) {
BasicBlock *BB =
DT.findNearestCommonDominator(IVIncInsertPos->getParent(),
(*I)->getParent());
if (BB == (*I)->getParent())
IVIncInsertPos = *I;
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
}
bool
LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
Type *NewAccessTy = AccessTy;
if (LU.Kind != Kind)
return false;
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
LU.AccessTy = NewAccessTy;
if (NewOffset != LU.Offsets.back())
LU.Offsets.push_back(NewOffset);
return true;
}
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind, Type *AccessTy) {
const SCEV *Copy = Expr;
int64_t Offset = ExtractImmediate(Expr, SE);
if (!isAlwaysFoldable(TTI, Kind, AccessTy, nullptr,
Offset, true)) {
Expr = Copy;
Offset = 0;
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
if (!P.second) {
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
return std::make_pair(LUIdx, Offset);
}
size_t LUIdx = Uses.size();
P.first->second = LUIdx;
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
if (LU.Offsets.empty() || Offset != LU.Offsets.back())
LU.Offsets.push_back(Offset);
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
}
void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
if (&LU != &Uses.back())
std::swap(LU, Uses.back());
Uses.pop_back();
RegUses.SwapAndDropUse(LUIdx, Uses.size());
}
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.BaseGV == OrigF.BaseGV &&
F.Scale == OrigF.Scale &&
F.UnfoldedOffset == OrigF.UnfoldedOffset) {
if (F.BaseOffset == 0)
return &LU;
break;
}
}
}
}
return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
SmallVector<const SCEV *, 4> Worklist;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Expr = IU.getExpr(*UI);
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
Worklist.push_back(Expr);
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (AR->getLoop() == L)
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
std::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
if (SE.getTypeSizeInBits(OldStride->getType()) >
SE.getTypeSizeInBits(NewStride->getType()))
NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
else
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
if (Types.size() == 1)
Types.clear();
DEBUG(print_factors_and_types(dbgs()));
}
static User::op_iterator
findIVOperand(User::op_iterator OI, User::op_iterator OE,
Loop *L, ScalarEvolution &SE) {
for(; OI != OE; ++OI) {
if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
if (!SE.isSCEVable(Oper->getType()))
continue;
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
if (AR->getLoop() == L)
break;
}
}
}
return OI;
}
static Value *getWideOperand(Value *Oper) {
if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
return Trunc->getOperand(0);
return Oper;
}
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
}
static const SCEV *getExprBase(const SCEV *S) {
switch (S->getSCEVType()) {
default: return S;
case scConstant:
return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
case scSignExtend:
return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
case scAddExpr: {
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
E(Add->op_begin()); I != E; ++I) {
const SCEV *SubExpr = *I;
if (SubExpr->getSCEVType() == scAddExpr)
return getExprBase(SubExpr);
if (SubExpr->getSCEVType() != scMulExpr)
return SubExpr;
}
return S; }
case scAddRecExpr:
return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
}
}
bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
const SCEV *IncExpr,
ScalarEvolution &SE) {
if (StressIVChain)
return true;
if (!isa<SCEVConstant>(IncExpr)) {
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
return 0;
}
SmallPtrSet<const SCEV*, 8> Processed;
return !isHighCostExpansion(IncExpr, Processed, SE);
}
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
ScalarEvolution &SE, const TargetTransformInfo &TTI) {
if (StressIVChain)
return true;
if (!Chain.hasIncs())
return false;
if (!Users.empty()) {
DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
E = Users.end(); I != E; ++I) {
dbgs() << " " << **I << "\n";
});
return false;
}
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
int cost = 1;
if (isa<PHINode>(Chain.tailUserInst())
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
if (I->IncExpr->isZero())
continue;
if (isa<SCEVConstant>(I->IncExpr)) {
++NumConstIncrements;
continue;
}
if (I->IncExpr == LastIncExpr)
++NumReusedIncrements;
else
++NumVarIncrements;
LastIncExpr = I->IncExpr;
}
if (NumConstIncrements > 1)
--cost;
cost += NumVarIncrements;
cost -= NumReusedIncrements;
DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
<< "\n");
return cost < 0;
}
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
SmallVectorImpl<ChainUsers> &ChainUsersVec) {
Value *const NextIV = getWideOperand(IVOper);
const SCEV *const OperExpr = SE.getSCEV(NextIV);
const SCEV *const OperExprBase = getExprBase(OperExpr);
unsigned ChainIdx = 0, NChains = IVChainVec.size();
const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
if (!StressIVChain && Chain.ExprBase != OperExprBase)
continue;
Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
if (!isCompatibleIVType(PrevIV, NextIV))
continue;
if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
continue;
const SCEV *PrevExpr = SE.getSCEV(PrevIV);
const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
if (!SE.isLoopInvariant(IncExpr, L))
continue;
if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
LastIncExpr = IncExpr;
break;
}
}
if (ChainIdx == NChains) {
if (isa<PHINode>(UserInst))
return;
if (NChains >= MaxChains && !StressIVChain) {
DEBUG(dbgs() << "IV Chain Limit\n");
return;
}
LastIncExpr = OperExpr;
if (!isa<SCEVAddRecExpr>(LastIncExpr))
return;
++NChains;
IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
OperExprBase));
ChainUsersVec.resize(NChains);
DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
<< ") IV=" << *LastIncExpr << "\n");
} else {
DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
<< ") IV+" << *LastIncExpr << "\n");
IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
}
IVChain &Chain = IVChainVec[ChainIdx];
SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
if (!LastIncExpr->isZero()) {
ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
NearUsers.end());
NearUsers.clear();
}
for (User *U : IVOper->users()) {
Instruction *OtherUse = dyn_cast<Instruction>(U);
if (!OtherUse)
continue;
IVChain::const_iterator IncIter = Chain.Incs.begin();
IVChain::const_iterator IncEnd = Chain.Incs.end();
for( ; IncIter != IncEnd; ++IncIter) {
if (IncIter->UserInst == OtherUse)
break;
}
if (IncIter != IncEnd)
continue;
if (SE.isSCEVable(OtherUse->getType())
&& !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
&& IU.isIVUserOrOperand(OtherUse)) {
continue;
}
NearUsers.insert(OtherUse);
}
ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
void LSRInstance::CollectChains() {
DEBUG(dbgs() << "Collecting IV Chains.\n");
SmallVector<ChainUsers, 8> ChainUsersVec;
SmallVector<BasicBlock *,8> LatchPath;
BasicBlock *LoopHeader = L->getHeader();
for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
LatchPath.push_back(Rung->getBlock());
}
LatchPath.push_back(LoopHeader);
for (SmallVectorImpl<BasicBlock *>::reverse_iterator
BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
BBIter != BBEnd; ++BBIter) {
for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
I != E; ++I) {
if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
continue;
if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
continue;
for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
ChainIdx < NChains; ++ChainIdx) {
ChainUsersVec[ChainIdx].NearUsers.erase(I);
}
SmallPtrSet<Instruction*, 4> UniqueOperands;
User::op_iterator IVOpEnd = I->op_end();
User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
while (IVOpIter != IVOpEnd) {
Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
if (UniqueOperands.insert(IVOpInst))
ChainInstruction(I, IVOpInst, ChainUsersVec);
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
} } for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (!SE.isSCEVable(PN->getType()))
continue;
Instruction *IncV =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
ChainInstruction(PN, IncV, ChainUsersVec);
}
unsigned ChainIdx = 0;
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
continue;
if (ChainIdx != UsersIdx)
IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
FinalizeChain(IVChainVec[ChainIdx]);
++ChainIdx;
}
IVChainVec.resize(ChainIdx);
}
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
I != E; ++I) {
DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
User::op_iterator UseI =
std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
}
}
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
Value *Operand, const TargetTransformInfo &TTI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
return false;
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
getAccessType(UserInst), nullptr,
IncOffset, false))
return false;
return true;
}
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) {
const IVInc &Head = Chain.Incs[0];
User::op_iterator IVOpEnd = Head.UserInst->op_end();
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
if (SE.getSCEV(*IVOpIter) == Head.IncExpr
|| SE.getSCEV(IVSrc) == Head.IncExpr) {
break;
}
IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
}
if (IVOpIter == IVOpEnd) {
DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
return;
}
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
Instruction *InsertPt = IncI->UserInst;
if (isa<PHINode>(InsertPt))
InsertPt = L->getLoopLatch()->getTerminator();
Value *IVOper = IVSrc;
if (!IncI->IncExpr->isZero()) {
const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
LeftOverExpr = LeftOverExpr ?
SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
}
if (LeftOverExpr && !LeftOverExpr->isZero()) {
Rewriter.clearPostInc();
Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
SE.getUnknown(IncV));
IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
if (IVTy != OperTy) {
assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
"cannot extend a chained IV");
IRBuilder<> Builder(InsertPt);
IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
}
IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
DeadInsts.push_back(IncI->IVOperand);
}
if (isa<PHINode>(Chain.tailUserInst())) {
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
if (!isCompatibleIVType(Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
Phi->getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
Type *PostIncTy = PostIncV->getType();
if (IVTy != PostIncTy) {
assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
Phi->replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.push_back(PostIncV);
}
}
}
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
Instruction *UserInst = UI->getUser();
User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
UI->getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI))
continue;
LSRFixup &LF = getNewFixup();
LF.UserInst = UserInst;
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
}
const SCEV *S = IU.getExpr(*UI);
if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
if (CI->isEquality()) {
Value *NV = CI->getOperand(1);
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
Changed = true;
}
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
for (size_t i = 0, e = Factors.size(); i != e; ++i)
if (Factors[i] != -1)
Factors.insert(-(uint64_t)Factors[i]);
Factors.insert(-1);
}
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
if (LU.Formulae.empty()) {
InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
DEBUG(print_fixups(dbgs()));
}
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
if (!isSafeToExpand(S, SE))
LU.RigidFormula = true;
Formula F;
F.InitialMatch(S, L, SE);
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
Formula F;
F.BaseRegs.push_back(S);
F.HasBaseReg = true;
bool Inserted = InsertFormula(LU, LUIdx, F);
assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
if (F.ScaledReg)
RegUses.CountRegister(F.ScaledReg, LUIdx);
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I)
RegUses.CountRegister(*I, LUIdx);
}
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
"Formula is illegal");
if (!LU.InsertFormula(F))
return false;
CountRegisters(F, LUIdx);
return true;
}
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
SmallPtrSet<const SCEV *, 32> Visited;
while (!Worklist.empty()) {
const SCEV *S = Worklist.pop_back_val();
if (!Visited.insert(S))
continue;
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
Worklist.push_back(D->getLHS());
Worklist.push_back(D->getRHS());
} else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
const Value *V = US->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
if (L->contains(Inst)) continue;
} else if (isa<UndefValue>(V))
continue;
for (const Use &U : V->uses()) {
const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
if (!UserInst)
continue;
if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
continue;
const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
UserInst->getParent() :
cast<PHINode>(UserInst)->getIncomingBlock(
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
if (!DT.dominates(L->getHeader(), UseBB))
continue;
if (SE.isSCEVable(UserInst->getType())) {
const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
if (!isa<SCEVUnknown>(UserS))
continue;
if (UserS == US) {
Worklist.push_back(
SE.getUnknown(const_cast<Instruction *>(UserInst)));
continue;
}
}
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
unsigned OtherIdx = !U.getOperandNo();
Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
continue;
}
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(US, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
}
}
}
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
const Loop *L,
ScalarEvolution &SE,
unsigned Depth = 0) {
if (Depth >= 3)
return S;
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
if (AR->getStart()->isZero())
return S;
const SCEV *Remainder = CollectSubexprs(AR->getStart(),
C, Ops, L, SE, Depth+1);
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
Remainder = SE.getConstant(AR->getType(), 0);
return SE.getAddRecExpr(Remainder,
AR->getStepRecurrence(SE),
AR->getLoop(),
SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
if (Mul->getNumOperands() != 2)
return S;
if (const SCEVConstant *Op0 =
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
const SCEV *Remainder =
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
return nullptr;
}
}
return S;
}
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base,
unsigned Depth, size_t Idx,
bool IsScaledReg) {
const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
SmallVector<const SCEV *, 8> AddOps;
const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
if (Remainder)
AddOps.push_back(Remainder);
if (AddOps.size() == 1)
return;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end();
J != JE; ++J) {
if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
continue;
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, *J, Base.getNumRegs() > 1))
continue;
SmallVector<const SCEV *, 8> InnerAddOps(
((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append(std::next(J),
((const SmallVector<const SCEV *, 8> &)AddOps).end());
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
if (InnerSum->isZero())
continue;
Formula F = Base;
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset =
(uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
if (IsScaledReg)
F.ScaledReg = nullptr;
else
F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
} else if (IsScaledReg)
F.ScaledReg = InnerSum;
else
F.BaseRegs[Idx] = InnerSum;
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset =
(uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
else
F.BaseRegs.push_back(*J);
F.Canonicalize();
if (InsertFormula(LU, LUIdx, F))
GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
}
}
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
assert(Base.isCanonical() && "Input must be in the canonical form");
if (Depth >= 3)
return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
if (Base.Scale == 1)
GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
-1, true);
}
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
return;
Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator
I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
const SCEV *BaseReg = *I;
if (SE.properlyDominates(BaseReg, L->getHeader()) &&
!SE.hasComputableLoopEvolution(BaseReg, L))
Ops.push_back(BaseReg);
else
F.BaseRegs.push_back(BaseReg);
}
if (Ops.size() > 1) {
const SCEV *Sum = SE.getAddExpr(Ops);
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
const Formula &Base, size_t Idx,
bool IsScaledReg) {
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
GlobalValue *GV = ExtractSymbol(G, SE);
if (G->isZero() || !GV)
return;
Formula F = Base;
F.BaseGV = GV;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
if (IsScaledReg)
F.ScaledReg = G;
else
F.BaseRegs[Idx] = G;
(void)InsertFormula(LU, LUIdx, F);
}
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (Base.BaseGV) return;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
if (Base.Scale == 1)
GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
true);
}
void LSRInstance::GenerateConstantOffsetsImpl(
LSRUse &LU, unsigned LUIdx, const Formula &Base,
const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
E = Worklist.end();
I != E; ++I) {
Formula F = Base;
F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
LU.AccessTy, F)) {
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
if (NewG->isZero()) {
if (IsScaledReg) {
F.Scale = 0;
F.ScaledReg = nullptr;
} else
F.DeleteBaseReg(F.BaseRegs[Idx]);
F.Canonicalize();
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
F.BaseRegs[Idx] = NewG;
(void)InsertFormula(LU, LUIdx, F);
}
}
int64_t Imm = ExtractImmediate(G, SE);
if (G->isZero() || Imm == 0)
return;
Formula F = Base;
F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
return;
if (IsScaledReg)
F.ScaledReg = G;
else
F.BaseRegs[Idx] = G;
(void)InsertFormula(LU, LUIdx, F);
}
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
SmallVector<int64_t, 2> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
if (Base.Scale == 1)
GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
true);
}
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula Base) {
if (LU.Kind != LSRUse::ICmpZero) return;
Type *IntTy = Base.getType();
if (!IntTy) return;
if (SE.getTypeSizeInBits(IntTy) > 64) return;
if (LU.MinOffset != LU.MaxOffset) return;
assert(!Base.BaseGV && "ICmpZero use is not legal!");
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
if (Base.BaseOffset == INT64_MIN && Factor == -1)
continue;
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
if (NewBaseOffset / Factor != Base.BaseOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
continue;
int64_t Offset = LU.MinOffset;
if (Offset == INT64_MIN && Factor == -1)
continue;
Offset = (uint64_t)Offset * Factor;
if (Offset / Factor != LU.MinOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, Offset))
continue;
Formula F = Base;
F.BaseOffset = NewBaseOffset;
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
continue;
F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
if (F.UnfoldedOffset != 0) {
if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
continue;
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
continue;
if (!IntTy->isPointerTy() &&
!ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
continue;
}
(void)InsertFormula(LU, LUIdx, F);
next:;
}
}
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Type *IntTy = Base.getType();
if (!IntTy) return;
if (Base.Scale != 0 && !Base.Unscale())
return;
assert(Base.Scale == 0 && "Unscale did not did its job!");
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
Base.Scale = Factor;
Base.HasBaseReg = Base.BaseRegs.size() > 1;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
Base)) {
if (LU.Kind == LSRUse::Basic &&
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
LU.AccessTy, Base) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
continue;
}
if (LU.Kind == LSRUse::ICmpZero &&
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
if (const SCEVAddRecExpr *AR =
dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
if (F.Scale == 1 && F.BaseRegs.empty())
continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
}
}
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (Base.BaseGV) return;
Type *DstTy = Base.getType();
if (!DstTy) return;
DstTy = SE.getEffectiveSCEVType(DstTy);
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J)
*J = SE.getAnyExtendExpr(*J, SrcTy);
if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
}
namespace {
struct WorkItem {
size_t LUIdx;
int64_t Imm;
const SCEV *OrigReg;
WorkItem(size_t LI, int64_t I, const SCEV *R)
: LUIdx(LI), Imm(I), OrigReg(R) {}
void print(raw_ostream &OS) const;
void dump() const;
};
}
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
<< " , add offset " << Imm;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
#endif
void LSRInstance::GenerateCrossUseConstantOffsets() {
typedef std::map<int64_t, const SCEV *> ImmMapTy;
typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
RegMapTy Map;
DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
SmallVector<const SCEV *, 8> Sequence;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
int64_t Imm = ExtractImmediate(Reg, SE);
std::pair<RegMapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Reg, ImmMapTy()));
if (Pair.second)
Sequence.push_back(Reg);
Pair.first->second.insert(std::make_pair(Imm, *I));
UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
}
SmallVector<WorkItem, 32> WorkItems;
SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
E = Sequence.end(); I != E; ++I) {
const SCEV *Reg = *I;
const ImmMapTy &Imms = Map.find(Reg)->second;
if (Imms.size() == 1)
continue;
DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J)
dbgs() << ' ' << J->first;
dbgs() << '\n');
for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
J != JE; ++J) {
const SCEV *OrigReg = J->second;
int64_t JImm = J->first;
const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
if (!isa<SCEVConstant>(OrigReg) &&
UsedByIndicesMap[Reg].count() == 1) {
DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
continue;
}
ImmMapTy::const_iterator OtherImms[] = {
Imms.begin(), std::prev(Imms.end()),
Imms.lower_bound((Imms.begin()->first + std::prev(Imms.end())->first) /
2)
};
for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
ImmMapTy::const_iterator M = OtherImms[i];
if (M == J || M == JE) continue;
int64_t Imm = (uint64_t)JImm - M->first;
for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
LUIdx = UsedByIndices.find_next(LUIdx))
if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
}
}
}
Map.clear();
Sequence.clear();
UsedByIndicesMap.clear();
UniqueItems.clear();
for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
E = WorkItems.end(); I != E; ++I) {
const WorkItem &WI = *I;
size_t LUIdx = WI.LUIdx;
LSRUse &LU = Uses[LUIdx];
int64_t Imm = WI.Imm;
const SCEV *OrigReg = WI.OrigReg;
Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
Formula F = LU.Formulae[L];
F.Unscale();
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
if (F.referencesReg(SE.getSCEV(
ConstantInt::get(IntTy, -(uint64_t)Offset))))
continue;
Formula NewF = F;
NewF.BaseOffset = Offset;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
NewF))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
if (C->getValue()->isNegative() !=
(NewF.BaseOffset < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
.ule(abs64(NewF.BaseOffset)))
continue;
NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
const SCEV *BaseReg = F.BaseRegs[N];
if (BaseReg != OrigReg)
continue;
Formula NewF = F;
NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
LU.Kind, LU.AccessTy, NewF)) {
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
}
NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
for (SmallVectorImpl<const SCEV *>::const_iterator
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
abs64(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
goto skip_formula;
NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
}
}
}
}
}
void
LSRInstance::GenerateAllReuseFormulae() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateScales(LU, LUIdx, LU.Formulae[i]);
}
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
}
GenerateCrossUseConstantOffsets();
DEBUG(dbgs() << "\n"
"After generating reuse formulae:\n";
print_uses(dbgs()));
}
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
BestFormulaeTy;
BestFormulaeTy BestFormulae;
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
Cost CostF;
Regs.clear();
CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
&LoserRegs);
if (CostF.isLoser()) {
DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
dbgs() << "\n");
}
else {
SmallVector<const SCEV *, 4> Key;
for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
JE = F.BaseRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
Key.push_back(Reg);
}
if (F.ScaledReg &&
RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
Key.push_back(F.ScaledReg);
std::sort(Key.begin(), Key.end());
std::pair<BestFormulaeTy::const_iterator, bool> P =
BestFormulae.insert(std::make_pair(Key, FIdx));
if (P.second)
continue;
Formula &Best = LU.Formulae[P.first->second];
Cost CostBest;
Regs.clear();
CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
}
#ifndef NDEBUG
ChangedFormulae = true;
#endif
LU.DeleteFormula(F);
--FIdx;
--NumForms;
Any = true;
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
BestFormulae.clear();
}
DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
static const size_t ComplexityLimit = UINT16_MAX;
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
size_t Power = 1;
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
size_t FSize = I->Formulae.size();
if (FSize >= ComplexityLimit) {
Power = ComplexityLimit;
break;
}
Power *= FSize;
if (Power >= ComplexityLimit)
break;
}
return Power;
}
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
"which use a superset of registers used by other "
"formulae.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
for (SmallVectorImpl<const SCEV *>::const_iterator
I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
Formula NewF = F;
NewF.BaseOffset += C->getValue()->getSExtValue();
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
if (!F.BaseGV) {
Formula NewF = F;
NewF.BaseGV = GV;
NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
(I - F.BaseRegs.begin()));
if (LU.HasFormulaWithSameRegs(NewF)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LU.DeleteFormula(F);
--i;
--e;
Any = true;
break;
}
}
}
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() < ComplexityLimit)
return;
DEBUG(dbgs() << "The search space is too complex.\n"
"Narrowing the search space by assuming that uses separated "
"by a constant offset will use the same registers.\n");
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
if (!LUThatHas)
continue;
if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
LU.Kind, LU.AccessTy))
continue;
DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n');
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
LSRFixup &Fixup = *I;
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.BaseOffset;
if (LUThatHas->Offsets.back() != Fixup.Offset) {
LUThatHas->Offsets.push_back(Fixup.Offset);
if (Fixup.Offset > LUThatHas->MaxOffset)
LUThatHas->MaxOffset = Fixup.Offset;
if (Fixup.Offset < LUThatHas->MinOffset)
LUThatHas->MinOffset = Fixup.Offset;
}
DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
Fixup.LUIdx = LUIdx;
}
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
--i;
--e;
Any = true;
}
}
if (Any)
LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
DeleteUse(LU, LUIdx);
--LUIdx;
--NumUses;
break;
}
}
DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
"undesirable dedicated registers.\n");
FilterOutUndesirableDedicatedRegisters();
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
SmallPtrSet<const SCEV *, 4> Taken;
while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
const SCEV *Reg = *I;
if (Taken.count(Reg))
continue;
if (!Best)
Best = Reg;
else {
unsigned Count = RegUses.getUsedByIndices(Reg).count();
if (Count > BestNum) {
Best = Reg;
BestNum = Count;
}
}
}
DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
<< " will yield profitable reuse.\n");
Taken.insert(Best);
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
if (!LU.Regs.count(Best)) continue;
bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
if (!F.referencesReg(Best)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
LU.DeleteFormula(F);
--e;
--i;
Any = true;
assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
continue;
}
}
if (Any)
LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
}
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
NarrowSearchSpaceByPickingWinnerRegs();
}
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
SmallVectorImpl<const Formula *> &Workspace,
const Cost &CurCost,
const SmallPtrSet<const SCEV *, 16> &CurRegs,
DenseSet<const SCEV *> &VisitedRegs) const {
const LSRUse &LU = Uses[Workspace.size()];
SmallSetVector<const SCEV *, 4> ReqRegs;
for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(),
E = CurRegs.end(); I != E; ++I)
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((F.ScaledReg && F.ScaledReg == Reg) ||
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
--NumReqRegsToFind;
if (NumReqRegsToFind == 0)
break;
}
}
if (NumReqRegsToFind != 0) {
continue;
}
NewCost = CurCost;
NewRegs = CurRegs;
NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
NewRegs, VisitedRegs);
if (F.getNumRegs() == 1 && Workspace.size() == 1)
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
dbgs() << ".\n Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
dbgs() << '\n');
SolutionCost = NewCost;
Solution = Workspace;
}
Workspace.pop_back();
}
}
}
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
SolutionCost.Lose();
Cost CurCost;
SmallPtrSet<const SCEV *, 16> CurRegs;
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
if (Solution.empty()) {
DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
return;
}
DEBUG(dbgs() << "\n"
"The chosen solution requires "; SolutionCost.print(dbgs());
dbgs() << ":\n";
for (size_t i = 0, e = Uses.size(); i != e; ++i) {
dbgs() << " ";
Uses[i].print(dbgs());
dbgs() << "\n"
" ";
Solution[i]->print(dbgs());
dbgs() << '\n';
});
assert(Solution.size() == Uses.size() && "Malformed solution!");
}
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
for (;;) {
const Loop *IPLoop = LI.getLoopFor(IP->getParent());
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
if (!Rung) return IP;
Rung = Rung->getIDom();
if (!Rung) return IP;
IDom = Rung->getBlock();
const Loop *IDomLoop = LI.getLoopFor(IDom);
unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
if (IDomDepth <= IPLoopDepth &&
(IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
break;
}
bool AllDominate = true;
Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
Instruction *Inst = *I;
if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
AllDominate = false;
break;
}
if (IDom == Inst->getParent() &&
(!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = std::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
if (BetterPos)
IP = BetterPos;
else
IP = Tentative;
}
return IP;
}
BasicBlock::iterator
LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
const LSRUse &LU,
SCEVExpander &Rewriter) const {
SmallVector<Instruction *, 4> Inputs;
if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
Inputs.push_back(I);
if (LU.Kind == LSRUse::ICmpZero)
if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I);
if (LF.PostIncLoops.count(L)) {
if (LF.isUseFullyOutsideLoop(L))
Inputs.push_back(L->getLoopLatch()->getTerminator());
else
Inputs.push_back(IVIncInsertPos);
}
for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(),
E = LF.PostIncLoops.end(); I != E; ++I) {
const Loop *PIL = *I;
if (PIL == L) continue;
SmallVector<BasicBlock *, 4> ExitingBlocks;
PIL->getExitingBlocks(ExitingBlocks);
if (!ExitingBlocks.empty()) {
BasicBlock *BB = ExitingBlocks[0];
for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
Inputs.push_back(BB->getTerminator());
}
}
assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
&& !isa<DbgInfoIntrinsic>(LowestIP) &&
"Insertion point must be a normal instruction");
BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
while (isa<PHINode>(IP)) ++IP;
while (isa<LandingPadInst>(IP)) ++IP;
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
return IP;
}
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
if (LU.RigidFormula)
return LF.OperandValToReplace;
IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
Rewriter.setPostInc(LF.PostIncLoops);
Type *OpTy = LF.OperandValToReplace->getType();
Type *Ty = F.getType();
if (!Ty)
Ty = OpTy;
else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
Ty = OpTy;
Type *IntTy = SE.getEffectiveSCEVType(Ty);
SmallVector<const SCEV *, 8> Ops;
for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
E = F.BaseRegs.end(); I != E; ++I) {
const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!");
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
Reg = TransformForPostIncUse(Denormalize, Reg,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
if (F.Scale == 1)
Ops.push_back(
SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
else {
assert(F.Scale == -1 &&
"The only scale supported by ICmpZero uses is -1!");
ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
}
} else {
if (!Ops.empty() && LU.Kind == LSRUse::Address &&
isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
if (F.Scale != 1)
ScaledS =
SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
if (F.BaseGV) {
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
Ops.push_back(SE.getUnknown(F.BaseGV));
}
if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
if (!ICmpScaledV)
ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset);
else {
Ops.push_back(SE.getUnknown(ICmpScaledV));
ICmpScaledV = ConstantInt::get(IntTy, Offset);
}
} else {
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
}
}
int64_t UnfoldedOffset = F.UnfoldedOffset;
if (UnfoldedOffset != 0) {
Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy,
UnfoldedOffset)));
}
const SCEV *FullS = Ops.empty() ?
SE.getConstant(IntTy, 0) :
SE.getAddExpr(Ops);
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
Rewriter.clearPostInc();
if (LU.Kind == LSRUse::ICmpZero) {
ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
DeadInsts.push_back(CI->getOperand(1));
assert(!F.BaseGV && "ICmp does not support folding a global value and "
"a scale at the same time!");
if (F.Scale == -1) {
if (ICmpScaledV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false,
OpTy, false),
ICmpScaledV, OpTy, "tmp", CI);
ICmpScaledV = Cast;
}
CI->setOperand(1, ICmpScaledV);
} else {
assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
-(uint64_t)Offset);
if (C->getType() != OpTy)
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
OpTy, false),
C, OpTy);
CI->setOperand(1, C);
}
}
return FullV;
}
void LSRInstance::RewriteForPHI(PHINode *PN,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
DenseMap<BasicBlock *, Value *> Inserted;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
BasicBlock *BB = PN->getIncomingBlock(i);
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
!isa<IndirectBrInst>(BB->getTerminator())) {
BasicBlock *Parent = PN->getParent();
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
true,
true);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs);
NewBB = NewBBs[0];
}
if (NewBB) {
if (L->contains(BB) && !L->contains(PN))
NewBB->moveBefore(PN->getParent());
e = PN->getNumIncomingValues();
BB = NewBB;
i = PN->getBasicBlockIndex(BB);
}
}
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy)
FullV =
CastInst::Create(CastInst::getCastOpcode(FullV, false,
OpTy, false),
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
}
}
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
Pass *P) const {
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
} else {
Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
FullV, OpTy, "tmp", LF.UserInst);
FullV = Cast;
}
if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
LF.UserInst->setOperand(0, FullV);
else
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
}
DeadInsts.push_back(LF.OperandValToReplace);
}
void
LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
Rewriter.disableCanonicalMode();
Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst()))
Rewriter.setChainedPhi(PN);
}
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
const LSRFixup &Fixup = *I;
Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
GenerateIVChain(*ChainI, Rewriter, DeadInsts);
Changed = true;
}
Rewriter.clear();
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
LSRInstance::LSRInstance(Loop *L, Pass *P)
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
IVIncInsertPos(nullptr) {
if (!L->isLoopSimplifyForm())
return;
if (IU.empty()) return;
unsigned NumUsers = 0;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
if (++NumUsers > MaxIVUsers) {
DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L
<< "\n");
return;
}
}
#ifndef NDEBUG
for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
Rung; Rung = Rung->getIDom()) {
BasicBlock *BB = Rung->getBlock();
const Loop *DomLoop = LI.getLoopFor(BB);
if (DomLoop && DomLoop->getHeader() == BB) {
assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
}
}
#endif // DEBUG
DEBUG(dbgs() << "\nLSR on loop ";
L->getHeader()->printAsOperand(dbgs(), false);
dbgs() << ":\n");
OptimizeShadowIV();
OptimizeLoopTermCond();
if (IU.empty()) return;
if (!L->empty()) {
DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
assert(!Uses.empty() && "IVUsers reported at least one use");
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
GenerateAllReuseFormulae();
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
SmallVector<const Formula *, 8> Solution;
Solve(Solution);
Factors.clear();
Types.clear();
RegUses.clear();
if (Solution.empty())
return;
#ifndef NDEBUG
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
I != E; ++I) {
const LSRUse &LU = *I;
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end();
J != JE; ++J)
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
*J) && "Illegal formula generated!");
};
#endif
ImplementSolution(Solution, P);
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
if (Factors.empty() && Types.empty()) return;
OS << "LSR has identified the following interesting factors and types: ";
bool First = true;
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '*' << *I;
}
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
if (!First) OS << ", ";
First = false;
OS << '(' << **I << ')';
}
OS << '\n';
}
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
dbgs() << " ";
I->print(OS);
OS << '\n';
}
}
void LSRInstance::print_uses(raw_ostream &OS) const {
OS << "LSR is examining the following uses:\n";
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
const LSRUse &LU = *I;
dbgs() << " ";
LU.print(OS);
OS << '\n';
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J) {
OS << " ";
J->print(OS);
OS << '\n';
}
}
}
void LSRInstance::print(raw_ostream &OS) const {
print_factors_and_types(OS);
print_fixups(OS);
print_uses(OS);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
#endif
namespace {
class LoopStrengthReduce : public LoopPass {
public:
static char ID; LoopStrengthReduce();
private:
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
}
char LoopStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
Pass *llvm::createLoopStrengthReducePass() {
return new LoopStrengthReduce();
}
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LoopSimplifyID);
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<IVUsers>();
AU.addPreserved<IVUsers>();
AU.addRequired<TargetTransformInfo>();
}
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
if (skipOptnoneFunction(L))
return false;
bool Changed = false;
Changed |= LSRInstance(L, this).getChanged();
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.replaceCongruentIVs(
L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
&getAnalysis<TargetTransformInfo>());
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
DeleteDeadPHIs(L->getHeader());
}
}
return Changed;
}