IndVarSimplify.cpp [plain text]
#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
static cl::opt<bool> VerifyIndvars(
"verify-indvars", cl::Hidden,
cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
TargetLibraryInfo *TLI;
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
static char ID; IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.setPreservesCFG();
}
private:
virtual void releaseMemory() {
DeadInsts.clear();
}
bool isValidRewrite(Value *FromVal, Value *ToVal);
void HandleFloatingPointIV(Loop *L, PHINode *PH);
void RewriteNonIntegerIVs(Loop *L);
void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM);
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
};
}
char IndVarSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
}
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
Value *FromPtr = FromVal;
Value *ToPtr = ToVal;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(FromVal)) {
FromPtr = GEP->getPointerOperand();
}
if (GEPOperator *GEP = dyn_cast<GEPOperator>(ToVal)) {
ToPtr = GEP->getPointerOperand();
}
if (FromPtr != FromVal || ToPtr != ToVal) {
if (FromPtr == ToPtr)
return true;
if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
return false;
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
return true;
DEBUG(dbgs() << "INDVARS: GEP rewrite bail out "
<< *FromBase << " != " << *ToBase << "\n");
return false;
}
return true;
}
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
DominatorTree *DT) {
PHINode *PHI = dyn_cast<PHINode>(User);
if (!PHI)
return User;
Instruction *InsertPt = 0;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
if (PHI->getIncomingValue(i) != Def)
continue;
BasicBlock *InsertBB = PHI->getIncomingBlock(i);
if (!InsertPt) {
InsertPt = InsertBB->getTerminator();
continue;
}
InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
InsertPt = InsertBB->getTerminator();
}
assert(InsertPt && "Missing phi operand");
assert((!isa<Instruction>(Def) ||
DT->dominates(cast<Instruction>(Def), InsertPt)) &&
"def does not dominate all uses");
return InsertPt;
}
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
uint64_t UIntVal;
if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
&isExact) != APFloat::opOK || !isExact)
return false;
IntVal = UIntVal;
return true;
}
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
unsigned BackEdge = IncomingEdge^1;
ConstantFP *InitValueVal =
dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
int64_t InitValue;
if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
return;
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
int64_t IncValue;
if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
return;
Value::use_iterator IncrUse = Incr->use_begin();
Instruction *U1 = cast<Instruction>(*IncrUse++);
if (IncrUse == Incr->use_end()) return;
Instruction *U2 = cast<Instruction>(*IncrUse++);
if (IncrUse != Incr->use_end()) return;
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
if (Compare == 0 || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->use_back()))
return;
BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
if (!L->contains(TheBr->getParent()) ||
(L->contains(TheBr->getSuccessor(0)) &&
L->contains(TheBr->getSuccessor(1))))
return;
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
int64_t ExitValue;
if (ExitValueVal == 0 ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
switch (Compare->getPredicate()) {
default: return; case CmpInst::FCMP_OEQ:
case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
case CmpInst::FCMP_ONE:
case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
case CmpInst::FCMP_OGT:
case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
case CmpInst::FCMP_OGE:
case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
case CmpInst::FCMP_OLT:
case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
case CmpInst::FCMP_OLE:
case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
return;
if (IncValue == 0)
return;
if (IncValue > 0) {
if (InitValue >= ExitValue)
return;
uint32_t Range = uint32_t(ExitValue-InitValue);
if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
if (++Range == 0) return; }
unsigned Leftover = Range % uint32_t(IncValue);
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
return;
} else {
if (InitValue <= ExitValue)
return;
uint32_t Range = uint32_t(InitValue-ExitValue);
if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
if (++Range == 0) return; }
unsigned Leftover = Range % uint32_t(-IncValue);
if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
Leftover != 0)
return;
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
return;
}
IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
PN->getIncomingBlock(IncomingEdge));
Value *NewAdd =
BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
Incr->getName()+".int", Incr);
NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
ConstantInt::get(Int32Ty, ExitValue),
Compare->getName());
WeakVH WeakPH = PN;
NewCompare->takeName(Compare);
Compare->replaceAllUsesWith(NewCompare);
RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
if (WeakPH) {
Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
PN->getParent()->getFirstInsertionPt());
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
}
Changed = true;
}
void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
BasicBlock *Header = L->getHeader();
SmallVector<WeakVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
HandleFloatingPointIV(L, PN);
if (Changed)
SE->forgetLoop(L);
}
void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
assert(L->isLCSSAForm(*DT));
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBB = ExitBlocks[i];
PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
if (!PN) continue;
unsigned NumPreds = PN->getNumIncomingValues();
BasicBlock::iterator BBI = ExitBB->begin();
while ((PN = dyn_cast<PHINode>(BBI++))) {
if (PN->use_empty())
continue;
if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
continue;
SE->forgetValue(PN);
for (unsigned i = 0; i != NumPreds; ++i) {
Value *InVal = PN->getIncomingValue(i);
if (!isa<Instruction>(InVal))
continue;
if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
continue;
Instruction *Inst = cast<Instruction>(InVal);
if (!L->contains(Inst))
continue;
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
if (!SE->isLoopInvariant(ExitValue, L))
continue;
Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n'
<< " LoopVal = " << *Inst << "\n");
if (!isValidRewrite(Inst, ExitVal)) {
DeadInsts.push_back(ExitVal);
continue;
}
Changed = true;
++NumReplaced;
PN->setIncomingValue(i, ExitVal);
if (isInstructionTriviallyDead(Inst, TLI))
DeadInsts.push_back(Inst);
if (NumPreds == 1) {
PN->replaceAllUsesWith(ExitVal);
PN->eraseFromParent();
}
}
if (NumPreds != 1) {
PHINode *NewPN = cast<PHINode>(PN->clone());
NewPN->takeName(PN);
NewPN->insertBefore(PN);
PN->replaceAllUsesWith(NewPN);
PN->eraseFromParent();
}
}
}
Rewriter.clearInsertPoint();
}
namespace {
struct WideIVInfo {
PHINode *NarrowIV;
Type *WidestNativeType; bool IsSigned;
WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
};
class WideIVVisitor : public IVVisitor {
ScalarEvolution *SE;
const TargetData *TD;
public:
WideIVInfo WI;
WideIVVisitor(PHINode *NarrowIV, ScalarEvolution *SCEV,
const TargetData *TData) :
SE(SCEV), TD(TData) { WI.NarrowIV = NarrowIV; }
virtual void visitCast(CastInst *Cast);
};
}
void WideIVVisitor::visitCast(CastInst *Cast) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
if (!WI.WidestNativeType) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
return;
}
if (WI.IsSigned != IsSigned)
return;
if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
namespace {
struct NarrowIVDefUse {
Instruction *NarrowDef;
Instruction *NarrowUse;
Instruction *WideDef;
NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
};
class WidenIV {
PHINode *OrigPhi;
Type *WideType;
bool IsSigned;
LoopInfo *LI;
Loop *L;
ScalarEvolution *SE;
DominatorTree *DT;
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
SmallVectorImpl<WeakVH> &DeadInsts;
SmallPtrSet<Instruction*,16> Widened;
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
public:
WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
IsSigned(WI.IsSigned),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
WidePhi(0),
WideInc(0),
WideIncExpr(0),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Instruction *Use);
Instruction *CloneIVUser(NarrowIVDefUse DU);
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
}
static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) {
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst)
return true;
return DT->properlyDominates(Inst->getParent(), L->getHeader());
}
Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned,
Instruction *Use) {
IRBuilder<> Builder(Use);
for (const Loop *L = LI->getLoopFor(Use->getParent());
L && L->getLoopPreheader() && isLoopInvariant(NarrowOper, L, DT);
L = L->getParentLoop())
Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
Builder.CreateZExt(NarrowOper, WideType);
}
Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
return 0;
case Instruction::Add:
case Instruction::Mul:
case Instruction::UDiv:
case Instruction::Sub:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n");
Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef :
getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse);
Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef :
getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse);
BinaryOperator *NarrowBO = cast<BinaryOperator>(DU.NarrowUse);
BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(),
LHS, RHS,
NarrowBO->getName());
IRBuilder<> Builder(DU.NarrowUse);
Builder.Insert(WideBO);
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
}
return WideBO;
}
}
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
if (DU.NarrowUse->getOpcode() != Instruction::Add)
return 0;
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
const SCEV *ExtendOperExpr = 0;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())
ExtendOperExpr = SE->getSignExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else if(!IsSigned && OBO->hasNoUnsignedWrap())
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
return 0;
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
return AddRec;
}
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
return 0;
const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType())
>= SE->getTypeSizeInBits(WideType)) {
return 0;
}
const SCEV *WideExpr = IsSigned ?
SE->getSignExtendExpr(NarrowExpr, WideType) :
SE->getZeroExtendExpr(NarrowExpr, WideType);
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
return 0;
return AddRec;
}
Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (isa<PHINode>(DU.NarrowUse) &&
LI->getLoopFor(DU.NarrowUse->getParent()) != L)
return 0;
if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
Value *NewDef = DU.WideDef;
if (DU.NarrowUse->getType() != WideType) {
unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
IRBuilder<> Builder(DU.NarrowUse);
NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
}
else {
DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
<< " not wide enough to subsume " << *DU.NarrowUse << "\n");
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
NewDef = DU.NarrowUse;
}
}
if (NewDef != DU.NarrowUse) {
DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
<< " replaced by " << *DU.WideDef << "\n");
++NumElimExt;
DU.NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(DU.NarrowUse);
}
return 0;
}
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse);
if (!WideAddRec) {
WideAddRec = GetExtendedOperandRecurrence(DU);
}
if (!WideAddRec) {
IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT));
Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
return 0;
}
assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
"SCEV is not expected to evaluate a block terminator");
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
return 0;
}
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
return 0;
}
return WideUse;
}
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
for (Value::use_iterator UI = NarrowDef->use_begin(),
UE = NarrowDef->use_end(); UI != UE; ++UI) {
Instruction *NarrowUse = cast<Instruction>(*UI);
if (!Widened.insert(NarrowUse))
continue;
NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUse, WideDef));
}
}
PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
return NULL;
const SCEV *WideIVExpr = IsSigned ?
SE->getSignExtendExpr(AddRec, WideType) :
SE->getZeroExtendExpr(AddRec, WideType);
assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
"Expect the new IV expression to preserve its type");
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
return NULL;
assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader())
&& "Loop header phi recurrence inputs do not dominate the loop");
Instruction *InsertPt = L->getHeader()->begin();
WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
WideInc =
cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
WideIncExpr = SE->getSCEV(WideInc);
}
DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
++NumWidened;
assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
Widened.insert(OrigPhi);
pushNarrowIVUsers(OrigPhi, WidePhi);
while (!NarrowIVUsers.empty()) {
NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
Instruction *WideUse = WidenIVUse(DU, Rewriter);
if (WideUse)
pushNarrowIVUsers(DU.NarrowUse, WideUse);
if (DU.NarrowDef->use_empty())
DeadInsts.push_back(DU.NarrowDef);
}
return WidePhi;
}
void IndVarSimplify::SimplifyAndExtend(Loop *L,
SCEVExpander &Rewriter,
LPPassManager &LPM) {
SmallVector<WideIVInfo, 8> WideIVs;
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
LoopPhis.push_back(cast<PHINode>(I));
}
while (!LoopPhis.empty()) {
do {
PHINode *CurrIV = LoopPhis.pop_back_val();
WideIVVisitor WIV(CurrIV, SE, TD);
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &WIV);
if (WIV.WI.WidestNativeType) {
WideIVs.push_back(WIV.WI);
}
} while(!LoopPhis.empty());
for (; !WideIVs.empty(); WideIVs.pop_back()) {
WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
}
}
}
}
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
if (!Processed.insert(S))
return false;
if (isa<SCEVUDivExpr>(S)) {
ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
if (!OrigCond) return true;
const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
if (R != S) {
const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
if (L != S)
return true;
}
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
}
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
return true;
return false;
}
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
BackedgeTakenCount->isZero())
return false;
if (!L->getExitingBlock())
return false;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
if (!BI)
return false;
SmallPtrSet<const SCEV*, 8> Processed;
if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
}
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
return 0;
switch (IncI->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
break;
case Instruction::GetElementPtr:
if (IncI->getNumOperands() == 2)
break;
default:
return 0;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
return 0;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
return 0;
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
return 0;
}
static ICmpInst *getLoopTest(Loop *L) {
assert(L->getExitingBlock() && "expected loop exit");
BasicBlock *LatchBlock = L->getLoopLatch();
if (!LatchBlock)
return 0;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
return dyn_cast<ICmpInst>(BI->getCondition());
}
static bool needsLFTR(Loop *L, DominatorTree *DT) {
ICmpInst *Cond = getLoopTest(L);
if (!Cond)
return true;
ICmpInst::Predicate Pred = Cond->getPredicate();
if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
return true;
Value *LHS = Cond->getOperand(0);
Value *RHS = Cond->getOperand(1);
if (!isLoopInvariant(RHS, L, DT)) {
if (!isLoopInvariant(LHS, L, DT))
return true;
std::swap(LHS, RHS);
}
PHINode *Phi = dyn_cast<PHINode>(LHS);
if (!Phi)
Phi = getLoopPhiForCounter(LHS, L, DT);
if (!Phi)
return true;
int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
if (Idx < 0)
return true;
Value *IncV = Phi->getIncomingValue(Idx);
return Phi != getLoopPhiForCounter(IncV, L, DT);
}
static bool hasConcreteDefImpl(Value *V, SmallPtrSet<Value*, 8> &Visited,
unsigned Depth) {
if (isa<Constant>(V))
return !isa<UndefValue>(V);
if (Depth >= 6)
return false;
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
return false;
if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
return false;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
if (!Visited.insert(*OI))
continue;
if (!hasConcreteDefImpl(*OI, Visited, Depth+1))
return false;
}
return true;
}
static bool hasConcreteDef(Value *V) {
SmallPtrSet<Value*, 8> Visited;
Visited.insert(V);
return hasConcreteDefImpl(V, Visited, 0);
}
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
for (Value::use_iterator UI = Phi->use_begin(), UE = Phi->use_end();
UI != UE; ++UI) {
if (*UI != Cond && *UI != IncV) return false;
}
for (Value::use_iterator UI = IncV->use_begin(), UE = IncV->use_end();
UI != UE; ++UI) {
if (*UI != Cond && *UI != Phi) return false;
}
return true;
}
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
PHINode *BestPhi = 0;
const SCEV *BestInit = 0;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
PHINode *Phi = cast<PHINode>(I);
if (!SE->isSCEVable(Phi->getType()))
continue;
if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
continue;
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
if (!AR || AR->getLoop() != L || !AR->isAffine())
continue;
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
if (PhiWidth < BCWidth || (TD && !TD->isLegalInteger(PhiWidth)))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
if (!Step || !Step->isOne())
continue;
int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
Value *IncV = Phi->getIncomingValue(LatchIdx);
if (getLoopPhiForCounter(IncV, L, DT) != Phi)
continue;
if (!hasConcreteDef(Phi)) {
if (ICmpInst *Cond = getLoopTest(L)) {
if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L, DT)
&& Phi != getLoopPhiForCounter(Cond->getOperand(1), L, DT)) {
continue;
}
}
}
const SCEV *Init = AR->getStart();
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
if (AlmostDeadIV(Phi, LatchBlock, Cond))
continue;
if (BestInit->isZero() != Init->isZero()) {
if (BestInit->isZero())
continue;
}
else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
continue;
}
BestPhi = Phi;
BestInit = Init;
}
return BestPhi;
}
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
const SCEV *IVInit = AR->getStart();
if (IndVar->getType()->isPointerTy()
&& !IVCount->getType()->isPointerTy()) {
Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
assert(SE->isLoopInvariant(IVOffset, L) &&
"Computed iteration count is not loop invariant!");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
assert(SE->getSizeOfExpr(
cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
&& "unit stride pointer IV must be i8*");
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
}
else {
const SCEV *IVLimit = 0;
if (AR->getStart()->isZero())
IVLimit = IVCount;
else {
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
const SCEV *IVInit = AR->getStart();
if (SE->getTypeSizeInBits(IVInit->getType())
> SE->getTypeSizeInBits(IVCount->getType()))
IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
IVLimit = SE->getAddExpr(IVInit, IVCount);
}
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
IRBuilder<> Builder(BI);
assert(SE->isLoopInvariant(IVLimit, L) &&
"Computed iteration count is not loop invariant!");
Type *LimitTy = IVCount->getType()->isPointerTy() ?
IndVar->getType() : IVCount->getType();
return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
}
}
Value *IndVarSimplify::
LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
Type *CntTy = BackedgeTakenCount->getType();
const SCEV *IVCount = BackedgeTakenCount;
Value *CmpIndVar;
if (L->getExitingBlock() == L->getLoopLatch()) {
const SCEV *N =
SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
if (CntTy == IVCount->getType())
IVCount = N;
else {
const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
} else {
IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
}
}
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
CmpIndVar = IndVar;
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
&& "genLoopLimit missed a cast");
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
else
P = ICmpInst::ICMP_EQ;
DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
<< " LHS:" << *CmpIndVar << '\n'
<< " op:\t"
<< (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
<< " RHS:\t" << *ExitCnt << "\n"
<< " IVCount:\t" << *IVCount << "\n");
IRBuilder<> Builder(BI);
if (SE->getTypeSizeInBits(CmpIndVar->getType())
> SE->getTypeSizeInBits(ExitCnt->getType())) {
CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
"lftr.wideiv");
}
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
Value *OrigCond = BI->getCondition();
BI->setCondition(Cond);
DeadInsts.push_back(OrigCond);
++NumLFTR;
Changed = true;
return Cond;
}
void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
Instruction *InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
if (isa<PHINode>(I))
break;
if (I->mayHaveSideEffects() || I->mayReadFromMemory())
continue;
if (isa<DbgInfoIntrinsic>(I))
continue;
if (isa<LandingPadInst>(I))
continue;
if (isa<AllocaInst>(I))
continue;
bool UsedInLoop = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
User *U = *UI;
BasicBlock *UseBB = cast<Instruction>(U)->getParent();
if (PHINode *P = dyn_cast<PHINode>(U)) {
unsigned i =
PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
UseBB = P->getIncomingBlock(i);
}
if (UseBB == Preheader || L->contains(UseBB)) {
UsedInLoop = true;
break;
}
}
if (UsedInLoop)
continue;
Instruction *ToMove = I;
bool Done = false;
if (I != Preheader->begin()) {
do {
--I;
} while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
Done = true;
} else {
Done = true;
}
ToMove->moveBefore(InsertPt);
if (Done) break;
InsertPt = ToMove;
}
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
Changed = false;
RewriteNonIntegerIVs(L);
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
SCEVExpander Rewriter(*SE, "indvars");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
Rewriter.disableCanonicalMode();
SimplifyAndExtend(L, Rewriter, LPM);
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
if (IndVar) {
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
if (!AR || AR->getLoop()->getLoopPreheader())
(void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
Rewriter);
}
}
Rewriter.clear();
while (!DeadInsts.empty())
if (Instruction *Inst =
dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
SinkUnusedInvariants(L);
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
assert(L->isLCSSAForm(*DT) &&
"Indvars did not leave the loop in lcssa form!");
#ifndef NDEBUG
if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
SE->getTypeSizeInBits(NewBECount->getType()))
NewBECount = SE->getTruncateOrNoop(NewBECount,
BackedgeTakenCount->getType());
else
BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
NewBECount->getType());
assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
}
#endif
return Changed;
}