#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#if __cplusplus >= 201103L && !defined(NDEBUG)
#include <random>
#endif
using namespace llvm;
#define DEBUG_TYPE "sroa"
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
cl::init(false), cl::Hidden);
static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds",
cl::init(false), cl::Hidden);
namespace {
template <bool preserveNames = true>
class IRBuilderPrefixedInserter :
public IRBuilderDefaultInserter<preserveNames> {
std::string Prefix;
public:
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
protected:
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<preserveNames>::InsertHelper(
I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
}
};
template <>
class IRBuilderPrefixedInserter<false> :
public IRBuilderDefaultInserter<false> {
public:
void SetNamePrefix(const Twine &P) {}
};
#ifndef NDEBUG
typedef llvm::IRBuilder<true, ConstantFolder,
IRBuilderPrefixedInserter<true> > IRBuilderTy;
#else
typedef llvm::IRBuilder<false, ConstantFolder,
IRBuilderPrefixedInserter<false> > IRBuilderTy;
#endif
}
namespace {
class Slice {
uint64_t BeginOffset;
uint64_t EndOffset;
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
public:
Slice() : BeginOffset(), EndOffset() {}
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
: BeginOffset(BeginOffset), EndOffset(EndOffset),
UseAndIsSplittable(U, IsSplittable) {}
uint64_t beginOffset() const { return BeginOffset; }
uint64_t endOffset() const { return EndOffset; }
bool isSplittable() const { return UseAndIsSplittable.getInt(); }
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
bool isDead() const { return getUse() == nullptr; }
void kill() { UseAndIsSplittable.setPointer(nullptr); }
bool operator<(const Slice &RHS) const {
if (beginOffset() < RHS.beginOffset()) return true;
if (beginOffset() > RHS.beginOffset()) return false;
if (isSplittable() != RHS.isSplittable()) return !isSplittable();
if (endOffset() > RHS.endOffset()) return true;
return false;
}
friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
bool operator==(const Slice &RHS) const {
return isSplittable() == RHS.isSplittable() &&
beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
}
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
}
namespace llvm {
template <typename T> struct isPodLike;
template <> struct isPodLike<Slice> {
static const bool value = true;
};
}
namespace {
class AllocaSlices {
public:
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
bool isEscaped() const { return PointerEscapingInstr; }
typedef SmallVectorImpl<Slice>::iterator iterator;
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
void printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void printUse(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
void dump(const_iterator I) const;
void dump() const;
#endif
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AllocaInst &AI;
#endif
Instruction *PointerEscapingInstr;
SmallVector<Slice, 8> Slices;
SmallVector<Instruction *, 8> DeadUsers;
SmallVector<Use *, 8> DeadOperands;
};
}
static Value *foldSelectInst(SelectInst &SI) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
return SI.getOperand(1+CI->isZero());
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
return nullptr;
}
static Value *foldPHINodeOrSelectInst(Instruction &I) {
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
return PN->hasConstantValue();
}
return foldSelectInst(cast<SelectInst>(I));
}
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
friend class PtrUseVisitor<SliceBuilder>;
friend class InstVisitor<SliceBuilder>;
typedef PtrUseVisitor<SliceBuilder> Base;
const uint64_t AllocSize;
AllocaSlices &S;
SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &S)
: PtrUseVisitor<SliceBuilder>(DL),
AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), S(S) {}
private:
void markAsDead(Instruction &I) {
if (VisitedDeadInsts.insert(&I))
S.DeadUsers.push_back(&I);
}
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
if (Size == 0 || Offset.uge(AllocSize)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
<< " alloca: " << S.AI << "\n"
<< " use: " << I << "\n");
return markAsDead(I);
}
uint64_t BeginOffset = Offset.getZExtValue();
uint64_t EndOffset = BeginOffset + Size;
assert(AllocSize >= BeginOffset); if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
<< " to remain within the " << AllocSize << " byte alloca:\n"
<< " alloca: " << S.AI << "\n"
<< " use: " << I << "\n");
EndOffset = AllocSize;
}
S.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
}
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
return Base::visitBitCastInst(BC);
}
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
if (SROAStrictInbounds && GEPI.isInBounds()) {
APInt GEPOffset = Offset;
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
if (!OpC)
break;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
GEPOffset +=
APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
} else {
APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
GEPOffset += Index * APInt(Offset.getBitWidth(),
DL.getTypeAllocSize(GTI.getIndexedType()));
}
if (GEPOffset.ugt(AllocSize))
return markAsDead(GEPI);
}
}
return Base::visitGetElementPtrInst(GEPI);
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
uint64_t Size, bool IsVolatile) {
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
insertUse(I, Offset, Size, IsSplittable);
}
void visitLoadInst(LoadInst &LI) {
assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
"All simple FCA loads should have been pre-split");
if (!IsOffsetKnown)
return PI.setAborted(&LI);
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
Value *ValOp = SI.getValueOperand();
if (ValOp == *U)
return PI.setEscapedAndAborted(&SI);
if (!IsOffsetKnown)
return PI.setAborted(&SI);
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
<< " alloca: " << S.AI << "\n"
<< " use: " << SI << "\n");
return markAsDead(SI);
}
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
void visitMemSetInst(MemSetInst &II) {
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && Offset.uge(AllocSize)))
return markAsDead(II);
if (!IsOffsetKnown)
return PI.setAborted(&II);
insertUse(II, Offset,
Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
}
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if (Length && Length->getValue() == 0)
return markAsDead(II);
if (VisitedDeadInsts.count(&II))
return;
if (!IsOffsetKnown)
return PI.setAborted(&II);
if (Offset.uge(AllocSize)) {
SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
if (MTPI != MemTransferSliceMap.end())
S.Slices[MTPI->second].kill();
return markAsDead(II);
}
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue()
: AllocSize - RawOffset;
if (*U == II.getRawDest() && *U == II.getRawSource()) {
if (!II.isVolatile())
return markAsDead(II);
return insertUse(II, Offset, Size, false);
}
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
std::tie(MTPI, Inserted) =
MemTransferSliceMap.insert(std::make_pair(&II, S.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
Slice &PrevP = S.Slices[PrevIdx];
if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
PrevP.kill();
return markAsDead(II);
}
PrevP.makeUnsplittable();
}
insertUse(II, Offset, Size, Inserted && Length);
assert(S.Slices[PrevIdx].getUse()->getUser() == &II &&
"Map index doesn't point back to a slice with this user.");
}
void visitIntrinsicInst(IntrinsicInst &II) {
if (!IsOffsetKnown)
return PI.setAborted(&II);
if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
insertUse(II, Offset, Size, true);
return;
}
Base::visitIntrinsicInst(II);
}
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
SmallPtrSet<Instruction *, 4> Visited;
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
Size = 0;
do {
Instruction *I, *UsedI;
std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
!isa<SelectInst>(I)) {
return I;
}
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)))
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
return nullptr;
}
void visitPHINodeOrSelectInst(Instruction &I) {
assert(isa<PHINode>(I) || isa<SelectInst>(I));
if (I.use_empty())
return markAsDead(I);
if (Value *Result = foldPHINodeOrSelectInst(I)) {
if (Result == *U)
enqueueUsers(I);
else
S.DeadOperands.push_back(U);
return;
}
if (!IsOffsetKnown)
return PI.setAborted(&I);
uint64_t &Size = PHIOrSelectSizes[&I];
if (!Size) {
if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
return PI.setAborted(UnsafeI);
}
if (Offset.uge(AllocSize)) {
S.DeadOperands.push_back(U);
return;
}
insertUse(I, Offset, Size);
}
void visitPHINode(PHINode &PN) {
visitPHINodeOrSelectInst(PN);
}
void visitSelectInst(SelectInst &SI) {
visitPHINodeOrSelectInst(SI);
}
void visitInstruction(Instruction &I) {
PI.setAborted(&I);
}
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
: PtrI.getAbortingInst();
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
std::mem_fun_ref(&Slice::isDead)),
Slices.end());
#if __cplusplus >= 201103L && !defined(NDEBUG)
if (SROARandomShuffleSlices) {
std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
std::shuffle(Slices.begin(), Slices.end(), MT);
}
#endif
std::sort(Slices.begin(), Slices.end());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void AllocaSlices::print(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
printSlice(OS, I, Indent);
printUse(OS, I, Indent);
}
void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
<< " slice #" << (I - begin())
<< (I->isSplittable() ? " (splittable)" : "") << "\n";
}
void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}
void AllocaSlices::print(raw_ostream &OS) const {
if (PointerEscapingInstr) {
OS << "Can't analyze slices for alloca: " << AI << "\n"
<< " A pointer to this alloca escaped by:\n"
<< " " << *PointerEscapingInstr << "\n";
return;
}
OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
print(dbgs(), I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
namespace {
class AllocaPromoter : public LoadAndStorePromoter {
AllocaInst &AI;
DIBuilder &DIB;
SmallVector<DbgDeclareInst *, 4> DDIs;
SmallVector<DbgValueInst *, 4> DVIs;
public:
AllocaPromoter(const SmallVectorImpl<Instruction *> &Insts, SSAUpdater &S,
AllocaInst &AI, DIBuilder &DIB)
: LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
void run(const SmallVectorImpl<Instruction*> &Insts) {
if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
for (User *U : DebugNode->users())
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
DVIs.push_back(DVI);
}
LoadAndStorePromoter::run(Insts);
while (!DDIs.empty())
DDIs.pop_back_val()->eraseFromParent();
while (!DVIs.empty())
DVIs.pop_back_val()->eraseFromParent();
}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts) const override {
Value *Ptr;
if (LoadInst *LI = dyn_cast<LoadInst>(I))
Ptr = LI->getOperand(0);
else
Ptr = cast<StoreInst>(I)->getPointerOperand();
SmallPtrSet<Value *, 4> Visited;
do {
if (Ptr == &AI)
return true;
if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
Ptr = BCI->getOperand(0);
else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
Ptr = GEPI->getPointerOperand();
else
return false;
} while (Visited.insert(Ptr));
return false;
}
void updateDebugInfo(Instruction *Inst) const override {
for (SmallVectorImpl<DbgDeclareInst *>::const_iterator I = DDIs.begin(),
E = DDIs.end(); I != E; ++I) {
DbgDeclareInst *DDI = *I;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
}
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(ZExt->getOperand(0));
else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(SExt->getOperand(0));
if (!Arg)
Arg = SI->getValueOperand();
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
Arg = LI->getPointerOperand();
} else {
continue;
}
Instruction *DbgVal =
DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
Inst);
DbgVal->setDebugLoc(DVI->getDebugLoc());
}
}
};
}
namespace {
class SROA : public FunctionPass {
const bool RequiresDomTree;
LLVMContext *C;
const DataLayout *DL;
DominatorTree *DT;
SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist;
SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > PostPromotionWorklist;
std::vector<AllocaInst *> PromotableAllocas;
SetVector<PHINode *, SmallVector<PHINode *, 2> > SpeculatablePHIs;
SetVector<SelectInst *, SmallVector<SelectInst *, 2> > SpeculatableSelects;
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
C(nullptr), DL(nullptr), DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
const char *getPassName() const override { return "SROA"; }
static char ID;
private:
friend class PHIOrSelectSpeculator;
friend class AllocaSliceRewriter;
bool rewritePartition(AllocaInst &AI, AllocaSlices &S,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses);
bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
bool runOnAlloca(AllocaInst &AI);
void clobberUse(Use &U);
void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
bool promoteAllocas(Function &F);
};
}
char SROA::ID = 0;
FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
return new SROA(RequiresDomTree);
}
INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
false, false)
static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
}
if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
if (UserITy->getBitWidth() % 8 != 0 ||
UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
ITy = UserITy;
}
if (!UserTy || (Ty && Ty != UserTy))
TyIsCommon = false; else
Ty = UserTy;
}
return TyIsCommon ? Ty : ITy;
}
static bool isSafePHIToSpeculate(PHINode &PN,
const DataLayout *DL = nullptr) {
BasicBlock *BB = PN.getParent();
unsigned MaxAlign = 0;
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
if (LI->getParent() != BB)
return false;
for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
MaxAlign = std::max(MaxAlign, LI->getAlignment());
HaveLoad = true;
}
if (!HaveLoad)
return false;
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
if (TI == InVal || TI->mayHaveSideEffects())
return false;
if (TI->getNumSuccessors() == 1)
continue;
if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
continue;
return false;
}
return true;
}
static void speculatePHINodeLoads(PHINode &PN) {
DEBUG(dbgs() << " original: " << PN << "\n");
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
AAMDNodes AATags;
SomeLoad->getAAMetadata(AATags);
unsigned Align = SomeLoad->getAlignment();
while (!PN.use_empty()) {
LoadInst *LI = cast<LoadInst>(PN.user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
BasicBlock *Pred = PN.getIncomingBlock(Idx);
TerminatorInst *TI = Pred->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
Load->setAlignment(Align);
if (AATags)
Load->setAAMetadata(AATags);
NewPN->addIncoming(Load, Pred);
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
PN.eraseFromParent();
}
static bool isSafeSelectToSpeculate(SelectInst &SI,
const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
bool TDerefable = TValue->isDereferenceablePointer(DL);
bool FDerefable = FValue->isDereferenceablePointer(DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
if (!TDerefable &&
!isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
return false;
if (!FDerefable &&
!isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
return false;
}
return true;
}
static void speculateSelectInstLoads(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
IRBuilderTy IRB(&SI);
Value *TV = SI.getTrueValue();
Value *FV = SI.getFalseValue();
while (!SI.use_empty()) {
LoadInst *LI = cast<LoadInst>(SI.user_back());
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
LoadInst *TL =
IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
LoadInst *FL =
IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
TL->setAlignment(LI->getAlignment());
FL->setAlignment(LI->getAlignment());
AAMDNodes Tags;
LI->getAAMetadata(Tags);
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
}
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
LI->getName() + ".sroa.speculated");
DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
}
SI.eraseFromParent();
}
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
if (Indices.empty())
return BasePtr;
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
return IRB.CreateInBoundsGEP(BasePtr, Indices, NamePrefix + "sroa_idx");
}
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
Twine NamePrefix) {
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
unsigned NumLayers = 0;
Type *ElementTy = Ty;
do {
if (ElementTy->isPointerTy())
break;
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
Indices.push_back(IRB.getIntN(PtrSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; ElementTy = *STy->element_begin();
Indices.push_back(IRB.getInt32(0));
} else {
break;
}
++NumLayers;
} while (ElementTy != TargetTy);
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
Twine NamePrefix) {
if (Offset == 0)
return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices, NamePrefix);
if (Ty->isPointerTy())
return nullptr;
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
if (ElementSizeInBits % 8 != 0) {
return nullptr;
}
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
Offset, TargetTy, Indices, NamePrefix);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
Type *ElementTy = ArrTy->getElementType();
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices, NamePrefix);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
return nullptr;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
return nullptr;
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices, NamePrefix);
}
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
Twine NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
return nullptr;
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
return nullptr; APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
Indices, NamePrefix);
}
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
Twine NamePrefix) {
SmallPtrSet<Value *, 4> Visited;
Visited.insert(Ptr);
SmallVector<Value *, 4> Indices;
Value *OffsetPtr = nullptr;
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
Type *TargetTy = PointerTy->getPointerElementType();
do {
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
if (!Visited.insert(Ptr))
break;
}
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
Indices, NamePrefix)) {
if (P->getType() == PointerTy) {
if (OffsetPtr && OffsetPtr->use_empty())
if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
I->eraseFromParent();
return P;
}
if (!OffsetPtr) {
OffsetPtr = P;
}
}
if (Ptr->getType()->isIntegerTy(8)) {
Int8Ptr = Ptr;
Int8PtrOffset = Offset;
}
if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
if (GA->mayBeOverridden())
break;
Ptr = GA->getAliasee();
} else {
break;
}
assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(Ptr));
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(
Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
NamePrefix + "sroa_raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
if (Ptr->getType() != PointerTy)
Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
return Ptr;
}
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() >= OldITy->getBitWidth())
return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
OldTy = OldTy->getScalarType();
NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy())
return true;
if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
return true;
return false;
}
return true;
}
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *NewTy) {
Type *OldTy = V->getType();
assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
if (OldTy == NewTy)
return V;
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() > OldITy->getBitWidth())
return IRB.CreateZExt(V, NewITy);
if (OldTy->getScalarType()->isIntegerTy() &&
NewTy->getScalarType()->isPointerTy()) {
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
NewTy);
if (!OldTy->isVectorTy() && NewTy->isVectorTy())
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
NewTy);
return IRB.CreateIntToPtr(V, NewTy);
}
if (OldTy->getScalarType()->isPointerTy() &&
NewTy->getScalarType()->isIntegerTy()) {
if (OldTy->isVectorTy() && !NewTy->isVectorTy())
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
if (!OldTy->isVectorTy() && NewTy->isVectorTy())
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
return IRB.CreatePtrToInt(V, NewTy);
}
return IRB.CreateBitCast(V, NewTy);
}
static bool isVectorPromotionViableForSlice(
const DataLayout &DL, AllocaSlices &S, uint64_t SliceBeginOffset,
uint64_t SliceEndOffset, VectorType *Ty, uint64_t ElementSize,
AllocaSlices::const_iterator I) {
uint64_t BeginOffset =
std::max(I->beginOffset(), SliceBeginOffset) - SliceBeginOffset;
uint64_t BeginIndex = BeginOffset / ElementSize;
if (BeginIndex * ElementSize != BeginOffset ||
BeginIndex >= Ty->getNumElements())
return false;
uint64_t EndOffset =
std::min(I->endOffset(), SliceEndOffset) - SliceBeginOffset;
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
return false;
assert(EndIndex > BeginIndex && "Empty vector!");
uint64_t NumElements = EndIndex - BeginIndex;
Type *SliceTy =
(NumElements == 1) ? Ty->getElementType()
: VectorType::get(Ty->getElementType(), NumElements);
Type *SplitIntTy =
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
Use *U = I->getUse();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
if (!I->isSplittable())
return false; } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
if (SliceBeginOffset > I->beginOffset() ||
SliceEndOffset < I->endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
}
if (!canConvertValue(DL, SliceTy, LTy))
return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
if (SliceBeginOffset > I->beginOffset() ||
SliceEndOffset < I->endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
}
if (!canConvertValue(DL, STy, SliceTy))
return false;
} else {
return false;
}
return true;
}
static bool
isVectorPromotionViable(const DataLayout &DL, Type *AllocaTy, AllocaSlices &S,
uint64_t SliceBeginOffset, uint64_t SliceEndOffset,
AllocaSlices::const_iterator I,
AllocaSlices::const_iterator E,
ArrayRef<AllocaSlices::iterator> SplitUses) {
VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
if (!Ty)
return false;
uint64_t ElementSize = DL.getTypeSizeInBits(Ty->getScalarType());
if (ElementSize % 8)
return false;
assert((DL.getTypeSizeInBits(Ty) % 8) == 0 &&
"vector size not a multiple of element size?");
ElementSize /= 8;
for (; I != E; ++I)
if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
SliceEndOffset, Ty, ElementSize, I))
return false;
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
SUE = SplitUses.end();
SUI != SUE; ++SUI)
if (!isVectorPromotionViableForSlice(DL, S, SliceBeginOffset,
SliceEndOffset, Ty, ElementSize, *SUI))
return false;
return true;
}
static bool isIntegerWideningViableForSlice(const DataLayout &DL,
Type *AllocaTy,
uint64_t AllocBeginOffset,
uint64_t Size, AllocaSlices &S,
AllocaSlices::const_iterator I,
bool &WholeAllocaOp) {
uint64_t RelBegin = I->beginOffset() - AllocBeginOffset;
uint64_t RelEnd = I->endOffset() - AllocBeginOffset;
if (RelEnd > Size)
return false;
Use *U = I->getUse();
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, AllocaTy, LI->getType())) {
return false;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, ValueTy, AllocaTy)) {
return false;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
if (!I->isSplittable())
return false; } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else {
return false;
}
return true;
}
static bool
isIntegerWideningViable(const DataLayout &DL, Type *AllocaTy,
uint64_t AllocBeginOffset, AllocaSlices &S,
AllocaSlices::const_iterator I,
AllocaSlices::const_iterator E,
ArrayRef<AllocaSlices::iterator> SplitUses) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
if (SizeInBits > IntegerType::MAX_INT_BITS)
return false;
if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy))
return false;
Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
if (!canConvertValue(DL, AllocaTy, IntTy) ||
!canConvertValue(DL, IntTy, AllocaTy))
return false;
uint64_t Size = DL.getTypeStoreSize(AllocaTy);
bool WholeAllocaOp = (I != E) ? false : DL.isLegalInteger(SizeInBits);
for (; I != E; ++I)
if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
S, I, WholeAllocaOp))
return false;
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
SUE = SplitUses.end();
SUI != SUE; ++SUI)
if (!isIntegerWideningViableForSlice(DL, AllocaTy, AllocBeginOffset, Size,
S, *SUI, WholeAllocaOp))
return false;
return WholeAllocaOp;
}
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element extends past full value");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
DEBUG(dbgs() << " shifted: " << *V << "\n");
}
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
DEBUG(dbgs() << " trunced: " << *V << "\n");
}
return V;
}
static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
DEBUG(dbgs() << " start: " << *V << "\n");
if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
DEBUG(dbgs() << " extended: " << *V << "\n");
}
assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
"Element store outside of alloca store");
uint64_t ShAmt = 8*Offset;
if (DL.isBigEndian())
ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
DEBUG(dbgs() << " shifted: " << *V << "\n");
}
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
static Value *extractVector(IRBuilderTy &IRB, Value *V,
unsigned BeginIndex, unsigned EndIndex,
const Twine &Name) {
VectorType *VecTy = cast<VectorType>(V->getType());
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
if (NumElements == VecTy->getNumElements())
return V;
if (NumElements == 1) {
V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
Name + ".extract");
DEBUG(dbgs() << " extract: " << *V << "\n");
return V;
}
SmallVector<Constant*, 8> Mask;
Mask.reserve(NumElements);
for (unsigned i = BeginIndex; i != EndIndex; ++i)
Mask.push_back(IRB.getInt32(i));
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".extract");
DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
VectorType *Ty = dyn_cast<VectorType>(V->getType());
if (!Ty) {
V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
Name + ".insert");
DEBUG(dbgs() << " insert: " << *V << "\n");
return V;
}
assert(Ty->getNumElements() <= VecTy->getNumElements() &&
"Too many elements!");
if (Ty->getNumElements() == VecTy->getNumElements()) {
assert(V->getType() == VecTy && "Vector type mismatch");
return V;
}
unsigned EndIndex = BeginIndex + Ty->getNumElements();
SmallVector<Constant*, 8> Mask;
Mask.reserve(VecTy->getNumElements());
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
if (i >= BeginIndex && i < EndIndex)
Mask.push_back(IRB.getInt32(i - BeginIndex));
else
Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
ConstantVector::get(Mask),
Name + ".expand");
DEBUG(dbgs() << " shuffle: " << *V << "\n");
Mask.clear();
for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
namespace {
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
const DataLayout &DL;
AllocaSlices &S;
SROA &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
VectorType *VecTy;
Type *ElementTy;
uint64_t ElementSize;
IntegerType *IntTy;
uint64_t BeginOffset, EndOffset;
uint64_t NewBeginOffset, NewEndOffset;
uint64_t SliceSize;
bool IsSplittable;
bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
SmallPtrSetImpl<PHINode *> &PHIUsers;
SmallPtrSetImpl<SelectInst *> &SelectUsers;
IRBuilderTy IRB;
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsVectorPromotable,
bool IsIntegerPromotable,
SmallPtrSetImpl<PHINode *> &PHIUsers,
SmallPtrSetImpl<SelectInst *> &SelectUsers)
: DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
: nullptr),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
++NumVectorized;
}
assert((!IsVectorPromotable && !IsIntegerPromotable) ||
IsVectorPromotable != IsIntegerPromotable);
}
bool visit(AllocaSlices::const_iterator I) {
bool CanSROA = true;
BeginOffset = I->beginOffset();
EndOffset = I->endOffset();
IsSplittable = I->isSplittable();
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
assert(BeginOffset < NewAllocaEndOffset);
assert(EndOffset > NewAllocaBeginOffset);
NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
SliceSize = NewEndOffset - NewBeginOffset;
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
IRB.SetInsertPoint(OldUserI);
IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
if (VecTy || IntTy)
assert(CanSROA);
return CanSROA;
}
private:
using Base::visit;
bool visitInstruction(Instruction &I) {
DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
llvm_unreachable("No rewrite rule for this instruction!");
}
Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(IsSplit || BeginOffset == NewBeginOffset);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
#ifndef NDEBUG
StringRef OldName = OldPtr->getName();
size_t LastSROAPrefix = OldName.rfind(".sroa.");
if (LastSROAPrefix != StringRef::npos) {
OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
size_t IndexEnd = OldName.find_first_not_of("0123456789");
if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
OldName = OldName.substr(IndexEnd + 1);
size_t OffsetEnd = OldName.find_first_not_of("0123456789");
if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
OldName = OldName.substr(OffsetEnd + 1);
}
}
OldName = OldName.substr(0, OldName.find(".sroa_"));
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
Twine()
#endif
);
}
unsigned getSliceAlign(Type *Ty = nullptr) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
unsigned Align = MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
}
unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
return Index;
}
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
Pass.DeadInsts.insert(I);
}
Value *rewriteVectorizedLoadInst() {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
"extract");
return V;
}
bool visitLoadInst(LoadInst &LI) {
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
canConvertValue(DL, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(TargetTy), LI.isVolatile(),
LI.getName());
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
"Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset,
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
} else {
LI.replaceAllUsesWith(V);
}
Pass.DeadInsts.insert(&LI);
deleteIfTriviallyDead(OldOp);
DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
}
bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
Type *SliceTy =
(NumElements == 1) ? ElementTy
: VectorType::get(ElementTy, NumElements);
if (V->getType() != SliceTy)
V = convertValue(DL, IRB, V, SliceTy);
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset,
"insert");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool visitStoreInst(StoreInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
Value *V = SI.getValueOperand();
if (V->getType()->isPointerTy())
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
if (SliceSize < DL.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
DL.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset,
"extract");
}
if (VecTy)
return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
canConvertValue(DL, V->getType(), NewAllocaTy)) {
V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
(void)NewSI;
Pass.DeadInsts.insert(&SI);
deleteIfTriviallyDead(OldOp);
DEBUG(dbgs() << " to: " << *NewSI << "\n");
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
if (Size == 1)
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
ConstantExpr::getUDiv(
Constant::getAllOnesValue(SplatIntTy),
ConstantExpr::getZExt(
Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
"isplat");
return V;
}
Value *getVectorSplat(Value *V, unsigned NumElements) {
V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
if (!isa<Constant>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
Type *CstTy = II.getAlignmentCst()->getType();
II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
deleteIfTriviallyDead(OldPtr);
return false;
}
Pass.DeadInsts.insert(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
if (!VecTy && !IntTy &&
(BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset ||
SliceSize != DL.getTypeStoreSize(AllocaTy) ||
!AllocaTy->isSingleValueType() ||
!DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
DL.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
getSliceAlign(), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
Value *V;
if (VecTy) {
assert(ElementTy == ScalarTy);
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
Value *Splat =
getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8);
Splat = convertValue(DL, IRB, Splat, ElementTy);
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
assert(!II.isVolatile());
uint64_t Size = NewEndOffset - NewBeginOffset;
V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaBeginOffset)) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
}
V = convertValue(DL, IRB, V, AllocaTy);
} else {
assert(NewBeginOffset == NewAllocaBeginOffset);
assert(NewEndOffset == NewAllocaEndOffset);
V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
V = getVectorSplat(V, AllocaVecTy->getNumElements());
V = convertValue(DL, IRB, V, AllocaTy);
}
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
bool visitMemTransferInst(MemTransferInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
unsigned SliceAlign = getSliceAlign();
if (!IsSplittable) {
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (IsDest)
II.setDest(AdjustedPtr);
else
II.setSource(AdjustedPtr);
if (II.getAlignment() > SliceAlign) {
Type *CstTy = II.getAlignmentCst()->getType();
II.setAlignment(
ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
}
DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
}
bool EmitMemCpy =
!VecTy && !IntTy &&
(BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) ||
!NewAI.getAllocatedType()->isSingleValueType());
if (EmitMemCpy && &OldAI == &NewAI) {
assert(NewBeginOffset == BeginOffset);
if (NewEndOffset != EndOffset)
II.setLength(ConstantInt::get(II.getLength()->getType(),
NewEndOffset - NewBeginOffset));
return false;
}
Pass.DeadInsts.insert(&II);
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
assert(AI != &OldAI && AI != &NewAI &&
"Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
}
Type *OtherPtrTy = OtherPtr->getType();
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemCpy(
IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
MinAlign(SliceAlign, OtherAlign), II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
= IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr;
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
OtherPtrTy = VecTy->getElementType();
else
OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
} else if (IntTy && !IsWholeAlloca) {
OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
} else {
OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
}
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
unsigned SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
unsigned DstAlign = SliceAlign;
if (!IsDest) {
std::swap(SrcPtr, DstPtr);
std::swap(SrcAlign, DstAlign);
}
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
Src = convertValue(DL, IRB, Src, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(),
"copyload");
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
Src = convertValue(DL, IRB, Src, NewAllocaTy);
}
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
Pass.DeadInsts.insert(&II);
ConstantInt *Size
= ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
bool visitPHINode(PHINode &PN) {
DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
IRBuilderTy PtrBuilder(IRB);
if (isa<PHINode>(OldPtr))
PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
else
PtrBuilder.SetInsertPoint(OldPtr);
PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
PHIUsers.insert(&PN);
return true;
}
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
"Pointer isn't an operand!");
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
if (SI.getOperand(2) == OldPtr)
SI.setOperand(2, NewPtr);
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
SelectUsers.insert(&SI);
return true;
}
};
}
namespace {
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
const DataLayout &DL;
SmallVector<Use *, 8> Queue;
SmallPtrSet<User *, 8> Visited;
Use *U;
public:
AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
bool rewrite(Instruction &I) {
DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
enqueueUsers(I);
bool Changed = false;
while (!Queue.empty()) {
U = Queue.pop_back_val();
Changed |= visit(cast<Instruction>(U->getUser()));
}
return Changed;
}
private:
void enqueueUsers(Instruction &I) {
for (Use &U : I.uses())
if (Visited.insert(U.getUser()))
Queue.push_back(&U);
}
bool visitInstruction(Instruction &I) { return false; }
template <typename Derived>
class OpSplitter {
protected:
IRBuilderTy IRB;
SmallVector<unsigned, 4> Indices;
SmallVector<Value *, 4> GEPIndices;
Value *Ptr;
OpSplitter(Instruction *InsertionPoint, Value *Ptr)
: IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
public:
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
if (Ty->isSingleValueType())
return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
if (StructType *STy = dyn_cast<StructType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
llvm_unreachable("Only arrays and structs are aggregate loadable types");
}
};
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
: OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
}
};
bool visitLoadInst(LoadInst &LI) {
assert(LI.getPointerOperand() == *U);
if (!LI.isSimple() || LI.getType()->isSingleValueType())
return false;
DEBUG(dbgs() << " original: " << LI << "\n");
LoadOpSplitter Splitter(&LI, *U);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
LI.replaceAllUsesWith(V);
LI.eraseFromParent();
return true;
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
: OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
Value *Store = IRB.CreateStore(
IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
bool visitStoreInst(StoreInst &SI) {
if (!SI.isSimple() || SI.getPointerOperand() != *U)
return false;
Value *V = SI.getValueOperand();
if (V->getType()->isSingleValueType())
return false;
DEBUG(dbgs() << " original: " << SI << "\n");
StoreOpSplitter Splitter(&SI, *U);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
SI.eraseFromParent();
return true;
}
bool visitBitCastInst(BitCastInst &BC) {
enqueueUsers(BC);
return false;
}
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
enqueueUsers(GEPI);
return false;
}
bool visitPHINode(PHINode &PN) {
enqueueUsers(PN);
return false;
}
bool visitSelectInst(SelectInst &SI) {
enqueueUsers(SI);
return false;
}
};
}
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
return Ty;
uint64_t AllocSize = DL.getTypeAllocSize(Ty);
uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
Type *InnerTy;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
InnerTy = ArrTy->getElementType();
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Index = SL->getElementContainingOffset(0);
InnerTy = STy->getElementType(Index);
} else {
return Ty;
}
if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
TypeSize > DL.getTypeSizeInBits(InnerTy))
return Ty;
return stripAggregateTypeWrapping(DL, InnerTy);
}
static Type *getTypePartition(const DataLayout &DL, Type *Ty,
uint64_t Offset, uint64_t Size) {
if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size)
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty) ||
(DL.getTypeAllocSize(Ty) - Offset) < Size)
return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
if (SeqTy->isPointerTy())
return nullptr;
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
return nullptr;
} else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
return nullptr;
}
Offset -= NumSkippedElements * ElementSize;
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
return nullptr;
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
return nullptr;
if (SL->getElementOffset(EndIndex) != EndOffset)
return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
}
StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
return nullptr;
return SubTy;
}
bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
AllocaSlices::iterator B, AllocaSlices::iterator E,
int64_t BeginOffset, int64_t EndOffset,
ArrayRef<AllocaSlices::iterator> SplitUses) {
assert(BeginOffset < EndOffset);
uint64_t SliceSize = EndOffset - BeginOffset;
Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
SliceTy = CommonUseTy;
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
BeginOffset, SliceSize))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL->isLegalInteger(SliceSize * 8))
SliceTy = Type::getIntNTy(*C, SliceSize * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), SliceSize);
assert(DL->getTypeAllocSize(SliceTy) >= SliceSize);
bool IsVectorPromotable = isVectorPromotionViable(
*DL, SliceTy, S, BeginOffset, EndOffset, B, E, SplitUses);
bool IsIntegerPromotable =
!IsVectorPromotable &&
isIntegerWideningViable(*DL, SliceTy, BeginOffset, S, B, E, SplitUses);
AllocaInst *NewAI;
if (SliceTy == AI.getAllocatedType()) {
assert(BeginOffset == 0 &&
"Non-zero begin offset but same alloca type");
NewAI = &AI;
} else {
unsigned Alignment = AI.getAlignment();
if (!Alignment) {
Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, BeginOffset);
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
DEBUG(dbgs() << "Rewriting alloca partition "
<< "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
<< "\n");
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
EndOffset, IsVectorPromotable,
IsIntegerPromotable, PHIUsers, SelectUsers);
bool Promotable = true;
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
SUE = SplitUses.end();
SUI != SUE; ++SUI) {
DEBUG(dbgs() << " rewriting split ");
DEBUG(S.printSlice(dbgs(), *SUI, ""));
Promotable &= Rewriter.visit(*SUI);
++NumUses;
}
for (AllocaSlices::iterator I = B; I != E; ++I) {
DEBUG(dbgs() << " rewriting ");
DEBUG(S.printSlice(dbgs(), I, ""));
Promotable &= Rewriter.visit(I);
++NumUses;
}
NumAllocaPartitionUses += NumUses;
MaxUsesPerAllocaPartition =
std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
if (!isSafePHIToSpeculate(**I, DL)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
if (!isSafeSelectToSpeculate(**I, DL)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
if (Promotable) {
if (PHIUsers.empty() && SelectUsers.empty()) {
PromotableAllocas.push_back(NewAI);
} else {
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
SpeculatablePHIs.insert(*I);
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
SpeculatableSelects.insert(*I);
Worklist.insert(NewAI);
}
} else {
if (NewAI != &AI)
Worklist.insert(NewAI);
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
}
return true;
}
static void
removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
if (Offset >= MaxSplitUseEndOffset) {
SplitUses.clear();
MaxSplitUseEndOffset = 0;
return;
}
size_t SplitUsesOldSize = SplitUses.size();
SplitUses.erase(std::remove_if(SplitUses.begin(), SplitUses.end(),
[Offset](const AllocaSlices::iterator &I) {
return I->endOffset() <= Offset;
}),
SplitUses.end());
if (SplitUsesOldSize == SplitUses.size())
return;
MaxSplitUseEndOffset = 0;
for (SmallVectorImpl<AllocaSlices::iterator>::iterator
SUI = SplitUses.begin(),
SUE = SplitUses.end();
SUI != SUE; ++SUI)
MaxSplitUseEndOffset = std::max((*SUI)->endOffset(), MaxSplitUseEndOffset);
}
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
if (S.begin() == S.end())
return false;
unsigned NumPartitions = 0;
bool Changed = false;
SmallVector<AllocaSlices::iterator, 4> SplitUses;
uint64_t MaxSplitUseEndOffset = 0;
uint64_t BeginOffset = S.begin()->beginOffset();
for (AllocaSlices::iterator SI = S.begin(), SJ = std::next(SI), SE = S.end();
SI != SE; SI = SJ) {
uint64_t MaxEndOffset = SI->endOffset();
if (!SI->isSplittable()) {
assert(BeginOffset == SI->beginOffset());
while (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
if (!SJ->isSplittable())
MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
++SJ;
}
} else {
assert(SI->isSplittable());
while (SJ != SE && SJ->beginOffset() < MaxEndOffset &&
SJ->isSplittable()) {
MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
++SJ;
}
if (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
assert(!SJ->isSplittable());
MaxEndOffset = SJ->beginOffset();
}
}
if (BeginOffset < MaxEndOffset) {
Changed |=
rewritePartition(AI, S, SI, SJ, BeginOffset, MaxEndOffset, SplitUses);
++NumPartitions;
removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
}
for (AllocaSlices::iterator SK = SI; SK != SJ; ++SK)
if (SK->isSplittable() && SK->endOffset() > MaxEndOffset) {
SplitUses.push_back(SK);
MaxSplitUseEndOffset = std::max(SK->endOffset(), MaxSplitUseEndOffset);
}
if (SJ == SE && SplitUses.empty())
break;
if (SplitUses.empty() || (SJ != SE && MaxEndOffset == SJ->beginOffset())) {
BeginOffset = SJ->beginOffset();
continue;
}
if (SJ != SE && SJ->isSplittable() &&
MaxSplitUseEndOffset > SJ->beginOffset()) {
BeginOffset = MaxEndOffset;
continue;
}
uint64_t PostSplitEndOffset =
SJ == SE ? MaxSplitUseEndOffset : SJ->beginOffset();
Changed |= rewritePartition(AI, S, SJ, SJ, MaxEndOffset, PostSplitEndOffset,
SplitUses);
++NumPartitions;
if (SJ == SE)
break;
removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset,
PostSplitEndOffset);
BeginOffset = SJ->beginOffset();
}
NumAllocaPartitions += NumPartitions;
MaxPartitionsPerAlloca =
std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
return Changed;
}
void SROA::clobberUse(Use &U) {
Value *OldV = U;
U = UndefValue::get(OldV->getType());
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
DeadInsts.insert(OldI);
}
}
bool SROA::runOnAlloca(AllocaInst &AI) {
DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
if (AI.use_empty()) {
AI.eraseFromParent();
return true;
}
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
AggLoadStoreRewriter AggRewriter(*DL);
Changed |= AggRewriter.rewrite(AI);
AllocaSlices S(*DL, AI);
DEBUG(S.print(dbgs()));
if (S.isEscaped())
return Changed;
for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
DE = S.dead_user_end();
DI != DE; ++DI) {
for (Use &DeadOp : (*DI)->operands())
clobberUse(DeadOp);
(*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
DeadInsts.insert(*DI);
Changed = true;
}
for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
DE = S.dead_op_end();
DO != DE; ++DO) {
clobberUse(**DO);
Changed = true;
}
if (S.begin() == S.end())
return Changed;
Changed |= splitAlloca(AI, S);
DEBUG(dbgs() << " Speculating PHIs\n");
while (!SpeculatablePHIs.empty())
speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
DEBUG(dbgs() << " Speculating Selects\n");
while (!SpeculatableSelects.empty())
speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
return Changed;
}
void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
while (!DeadInsts.empty()) {
Instruction *I = DeadInsts.pop_back_val();
DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
I->replaceAllUsesWith(UndefValue::get(I->getType()));
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeletedAllocas.insert(AI);
++NumDeleted;
I->eraseFromParent();
}
}
static void enqueueUsersInWorklist(Instruction &I,
SmallVectorImpl<Instruction *> &Worklist,
SmallPtrSet<Instruction *, 8> &Visited) {
for (User *U : I.users())
if (Visited.insert(cast<Instruction>(U)))
Worklist.push_back(cast<Instruction>(U));
}
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
if (DT && !ForceSSAUpdater) {
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
PromoteMemToReg(PromotableAllocas, *DT);
PromotableAllocas.clear();
return true;
}
DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
SSAUpdater SSA;
DIBuilder DIB(*F.getParent());
SmallVector<Instruction *, 64> Insts;
SmallVector<Instruction *, 8> Worklist;
SmallPtrSet<Instruction *, 8> Visited;
SmallVector<Instruction *, 32> DeadInsts;
for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
AllocaInst *AI = PromotableAllocas[Idx];
Insts.clear();
Worklist.clear();
Visited.clear();
enqueueUsersInWorklist(*AI, Worklist, Visited);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
II->getIntrinsicID() == Intrinsic::lifetime_end);
II->eraseFromParent();
continue;
}
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
assert(LI->getType() == AI->getAllocatedType());
Insts.push_back(LI);
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
Insts.push_back(SI);
continue;
}
DeadInsts.push_back(I);
enqueueUsersInWorklist(*I, Worklist, Visited);
}
AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
while (!DeadInsts.empty())
DeadInsts.pop_back_val()->eraseFromParent();
AI->eraseFromParent();
}
PromotableAllocas.clear();
return true;
}
bool SROA::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP) {
DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
return false;
}
DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I)
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
Worklist.insert(AI);
bool Changed = false;
SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
do {
while (!Worklist.empty()) {
Changed |= runOnAlloca(*Worklist.pop_back_val());
deleteDeadInstructions(DeletedAllocas);
if (!DeletedAllocas.empty()) {
auto IsInSet = [&](AllocaInst *AI) {
return DeletedAllocas.count(AI);
};
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
PromotableAllocas.end(),
IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
}
Changed |= promoteAllocas(F);
Worklist = PostPromotionWorklist;
PostPromotionWorklist.clear();
} while (!Worklist.empty());
return Changed;
}
void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
if (RequiresDomTree)
AU.addRequired<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}