#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
using namespace llvm;
using namespace sampleprof;
#define DEBUG_TYPE "sample-profile"
static cl::opt<std::string> SampleProfileFile(
"sample-profile-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
"sample-profile-max-propagate-iterations", cl::init(100),
cl::desc("Maximum number of iterations to go through when propagating "
"sample block/edge weights through the CFG."));
static cl::opt<unsigned> SampleProfileCoverage(
"sample-profile-check-coverage", cl::init(0), cl::value_desc("N"),
cl::desc("Emit a warning if less than N% of samples in the input profile "
"are matched to the IR."));
namespace {
typedef DenseMap<const BasicBlock *, uint64_t> BlockWeightMap;
typedef DenseMap<const BasicBlock *, const BasicBlock *> EquivalenceClassMap;
typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
BlockEdgeMap;
class SampleProfileLoader : public ModulePass {
public:
static char ID;
SampleProfileLoader(StringRef Name = SampleProfileFile)
: ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(),
Samples(nullptr), Filename(Name), ProfileIsValid(false) {
initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M) override;
void dump() { Reader->dump(); }
const char *getPassName() const override { return "Sample profile pass"; }
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
}
protected:
bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
bool computeBlockWeights(Function &F);
void findEquivalenceClasses(Function &F);
void findEquivalencesFor(BasicBlock *BB1,
SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree);
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F);
void computeDominanceAndLoopInfo(Function &F);
unsigned getOffset(unsigned L, unsigned H) const;
void clearFunctionData();
BlockWeightMap BlockWeights;
EdgeWeightMap EdgeWeights;
SmallPtrSet<const BasicBlock *, 128> VisitedBlocks;
SmallSet<Edge, 128> VisitedEdges;
EquivalenceClassMap EquivalenceClass;
std::unique_ptr<DominatorTree> DT;
std::unique_ptr<DominatorTreeBase<BasicBlock>> PDT;
std::unique_ptr<LoopInfo> LI;
BlockEdgeMap Predecessors;
BlockEdgeMap Successors;
std::unique_ptr<SampleProfileReader> Reader;
FunctionSamples *Samples;
StringRef Filename;
bool ProfileIsValid;
};
class SampleCoverageTracker {
public:
SampleCoverageTracker() : SampleCoverage() {}
bool markSamplesUsed(const FunctionSamples *Samples, uint32_t LineOffset,
uint32_t Discriminator);
unsigned computeCoverage(unsigned Used, unsigned Total) const;
unsigned countUsedSamples(const FunctionSamples *Samples) const;
unsigned countBodySamples(const FunctionSamples *Samples) const;
private:
typedef DenseMap<LineLocation, unsigned> BodySampleCoverageMap;
typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
FunctionSamplesCoverageMap;
FunctionSamplesCoverageMap SampleCoverage;
};
SampleCoverageTracker CoverageTracker;
}
bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *Samples,
uint32_t LineOffset,
uint32_t Discriminator) {
LineLocation Loc(LineOffset, Discriminator);
unsigned &Count = SampleCoverage[Samples][Loc];
return ++Count == 1;
}
unsigned
SampleCoverageTracker::countUsedSamples(const FunctionSamples *Samples) const {
auto I = SampleCoverage.find(Samples);
unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
for (const auto &I : Samples->getCallsiteSamples())
Count += countUsedSamples(&I.second);
return Count;
}
unsigned
SampleCoverageTracker::countBodySamples(const FunctionSamples *Samples) const {
unsigned Count = Samples->getBodySamples().size();
for (const auto &I : Samples->getCallsiteSamples())
Count += countBodySamples(&I.second);
return Count;
}
unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
unsigned Total) const {
assert(Used <= Total &&
"number of used records cannot exceed the total number of records");
return Total > 0 ? Used * 100 / Total : 100;
}
void SampleProfileLoader::clearFunctionData() {
BlockWeights.clear();
EdgeWeights.clear();
VisitedBlocks.clear();
VisitedEdges.clear();
EquivalenceClass.clear();
DT = nullptr;
PDT = nullptr;
LI = nullptr;
Predecessors.clear();
Successors.clear();
}
unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
return (L - H) & 0xffff;
}
void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
OS << "weight[" << E.first->getName() << "->" << E.second->getName()
<< "]: " << EdgeWeights[E] << "\n";
}
void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
const BasicBlock *BB) {
const BasicBlock *Equiv = EquivalenceClass[BB];
OS << "equivalence[" << BB->getName()
<< "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
}
void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
const BasicBlock *BB) const {
const auto &I = BlockWeights.find(BB);
uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
OS << "weight[" << BB->getName() << "]: " << W << "\n";
}
ErrorOr<uint64_t>
SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
DebugLoc DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (!FS)
return std::error_code();
const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
uint32_t Discriminator = DIL->getDiscriminator();
ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator);
if (FirstMark) {
const Function *F = Inst.getParent()->getParent();
LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, DLoc,
Twine("Applied ") + Twine(*R) +
" samples from profile");
}
DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
<< Inst << " (line offset: " << Lineno - HeaderLineno << "."
<< DIL->getDiscriminator() << " - weight: " << R.get()
<< ")\n");
}
return R;
}
ErrorOr<uint64_t>
SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
bool Found = false;
uint64_t Weight = 0;
for (auto &I : BB->getInstList()) {
const ErrorOr<uint64_t> &R = getInstWeight(I);
if (R && R.get() >= Weight) {
Weight = R.get();
Found = true;
}
}
if (Found)
return Weight;
else
return std::error_code();
}
bool SampleProfileLoader::computeBlockWeights(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "Block weights\n");
for (const auto &BB : F) {
ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
if (Weight) {
BlockWeights[&BB] = Weight.get();
VisitedBlocks.insert(&BB);
Changed = true;
}
DEBUG(printBlockWeight(dbgs(), &BB));
}
return Changed;
}
const FunctionSamples *
SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return nullptr;
}
DISubprogram *SP = DIL->getScope()->getSubprogram();
if (!SP)
return nullptr;
Function *CalleeFunc = Inst.getCalledFunction();
if (!CalleeFunc) {
return nullptr;
}
StringRef CalleeName = CalleeFunc->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
return FS->findFunctionSamplesAt(
CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
DIL->getDiscriminator(), CalleeName));
}
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
SmallVector<CallsiteLocation, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return Samples;
}
StringRef CalleeName;
for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
DIL = DIL->getInlinedAt()) {
DISubprogram *SP = DIL->getScope()->getSubprogram();
if (!SP)
return nullptr;
if (!CalleeName.empty()) {
S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
DIL->getDiscriminator(), CalleeName));
}
CalleeName = SP->getLinkageName();
}
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
FS = FS->findFunctionSamplesAt(S[i]);
}
return FS;
}
bool SampleProfileLoader::inlineHotFunctions(Function &F) {
bool Changed = false;
LLVMContext &Ctx = F.getContext();
while (true) {
bool LocalChanged = false;
SmallVector<CallInst *, 10> CIS;
for (auto &BB : F) {
for (auto &I : BB.getInstList()) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (CI) {
const FunctionSamples *FS = findCalleeFunctionSamples(*CI);
if (FS && FS->getTotalSamples() > 0) {
CIS.push_back(CI);
}
}
}
}
for (auto CI : CIS) {
InlineFunctionInfo IFI;
Function *CalledFunction = CI->getCalledFunction();
DebugLoc DLoc = CI->getDebugLoc();
uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
if (InlineFunction(CI, IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
CalledFunction->getName() + "' with " +
Twine(NumSamples) + " samples into '" +
F.getName() + "'");
}
}
if (LocalChanged) {
Changed = true;
} else {
break;
}
}
return Changed;
}
void SampleProfileLoader::findEquivalencesFor(
BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
DominatorTreeBase<BasicBlock> *DomTree) {
const BasicBlock *EC = EquivalenceClass[BB1];
uint64_t Weight = BlockWeights[EC];
for (const auto *BB2 : Descendants) {
bool IsDomParent = DomTree->dominates(BB2, BB1);
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
EquivalenceClass[BB2] = EC;
Weight = std::max(Weight, BlockWeights[BB2]);
}
}
BlockWeights[EC] = Weight;
}
void SampleProfileLoader::findEquivalenceClasses(Function &F) {
SmallVector<BasicBlock *, 8> DominatedBBs;
DEBUG(dbgs() << "\nBlock equivalence classes\n");
for (auto &BB : F) {
BasicBlock *BB1 = &BB;
if (EquivalenceClass.count(BB1)) {
DEBUG(printBlockEquivalence(dbgs(), BB1));
continue;
}
EquivalenceClass[BB1] = BB1;
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
findEquivalencesFor(BB1, DominatedBBs, PDT.get());
DEBUG(printBlockEquivalence(dbgs(), BB1));
}
DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
for (auto &BI : F) {
const BasicBlock *BB = &BI;
const BasicBlock *EquivBB = EquivalenceClass[BB];
if (BB != EquivBB)
BlockWeights[BB] = BlockWeights[EquivBB];
DEBUG(printBlockWeight(dbgs(), BB));
}
}
uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
Edge *UnknownEdge) {
if (!VisitedEdges.count(E)) {
(*NumUnknownEdges)++;
*UnknownEdge = E;
return 0;
}
return EdgeWeights[E];
}
bool SampleProfileLoader::propagateThroughEdges(Function &F) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
for (const auto &BI : F) {
const BasicBlock *BB = &BI;
const BasicBlock *EC = EquivalenceClass[BB];
for (unsigned i = 0; i < 2; i++) {
uint64_t TotalWeight = 0;
unsigned NumUnknownEdges = 0;
Edge UnknownEdge, SelfReferentialEdge;
if (i == 0) {
for (auto *Pred : Predecessors[BB]) {
Edge E = std::make_pair(Pred, BB);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
if (E.first == E.second)
SelfReferentialEdge = E;
}
} else {
for (auto *Succ : Successors[BB]) {
Edge E = std::make_pair(BB, Succ);
TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
}
}
if (NumUnknownEdges <= 1) {
uint64_t &BBWeight = BlockWeights[EC];
if (NumUnknownEdges == 0) {
if (TotalWeight > BBWeight) {
BBWeight = TotalWeight;
Changed = true;
DEBUG(dbgs() << "All edge weights for " << BB->getName()
<< " known. Set weight for block: ";
printBlockWeight(dbgs(), BB););
}
if (VisitedBlocks.insert(EC).second)
Changed = true;
} else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
if (BBWeight >= TotalWeight)
EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
else
EdgeWeights[UnknownEdge] = 0;
VisitedEdges.insert(UnknownEdge);
Changed = true;
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
} else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
uint64_t &BBWeight = BlockWeights[BB];
if (BBWeight >= TotalWeight)
EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
else
EdgeWeights[SelfReferentialEdge] = 0;
VisitedEdges.insert(SelfReferentialEdge);
Changed = true;
DEBUG(dbgs() << "Set self-referential edge weight to: ";
printEdgeWeight(dbgs(), SelfReferentialEdge));
}
}
}
return Changed;
}
void SampleProfileLoader::buildEdges(Function &F) {
for (auto &BI : F) {
BasicBlock *B1 = &BI;
SmallPtrSet<BasicBlock *, 16> Visited;
if (!Predecessors[B1].empty())
llvm_unreachable("Found a stale predecessors list in a basic block.");
for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
BasicBlock *B2 = *PI;
if (Visited.insert(B2).second)
Predecessors[B1].push_back(B2);
}
Visited.clear();
if (!Successors[B1].empty())
llvm_unreachable("Found a stale successors list in a basic block.");
for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
BasicBlock *B2 = *SI;
if (Visited.insert(B2).second)
Successors[B1].push_back(B2);
}
}
}
void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned I = 0;
F.setEntryCount(Samples->getHeadSamples());
buildEdges(F);
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
Changed = propagateThroughEdges(F);
}
DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
LLVMContext &Ctx = F.getContext();
MDBuilder MDB(Ctx);
for (auto &BI : F) {
BasicBlock *BB = &BI;
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
continue;
if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
continue;
DEBUG(dbgs() << "\nGetting weights for branch at line "
<< TI->getDebugLoc().getLine() << ".\n");
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
DebugLoc MaxDestLoc;
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
uint64_t Weight = EdgeWeights[E];
DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
if (Weight > std::numeric_limits<uint32_t>::max()) {
DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
Weights.push_back(static_cast<uint32_t>(Weight));
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
}
}
}
if (MaxWeight > 0) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
DebugLoc BranchLoc = TI->getDebugLoc();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, F, MaxDestLoc,
Twine("most popular destination for conditional branches at ") +
((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
Twine(BranchLoc.getLine()) + ":" +
Twine(BranchLoc.getCol()))
: Twine("<UNKNOWN LOCATION>")));
} else {
DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
}
}
}
unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
if (DISubprogram *S = getDISubprogram(&F))
return S->getLine();
F.getContext().diagnose(DiagnosticInfoSampleProfile(
"No debug information found in function " + F.getName() +
": Function profile not used",
DS_Warning));
return 0;
}
void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
DT.reset(new DominatorTree);
DT->recalculate(F);
PDT.reset(new DominatorTreeBase<BasicBlock>(true));
PDT->recalculate(F);
LI.reset(new LoopInfo);
LI->analyze(*DT);
}
bool SampleProfileLoader::emitAnnotations(Function &F) {
bool Changed = false;
if (getFunctionLoc(F) == 0)
return false;
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
Changed |= inlineHotFunctions(F);
Changed |= computeBlockWeights(F);
if (Changed) {
computeDominanceAndLoopInfo(F);
findEquivalenceClasses(F);
propagateWeights(F);
}
if (SampleProfileCoverage) {
unsigned Used = CoverageTracker.countUsedSamples(Samples);
unsigned Total = CoverageTracker.countBodySamples(Samples);
unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
if (Coverage < SampleProfileCoverage) {
F.getContext().diagnose(DiagnosticInfoSampleProfile(
getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
Twine(Used) + " of " + Twine(Total) + " available profile records (" +
Twine(Coverage) + "%) were applied",
DS_Warning));
}
}
return Changed;
}
char SampleProfileLoader::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
"Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
return false;
}
Reader = std::move(ReaderOrErr.get());
ProfileIsValid = (Reader->read() == sampleprof_error::success);
return true;
}
ModulePass *llvm::createSampleProfileLoaderPass() {
return new SampleProfileLoader(SampleProfileFile);
}
ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
return new SampleProfileLoader(Name);
}
bool SampleProfileLoader::runOnModule(Module &M) {
if (!ProfileIsValid)
return false;
bool retval = false;
for (auto &F : M)
if (!F.isDeclaration()) {
clearFunctionData();
retval |= runOnFunction(F);
}
return retval;
}
bool SampleProfileLoader::runOnFunction(Function &F) {
Samples = Reader->getSamplesFor(F);
if (!Samples->empty())
return emitAnnotations(F);
return false;
}