ScheduleDAGRRList.cpp [plain text]
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");
static RegisterScheduler
burrListDAGScheduler("list-burr",
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
static RegisterScheduler
tdrListrDAGScheduler("list-tdrr",
"Top-down register reduction list scheduling",
createTDRRListDAGScheduler);
static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
"order when possible",
createSourceListDAGScheduler);
namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
bool isBottomUp;
SchedulingPriorityQueue *AvailableQueue;
unsigned NumLiveRegs;
std::vector<SUnit*> LiveRegDefs;
std::vector<unsigned> LiveRegCycles;
ScheduleDAGTopologicalSort Topo;
public:
ScheduleDAGRRList(MachineFunction &mf,
bool isbottomup,
SchedulingPriorityQueue *availqueue)
: ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
AvailableQueue(availqueue), Topo(SUnits) {
}
~ScheduleDAGRRList() {
delete AvailableQueue;
}
void Schedule();
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
return Topo.IsReachable(SU, TargetSU);
}
bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
return Topo.WillCreateCycle(SU, TargetSU);
}
void AddPred(SUnit *SU, const SDep &D) {
Topo.AddPred(SU, D.getSUnit());
SU->addPred(D);
}
void RemovePred(SUnit *SU, const SDep &D) {
Topo.RemovePred(SU, D.getSUnit());
SU->removePred(D);
}
private:
void ReleasePred(SUnit *SU, const SDep *PredEdge);
void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
void CapturePred(SDep *PredEdge);
void ScheduleNodeBottomUp(SUnit*, unsigned);
void ScheduleNodeTopDown(SUnit*, unsigned);
void UnscheduleNodeBottomUp(SUnit*);
void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
const TargetRegisterClass*,
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleTopDown();
void ListScheduleBottomUp();
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = NewSUnit(N);
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
return NewNode;
}
SUnit *CreateClone(SUnit *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = Clone(N);
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
return NewNode;
}
bool ForceUnitLatencies() const { return true; }
};
}
void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
BuildSchedGraph(NULL);
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
if (isBottomUp)
ListScheduleBottomUp();
else
ListScheduleTopDown();
AvailableQueue->releaseState();
}
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
#ifndef NDEBUG
if (PredSU->NumSuccsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
--PredSU->NumSuccsLeft;
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
PredSU->isAvailable = true;
AvailableQueue->push(PredSU);
}
}
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
if (!LiveRegDefs[I->getReg()]) {
++NumLiveRegs;
LiveRegDefs[I->getReg()] = I->getSUnit();
LiveRegCycles[I->getReg()] = CurCycle;
}
}
}
}
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
ReleasePredecessors(SU, CurCycle);
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegCycles[I->getReg()] = 0;
}
}
}
SU->isScheduled = true;
AvailableQueue->ScheduledNode(SU);
}
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredSU->isAvailable) {
PredSU->isAvailable = false;
if (!PredSU->isPending)
AvailableQueue->remove(PredSU);
}
assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
++PredSU->NumSuccsLeft;
}
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU);
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
LiveRegCycles[I->getReg()] = 0;
}
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
if (!LiveRegDefs[I->getReg()]) {
LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs;
}
if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
}
}
SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
AvailableQueue->push(SU);
}
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
unsigned &CurCycle) {
SUnit *OldSU = NULL;
while (CurCycle > BtCycle) {
OldSU = Sequence.back();
Sequence.pop_back();
if (SU->isSucc(OldSU))
SU->isAvailable = false;
UnscheduleNodeBottomUp(OldSU);
--CurCycle;
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getFlaggedNode()) {
if (SUNode->isOperandOf(N))
return true;
}
return false;
}
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (SU->getNode()->getFlaggedNode())
return NULL;
SDNode *N = SU->getNode();
if (!N)
return NULL;
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Flag)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
}
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
if (VT == MVT::Flag)
return NULL;
}
if (TryUnfold) {
SmallVector<SDNode*, 2> NewNodes;
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
SDNode *LoadNode = NewNodes[0];
unsigned NumVals = N->getNumValues();
unsigned OldNumVals = SU->getNode()->getNumValues();
for (unsigned i = 0; i != NumVals; ++i)
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
bool isNewLoad = true;
SUnit *LoadSU;
if (LoadNode->getNodeId() != -1) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
ComputeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
break;
}
}
if (TID.isCommutable())
NewSU->isCommutable = true;
ComputeLatency(NewSU);
SmallVector<SDep, 4> ChainPreds;
SmallVector<SDep, 4> ChainSuccs;
SmallVector<SDep, 4> LoadPreds;
SmallVector<SDep, 4> NodePreds;
SmallVector<SDep, 4> NodeSuccs;
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl())
ChainPreds.push_back(*I);
else if (isOperandOf(I->getSUnit(), LoadNode))
LoadPreds.push_back(*I);
else
NodePreds.push_back(*I);
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isCtrl())
ChainSuccs.push_back(*I);
else
NodeSuccs.push_back(*I);
}
for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
const SDep &Pred = ChainPreds[i];
RemovePred(SU, Pred);
if (isNewLoad)
AddPred(LoadSU, Pred);
}
for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
const SDep &Pred = LoadPreds[i];
RemovePred(SU, Pred);
if (isNewLoad)
AddPred(LoadSU, Pred);
}
for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
const SDep &Pred = NodePreds[i];
RemovePred(SU, Pred);
AddPred(NewSU, Pred);
}
for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
SDep D = NodeSuccs[i];
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
if (isNewLoad) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
}
AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
AvailableQueue->addNode(NewSU);
++NumUnfolds;
if (NewSU->NumSuccsLeft == 0) {
NewSU->isAvailable = true;
return NewSU;
}
SU = NewSU;
}
DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I)
if (!I->isArtificial())
AddPred(NewSU, *I);
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isArtificial())
continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) {
SDep D = *I;
D.setSUnit(NewSU);
AddPred(SuccSU, D);
D.setSUnit(SU);
DelDeps.push_back(std::make_pair(SuccSU, D));
}
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(NewSU);
++NumDups;
return NewSU;
}
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
SUnit *CopyFromSU = CreateNewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
SUnit *CopyToSU = CreateNewSUnit(NULL);
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isArtificial())
continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isScheduled) {
SDep D = *I;
D.setSUnit(CopyToSU);
AddPred(SuccSU, D);
DelDeps.push_back(std::make_pair(SuccSU, *I));
}
}
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second);
AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(CopyFromSU);
AvailableQueue->addNode(CopyToSU);
Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU);
++NumPRCopies;
}
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = TID.getNumDefs();
for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
}
return N->getValueType(NumRes);
}
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
bool Added = false;
if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
if (RegAdded.insert(Reg)) {
LRegs.push_back(Reg);
Added = true;
}
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
Added = true;
}
}
return Added;
}
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
SmallVector<unsigned, 4> &LRegs){
if (NumLiveRegs == 0)
return false;
SmallSet<unsigned, 4> RegAdded;
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep())
CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
--NumOps;
for (unsigned i = 2; i != NumOps;) {
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
unsigned NumVals = (Flags & 0xffff) >> 3;
++i; if ((Flags & 7) == 2 || (Flags & 7) == 6) {
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
} else
i += NumVals;
}
continue;
}
if (!Node->isMachineOpcode())
continue;
const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
if (!TID.ImplicitDefs)
continue;
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
return !LRegs.empty();
}
void ScheduleDAGRRList::ListScheduleBottomUp() {
unsigned CurCycle = 0;
ReleasePredecessors(&ExitSU, CurCycle);
if (!SUnits.empty()) {
SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
RootSU->isAvailable = true;
AvailableQueue->push(RootSU);
}
SmallVector<SUnit*, 4> NotReady;
DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
bool Delayed = false;
LRegsMap.clear();
SUnit *CurSU = AvailableQueue->pop();
while (CurSU) {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
break;
Delayed = true;
LRegsMap.insert(std::make_pair(CurSU, LRegs));
CurSU->isPending = true; NotReady.push_back(CurSU);
CurSU = AvailableQueue->pop();
}
if (Delayed && !CurSU) {
for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
SUnit *TrySU = NotReady[i];
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
unsigned LiveCycle = CurCycle;
for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
unsigned Reg = LRegs[j];
unsigned LCycle = LiveRegCycles[Reg];
LiveCycle = std::min(LiveCycle, LCycle);
}
SUnit *OldSU = Sequence[LiveCycle];
if (!WillCreateCycle(TrySU, OldSU)) {
BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
if (OldSU->isAvailable) {
OldSU->isAvailable = false;
AvailableQueue->remove(OldSU);
}
AddPred(TrySU, SDep(OldSU, SDep::Order, 1,
0, false,
false, true));
if (!TrySU->isAvailable)
CurSU = AvailableQueue->pop();
else {
CurSU = TrySU;
TrySU->isPending = false;
NotReady.erase(NotReady.begin()+i);
}
break;
}
}
if (!CurSU) {
SUnit *TrySU = NotReady[0];
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg];
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
SUnit *NewDef = 0;
if (DestRC)
NewDef = CopyAndMoveSuccessors(LRDef);
else
DestRC = RC;
if (!NewDef) {
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, 1,
0, false,
false,
true));
NewDef = Copies.back();
}
DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, 1,
0, false,
false,
true));
TrySU->isAvailable = false;
CurSU = NewDef;
}
assert(CurSU && "Unable to resolve live physical register dependencies!");
}
for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
NotReady[i]->isPending = false;
if (NotReady[i]->isAvailable)
AvailableQueue->push(NotReady[i]);
}
NotReady.clear();
if (CurSU)
ScheduleNodeBottomUp(CurSU, CurCycle);
++CurCycle;
}
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
}
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
SUnit *SuccSU = SuccEdge->getSUnit();
#ifndef NDEBUG
if (SuccSU->NumPredsLeft == 0) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
llvm_unreachable(0);
}
#endif
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
SuccSU->isAvailable = true;
AvailableQueue->push(SuccSU);
}
}
void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
assert(!I->isAssignedRegDep() &&
"The list-tdrr scheduler doesn't yet support physreg dependencies!");
ReleaseSucc(SU, &*I);
}
}
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
Sequence.push_back(SU);
ReleaseSuccessors(SU);
SU->isScheduled = true;
AvailableQueue->ScheduledNode(SU);
}
void ScheduleDAGRRList::ListScheduleTopDown() {
unsigned CurCycle = 0;
ReleaseSuccessors(&EntrySU);
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
if (SUnits[i].Preds.empty()) {
AvailableQueue->push(&SUnits[i]);
SUnits[i].isAvailable = true;
}
}
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
SUnit *CurSU = AvailableQueue->pop();
if (CurSU)
ScheduleNodeTopDown(CurSU, CurCycle);
++CurCycle;
}
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
}
namespace {
template<class SF>
class RegReductionPriorityQueue;
struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
bool operator()(const SUnit* left, const SUnit* right) const;
};
struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
bool operator()(const SUnit* left, const SUnit* right) const;
};
struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
: SPQ(spq) {}
src_ls_rr_sort(const src_ls_rr_sort &RHS)
: SPQ(RHS.SPQ) {}
bool operator()(const SUnit* left, const SUnit* right) const;
};
}
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
if (SethiUllmanNumber != 0)
return SethiUllmanNumber;
unsigned Extra = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit();
unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
if (PredSethiUllman > SethiUllmanNumber) {
SethiUllmanNumber = PredSethiUllman;
Extra = 0;
} else if (PredSethiUllman == SethiUllmanNumber)
++Extra;
}
SethiUllmanNumber += Extra;
if (SethiUllmanNumber == 0)
SethiUllmanNumber = 1;
return SethiUllmanNumber;
}
namespace {
template<class SF>
class RegReductionPriorityQueue : public SchedulingPriorityQueue {
PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
unsigned currentQueueId;
protected:
std::vector<SUnit> *SUnits;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
ScheduleDAGRRList *scheduleDAG;
std::vector<unsigned> SethiUllmanNumbers;
public:
RegReductionPriorityQueue(const TargetInstrInfo *tii,
const TargetRegisterInfo *tri)
: Queue(SF(this)), currentQueueId(0),
TII(tii), TRI(tri), scheduleDAG(NULL) {}
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
AddPseudoTwoAddrDeps();
PrescheduleNodesWithMultipleUses();
CalculateSethiUllmanNumbers();
}
void addNode(const SUnit *SU) {
unsigned SUSize = SethiUllmanNumbers.size();
if (SUnits->size() > SUSize)
SethiUllmanNumbers.resize(SUSize*2, 0);
CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}
void updateNode(const SUnit *SU) {
SethiUllmanNumbers[SU->NodeNum] = 0;
CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}
void releaseState() {
SUnits = 0;
SethiUllmanNumbers.clear();
}
unsigned getNodePriority(const SUnit *SU) const {
assert(SU->NodeNum < SethiUllmanNumbers.size());
unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
return 0;
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::INSERT_SUBREG)
return 0;
if (SU->NumSuccs == 0 && SU->NumPreds != 0)
return 0xffff;
if (SU->NumPreds == 0 && SU->NumSuccs != 0)
return 0;
return SethiUllmanNumbers[SU->NodeNum];
}
unsigned getNodeOrdering(const SUnit *SU) const {
return scheduleDAG->DAG->GetOrdering(SU->getNode());
}
unsigned size() const { return Queue.size(); }
bool empty() const { return Queue.empty(); }
void push(SUnit *U) {
assert(!U->NodeQueueId && "Node in the queue already");
U->NodeQueueId = ++currentQueueId;
Queue.push(U);
}
void push_all(const std::vector<SUnit *> &Nodes) {
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
push(Nodes[i]);
}
SUnit *pop() {
if (empty()) return NULL;
SUnit *V = Queue.top();
Queue.pop();
V->NodeQueueId = 0;
return V;
}
void remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
Queue.erase_one(SU);
SU->NodeQueueId = 0;
}
void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
scheduleDAG = scheduleDag;
}
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
void AddPseudoTwoAddrDeps();
void PrescheduleNodesWithMultipleUses();
void CalculateSethiUllmanNumbers();
};
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;
typedef RegReductionPriorityQueue<td_ls_rr_sort>
TDRegReductionPriorityQueue;
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
}
static unsigned closestSucc(const SUnit *SU) {
unsigned MaxHeight = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isCtrl()) continue; unsigned Height = I->getSUnit()->getHeight();
if (I->getSUnit()->getNode() &&
I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
Height = closestSucc(I->getSUnit())+1;
if (Height > MaxHeight)
MaxHeight = Height;
}
return MaxHeight;
}
static unsigned calcMaxScratches(const SUnit *SU) {
unsigned Scratches = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; Scratches++;
}
return Scratches;
}
template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
const RegReductionPriorityQueue<RRSort> *SPQ) {
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
if (LPriority != RPriority)
return LPriority > RPriority;
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
if (LDist != RDist)
return LDist < RDist;
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
if (LScratch != RScratch)
return LScratch > RScratch;
if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
return BURRSort(left, right, SPQ);
}
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
if ((LOrder || ROrder) && LOrder != ROrder)
return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
return BURRSort(left, right, SPQ);
}
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
unsigned NumRes = TID.getNumDefs();
unsigned NumOps = TID.getNumOperands() - NumRes;
for (unsigned i = 0; i != NumOps; ++i) {
if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
SDNode *DU = SU->getNode()->getOperand(i).getNode();
if (DU->getNodeId() != -1 &&
Op->OrigNode == &(*SUnits)[DU->getNodeId()])
return true;
}
}
}
return false;
}
static bool hasCopyToRegUse(const SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isCtrl()) continue;
const SUnit *SuccSU = I->getSUnit();
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
return true;
}
return false;
}
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getFlaggedNode()) {
if (!SUNode->isMachineOpcode())
continue;
const unsigned *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
if (!SUImpDefs)
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Flag || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
return true;
}
}
}
return false;
}
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (SU->NumSuccs != 0)
continue;
if (SU->NumPreds != 1)
continue;
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyToReg &&
TargetRegisterInfo::isVirtualRegister
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
SUnit *PredSU = 0;
for (SUnit::const_pred_iterator II = SU->Preds.begin(),
EE = SU->Preds.end(); II != EE; ++II)
if (!II->isCtrl()) {
PredSU = II->getSUnit();
break;
}
assert(PredSU);
if (PredSU->hasPhysRegDefs)
continue;
if (PredSU->NumSuccs == 1)
continue;
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
EE = PredSU->Succs.end(); II != EE; ++II) {
SUnit *PredSuccSU = II->getSUnit();
if (PredSuccSU == SU) continue;
if (PredSuccSU->NumSuccs == 0)
goto outer_loop_continue;
if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
goto outer_loop_continue;
if (scheduleDAG->IsReachable(SU, PredSuccSU))
goto outer_loop_continue;
}
DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
<< " next to PredSU # " << PredSU->NodeNum
<< " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
assert(!Edge.isAssignedRegDep());
SUnit *SuccSU = Edge.getSUnit();
if (SuccSU != SU) {
Edge.setSUnit(PredSU);
scheduleDAG->RemovePred(SuccSU, Edge);
scheduleDAG->AddPred(SU, Edge);
Edge.setSUnit(SU);
scheduleDAG->AddPred(SuccSU, Edge);
--i;
}
}
outer_loop_continue:;
}
}
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (!SU->isTwoAddress)
continue;
SDNode *Node = SU->getNode();
if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
continue;
unsigned Opc = Node->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
unsigned NumRes = TID.getNumDefs();
unsigned NumOps = TID.getNumOperands() - NumRes;
for (unsigned j = 0; j != NumOps; ++j) {
if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
continue;
SDNode *DU = SU->getNode()->getOperand(j).getNode();
if (DU->getNodeId() == -1)
continue;
const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
if (!DUSU) continue;
for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
E = DUSU->Succs.end(); I != E; ++I) {
if (I->isCtrl()) continue;
SUnit *SuccSU = I->getSUnit();
if (SuccSU == SU)
continue;
if (SuccSU->getHeight() < SU->getHeight() &&
(SU->getHeight() - SuccSU->getHeight()) > 1)
continue;
while (SuccSU->Succs.size() == 1 &&
SuccSU->getNode()->isMachineOpcode() &&
SuccSU->getNode()->getMachineOpcode() ==
TargetOpcode::COPY_TO_REGCLASS)
SuccSU = SuccSU->Succs.front().getSUnit();
if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
continue;
if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
continue;
}
unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
SuccOpc == TargetOpcode::INSERT_SUBREG ||
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
if ((!canClobber(SuccSU, DUSU) ||
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, 0,
0, false,
false,
true));
}
}
}
}
}
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
SethiUllmanNumbers.assign(SUnits->size(), 0);
for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
unsigned Limit) {
unsigned Sum = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
const SUnit *SuccSU = I->getSUnit();
for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
EE = SuccSU->Preds.end(); II != EE; ++II) {
SUnit *PredSU = II->getSUnit();
if (!PredSU->isScheduled)
if (++Sum > Limit)
return Sum;
}
}
return Sum;
}
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
bool LIsFloater = LIsTarget && left->NumPreds == 0;
bool RIsFloater = RIsTarget && right->NumPreds == 0;
unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
if (left->NumSuccs == 0 && right->NumSuccs != 0)
return false;
else if (left->NumSuccs != 0 && right->NumSuccs == 0)
return true;
if (LIsFloater)
LBonus -= 2;
if (RIsFloater)
RBonus -= 2;
if (left->NumSuccs == 1)
LBonus += 2;
if (right->NumSuccs == 1)
RBonus += 2;
if (LPriority+LBonus != RPriority+RBonus)
return LPriority+LBonus < RPriority+RBonus;
if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->MF, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->MF, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->MF, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
}