Skip to content

Commit

Permalink
Implements postdominator tree
Browse files Browse the repository at this point in the history
- Uses SmallImmediates in vector helper functions where applicable.
- Also fixed replacement of arbitrary values for combined operations.
  • Loading branch information
doe300 committed Jun 29, 2020
1 parent d048366 commit 317c450
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 54 deletions.
94 changes: 65 additions & 29 deletions src/analysis/DominatorTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,33 +53,8 @@ FastSet<const DominatorTreeNodeBase*> DominatorTreeNodeBase::getDominatedNodes()
return dominatedNodes;
}

/*
 * Collects the predecessors of the given CFG node which are considered as candidates for dominating it.
 *
 * A predecessor is added if the edge connecting it to the node is not reported as back edge for the predecessor, is
 * not flagged as work-group loop and is not bidirectional. If the node has exactly one incoming edge, the
 * predecessor on that edge is added regardless of these filters. The node itself is never contained in the result.
 */
static FastSet<const CFGNode*> getDominatorCandidates(const CFGNode& node)
{
// check all incoming edges that are not back edges or bidirectional (e.g. for small loops)
FastSet<const CFGNode*> possibleDominators;
// counts every incoming edge, including the ones filtered out below
std::size_t numIncomingEdges = 0;
node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool {
++numIncomingEdges;
if(!edge.data.isBackEdge(predecessor.key) && !edge.data.isWorkGroupLoop &&
edge.getDirection() != Direction::BOTH)
possibleDominators.emplace(&predecessor);
// always returns true; presumably this tells the traversal to continue with the next edge
return true;
});

// if there is only exactly 1 incoming edge, this is our dominator, even if we jump back to it at some point
if(numIncomingEdges == 1)
node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool {
possibleDominators.emplace(&predecessor);
return true;
});

// don't use the node itself as dominator (e.g. for single-block loop)
possibleDominators.erase(&node);

return possibleDominators;
}

std::unique_ptr<DominatorTree> DominatorTree::createDominatorTree(ControlFlowGraph& cfg)
static std::unique_ptr<DominatorTree> createTreeInner(
ControlFlowGraph& cfg, FastSet<const CFGNode*> (*getCandidates)(const CFGNode& node), const std::string& treeName)
{
PROFILE_START(createDominatorTree);
std::unique_ptr<DominatorTree> tree(new DominatorTree(cfg.getNodes().size()));
Expand All @@ -91,7 +66,7 @@ std::unique_ptr<DominatorTree> DominatorTree::createDominatorTree(ControlFlowGra
for(const auto& node : cfg.getNodes())
{
auto& entry = tree->getOrCreateNode(&node.second);
auto tmp = getDominatorCandidates(node.second);
auto tmp = getCandidates(node.second);

if(tmp.empty())
{
Expand Down Expand Up @@ -196,11 +171,72 @@ std::unique_ptr<DominatorTree> DominatorTree::createDominatorTree(ControlFlowGra
logging::logLazy(logging::Level::DEBUG, [&]() {
auto nameFunc = [](const CFGNode* node) -> std::string { return node->key->to_string(); };
DebugGraph<const CFGNode*, DominationRelation, Directionality::DIRECTED>::dumpGraph<DominatorTree>(
*tree, "/tmp/vc4c-dominators.dot", nameFunc);
*tree, "/tmp/vc4c-" + treeName + ".dot", nameFunc);
});
LCOV_EXCL_STOP
#endif

PROFILE_END(createDominatorTree);
return tree;
}

/*
 * Determines which predecessors of the given CFG node may act as its dominator.
 *
 * Predecessors reached via an edge that is reported as back edge for the predecessor, that is flagged as work-group
 * loop or that is bidirectional are left out. The exception is a node with exactly one incoming edge: its only
 * predecessor is always accepted. The node itself is never returned as its own candidate.
 */
static FastSet<const CFGNode*> getDominatorCandidates(const CFGNode& node)
{
    FastSet<const CFGNode*> candidates;
    std::size_t edgeCount = 0;
    // remembers the most recently visited predecessor, which is the only one if there is a single incoming edge
    const CFGNode* lastPredecessor = nullptr;

    node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool {
        ++edgeCount;
        lastPredecessor = &predecessor;
        // skip back edges, work-group loops and bidirectional edges (e.g. for small loops)
        const bool skipEdge = edge.data.isBackEdge(predecessor.key) || edge.data.isWorkGroupLoop ||
            edge.getDirection() == Direction::BOTH;
        if(!skipEdge)
            candidates.emplace(&predecessor);
        return true;
    });

    // a single incoming edge always determines the dominator, even if we jump back to it at some point
    if(edgeCount == 1)
        candidates.emplace(lastPredecessor);

    // a node cannot be its own dominator (e.g. for single-block loop)
    candidates.erase(&node);

    return candidates;
}

/*
 * Creates the dominator tree for the given control flow graph.
 *
 * Delegates to createTreeInner, selecting the candidates via getDominatorCandidates (i.e. from the incoming edges of
 * every node) and passing "dominators" as name of the tree.
 */
std::unique_ptr<DominatorTree> DominatorTree::createDominatorTree(ControlFlowGraph& cfg)
{
return createTreeInner(cfg, getDominatorCandidates, "dominators");
}

/*
 * Determines which successors of the given CFG node may act as its postdominator.
 *
 * Successors reached via an edge that is reported as back edge for the given node, that is flagged as work-group
 * loop or that is bidirectional are left out. The exception is a node with exactly one outgoing edge: its only
 * successor is always accepted. The node itself is never returned as its own candidate.
 */
static FastSet<const CFGNode*> getPostdominatorCandidates(const CFGNode& node)
{
    FastSet<const CFGNode*> candidates;
    std::size_t edgeCount = 0;
    // remembers the most recently visited successor, which is the only one if there is a single outgoing edge
    const CFGNode* lastSuccessor = nullptr;

    node.forAllOutgoingEdges([&](const CFGNode& successor, const CFGEdge& edge) -> bool {
        ++edgeCount;
        lastSuccessor = &successor;
        // skip back edges, work-group loops and bidirectional edges (e.g. for small loops)
        const bool skipEdge =
            edge.data.isBackEdge(node.key) || edge.data.isWorkGroupLoop || edge.getDirection() == Direction::BOTH;
        if(!skipEdge)
            candidates.emplace(&successor);
        return true;
    });

    // a single outgoing edge always determines the postdominator, even if we jump back to it at some point
    if(edgeCount == 1)
        candidates.emplace(lastSuccessor);

    // a node cannot be its own postdominator (e.g. for single-block loop)
    candidates.erase(&node);

    return candidates;
}

/*
 * Creates the postdominator tree for the given control flow graph.
 *
 * Delegates to createTreeInner, selecting the candidates via getPostdominatorCandidates (i.e. from the outgoing edges
 * of every node) and passing "postdominators" as name of the tree.
 */
std::unique_ptr<DominatorTree> DominatorTree::createPostdominatorTree(ControlFlowGraph& cfg)
{
return createTreeInner(cfg, getPostdominatorCandidates, "postdominators");
}
1 change: 1 addition & 0 deletions src/analysis/DominatorTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ namespace vc4c
explicit DominatorTree(std::size_t numNodes) : Graph(numNodes) {}

static std::unique_ptr<DominatorTree> createDominatorTree(ControlFlowGraph& cfg);
static std::unique_ptr<DominatorTree> createPostdominatorTree(ControlFlowGraph& cfg);
};

} // namespace analysis
Expand Down
11 changes: 4 additions & 7 deletions src/intermediate/IntermediateInstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,9 @@ namespace vc4c
const;
virtual bool readsLocal(const Local* local) const;
virtual bool writesLocal(const Local* local) const;
virtual void replaceLocal(
const Local* oldLocal, const Local* newLocal, LocalUse::Type type = LocalUse::Type::BOTH);
virtual void replaceLocal(
const Local* oldLocal, const Value& newValue, LocalUse::Type type = LocalUse::Type::BOTH);
void replaceLocal(const Local* oldLocal, const Local* newLocal, LocalUse::Type type = LocalUse::Type::BOTH);
void replaceLocal(const Local* oldLocal, const Value& newValue, LocalUse::Type type = LocalUse::Type::BOTH);
virtual bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type);

/*
* Whether this instructions reads the given register
Expand Down Expand Up @@ -324,8 +323,6 @@ namespace vc4c
*/
virtual PrecalculatedValue precalculate(std::size_t numIterations = 1) const;

bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type);

/* Determine constant instruction, such as
* - load immediate instruction
* - instruction whose all arguments are immediate value and which has output without side effect (its
Expand Down Expand Up @@ -749,7 +746,7 @@ namespace vc4c
const override;
bool readsLocal(const Local* local) const override;
bool writesLocal(const Local* local) const override;
void replaceLocal(const Local* oldLocal, const Local* newLocal, LocalUse::Type type) override;
bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type) override;

std::string to_string() const override;

Expand Down
7 changes: 4 additions & 3 deletions src/intermediate/Operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,10 +852,11 @@ bool CombinedOperation::writesLocal(const Local* local) const
return (op1 && op1->writesLocal(local)) || (op2 && op2->writesLocal(local));
}

void CombinedOperation::replaceLocal(const Local* oldLocal, const Local* newLocal, const LocalUse::Type type)
bool CombinedOperation::replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type)
{
op1->replaceLocal(oldLocal, newLocal, type);
op2->replaceLocal(oldLocal, newLocal, type);
bool replaced1 = op1->replaceValue(oldValue, newValue, type);
bool replaced2 = op2->replaceValue(oldValue, newValue, type);
return replaced1 | replaced2;
}

LCOV_EXCL_START
Expand Down
34 changes: 19 additions & 15 deletions src/intermediate/VectorHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ InstructionWalker intermediate::insertVectorInsertion(
it, value, index, tmp, intermediate::Direction::UP, value.type.isScalarType());
}
// 2) insert element(s) into container
if(value.type.isScalarType())
if(value.type.isScalarType() || value.type.getPointerType())
{
// single element -> create condition only met in given index
auto cond = assignNop(it) = selectSIMDElement(index);
Expand Down Expand Up @@ -256,11 +256,11 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle(
Value resultTmp = method.addNewLocal(source.type.getElementType(), "%shuffle_tmp");
// Rotate into temporary, because of "An instruction that does a vector rotate by r5 must not immediately follow
// an instruction that writes to r5." - Broadcom Specification, page 37
it = insertVectorRotation(it, mask, Value(Literal(i), TYPE_INT8), offsetTmp0, Direction::DOWN);
it = insertVectorRotation(it, mask, Value(SmallImmediate(i), TYPE_INT8), offsetTmp0, Direction::DOWN);
// TODO reuse insertVectorInsertion/Extraction, but only if 2 rotations by variable + constant are optimized
// into one (like is done here)
// pos 3 -> 1 => rotate up by -2 (14), pos 1 -> 3 => rotate up by 2
Value offsetTmp1 = assign(it, TYPE_INT8, "%shuffle_offset") = Value(Literal(i), TYPE_INT8) - offsetTmp0;
Value offsetTmp1 = assign(it, TYPE_INT8, "%shuffle_offset") = Value(SmallImmediate(i), TYPE_INT8) - offsetTmp0;
it = insertVectorRotation(it, source, offsetTmp1, resultTmp, Direction::UP);

if(i == 0)
Expand Down Expand Up @@ -296,7 +296,7 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle2Vectors(Instructio
{
// Rotate mask vector index to element 0
Value offsetTmp0 = method.addNewLocal(TYPE_INT8, "%shuffle_offset");
it = insertVectorRotation(it, mask, Value(Literal(index), TYPE_INT8), offsetTmp0, Direction::DOWN);
it = insertVectorRotation(it, mask, Value(SmallImmediate(index), TYPE_INT8), offsetTmp0, Direction::DOWN);
// Only consider 0th element: zero set if first source, zero clear otherwise
auto offsetTmp1 = assign(it, TYPE_INT8, "%vector_selection") =
offsetTmp0 / Literal(static_cast<uint32_t>(NATIVE_VECTOR_SIZE));
Expand All @@ -311,12 +311,12 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle2Vectors(Instructio
// Extract value from source vector at position determined from mask vector and insert in destination vector
Value valTmp = method.addNewLocal(destination.type.getElementType(), "%vector_shuffle");
it = insertVectorExtraction(it, method, sourceTmp, offsetTmp0, valTmp);
it = insertVectorInsertion(it, method, destination, Value(Literal(index), TYPE_INT8), valTmp);
it = insertVectorInsertion(it, method, destination, Value(SmallImmediate(index), TYPE_INT8), valTmp);
}
return it;
}

static uint32_t toLowestIndex(uint32_t mask)
static uint8_t toLowestIndex(uint32_t mask)
{
uint32_t index = 0;
while(mask != 0)
Expand All @@ -325,7 +325,7 @@ static uint32_t toLowestIndex(uint32_t mask)
++index;
}
// bit set at index 0 is first/only iteration -> index is already incremented to 1
return index - 1;
return static_cast<uint8_t>(index - 1);
}

InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method& method, const Value& destination,
Expand Down Expand Up @@ -401,7 +401,9 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method
// if the index to be used is not 0, rotate to position 0
tmp = method.addNewLocal(source.type, "%vector_shuffle");
// allow per-quad rotation, since we only care about the 0th element, since we replicate this one
it = insertVectorRotation(it, source, Value(Literal(indexValue), TYPE_INT8), tmp, Direction::DOWN, true);
it = insertVectorRotation(it, source,
Value(SmallImmediate::fromInteger(static_cast<int8_t>(indexValue)).value(), TYPE_INT8), tmp,
Direction::DOWN, true);
}
return insertReplication(it, tmp, destination);
}
Expand Down Expand Up @@ -451,21 +453,23 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method
if(sources.second.count() == destination.type.getVectorWidth())
{
it = insertVectorRotation(
it, src, Value(Literal(sources.first.first), TYPE_INT8), destination, Direction::DOWN);
it, src, Value(SmallImmediate(sources.first.first), TYPE_INT8), destination, Direction::DOWN);
}
else
{
auto tmp = method.addNewLocal(destination.type, "%vector_shuffle");
// rotate source vector by the given offset
it = insertVectorRotation(it, src, Value(Literal(sources.first.first), TYPE_INT8), tmp, Direction::DOWN);
it = insertVectorRotation(
it, src, Value(SmallImmediate(sources.first.first), TYPE_INT8), tmp, Direction::DOWN);
// set flags only for the selected elements
ConditionCode cond = COND_NEVER;
if(sources.second.count() == 1)
{
// for cosmetic purposes (and possible combination with other instructions), mask single elements via
// xoring small immediates
assign(it, NOP_REGISTER) = (ELEMENT_NUMBER_REGISTER ^
Value(Literal(toLowestIndex(static_cast<uint32_t>(sources.second.to_ulong()))), TYPE_INT8),
Value(
SmallImmediate(toLowestIndex(static_cast<uint32_t>(sources.second.to_ulong()))), TYPE_INT8),
SetFlag::SET_FLAGS);
cond = COND_ZERO_SET;
}
Expand Down Expand Up @@ -497,11 +501,11 @@ NODISCARD InstructionWalker intermediate::insertVectorConcatenation(
// rotate the second vector with the size of the first as offset
Value tmpSource1 = method.addNewLocal(source1.type.toVectorType(16), "%vector_concat");
it = insertVectorRotation(
it, source1, Value(Literal(source0.type.getVectorWidth()), TYPE_INT8), tmpSource1, Direction::UP);
it, source1, Value(SmallImmediate(source0.type.getVectorWidth()), TYPE_INT8), tmpSource1, Direction::UP);
// insert the elements of the second vector with an element-number of higher or equals the size of the first
// vector into the result
assign(it, NOP_REGISTER) =
(ELEMENT_NUMBER_REGISTER - Value(Literal(source0.type.getVectorWidth()), TYPE_INT8), SetFlag::SET_FLAGS);
(ELEMENT_NUMBER_REGISTER - Value(SmallImmediate(source0.type.getVectorWidth()), TYPE_INT8), SetFlag::SET_FLAGS);
assign(it, dest) = (tmpSource1, COND_NEGATIVE_CLEAR, InstructionDecorations::ELEMENT_INSERTION);
return it;
}
Expand Down Expand Up @@ -950,7 +954,7 @@ InstructionWalker intermediate::insertAssembleVector(
uint8_t index = 0;
for(auto& elem : elements)
{
it = insertVectorInsertion(it, method, dest, Value(Literal(index), TYPE_INT8), elem);
it = insertVectorInsertion(it, method, dest, Value(SmallImmediate(index), TYPE_INT8), elem);
++index;
}
return it;
Expand Down Expand Up @@ -998,7 +1002,7 @@ InstructionWalker intermediate::insertFoldVector(InstructionWalker it, Method& m
auto tmpUp = method.addNewLocal(dest.type.toVectorType(halfSize), "%vector_fold.upper");
auto newTmpResult = method.addNewLocal(dest.type.toVectorType(halfSize), "%vector_fold");

it = insertVectorRotation(it, tmpResult, Value(Literal(halfSize), TYPE_INT8), tmpUp, Direction::DOWN);
it = insertVectorRotation(it, tmpResult, Value(SmallImmediate(halfSize), TYPE_INT8), tmpUp, Direction::DOWN);

it.emplace(new Operation(foldingOp, newTmpResult, tmpResult, tmpUp));
it->addDecorations(decorations);
Expand Down

0 comments on commit 317c450

Please sign in to comment.