From 317c4509fd05716dfb0d6b5cbe5831f0c3455ea2 Mon Sep 17 00:00:00 2001 From: doe300 Date: Mon, 29 Jun 2020 16:44:19 +0200 Subject: [PATCH] Implements postdominator tree - Uses SmallImmediates in vector helper functions where applicable. - Also fixed replacement of arbitrary values for combined operations. --- src/analysis/DominatorTree.cpp | 94 +++++++++++++++------- src/analysis/DominatorTree.h | 1 + src/intermediate/IntermediateInstruction.h | 11 +-- src/intermediate/Operations.cpp | 7 +- src/intermediate/VectorHelper.cpp | 34 ++++---- 5 files changed, 93 insertions(+), 54 deletions(-) diff --git a/src/analysis/DominatorTree.cpp b/src/analysis/DominatorTree.cpp index 64940acf..ba59c38e 100644 --- a/src/analysis/DominatorTree.cpp +++ b/src/analysis/DominatorTree.cpp @@ -53,33 +53,8 @@ FastSet DominatorTreeNodeBase::getDominatedNodes() return dominatedNodes; } -static FastSet getDominatorCandidates(const CFGNode& node) -{ - // check all incoming edges that are not back edges or bidirectional (e.g. for small loops) - FastSet possibleDominators; - std::size_t numIncomingEdges = 0; - node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool { - ++numIncomingEdges; - if(!edge.data.isBackEdge(predecessor.key) && !edge.data.isWorkGroupLoop && - edge.getDirection() != Direction::BOTH) - possibleDominators.emplace(&predecessor); - return true; - }); - - // if there is only exactly 1 incoming edge, this is our dominator, even if we jump back to it at some point - if(numIncomingEdges == 1) - node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool { - possibleDominators.emplace(&predecessor); - return true; - }); - - // don't use the node itself as dominator (e.g. for single-block loop) - possibleDominators.erase(&node); - - return possibleDominators; -} - -std::unique_ptr DominatorTree::createDominatorTree(ControlFlowGraph& cfg) +static std::unique_ptr createTreeInner( + ControlFlowGraph& cfg, FastSet (*getCandidates)(const CFGNode& node), const std::string& treeName) { PROFILE_START(createDominatorTree); std::unique_ptr tree(new DominatorTree(cfg.getNodes().size())); @@ -91,7 +66,7 @@ std::unique_ptr DominatorTree::createDominatorTree(ControlFlowGra for(const auto& node : cfg.getNodes()) { auto& entry = tree->getOrCreateNode(&node.second); - auto tmp = getDominatorCandidates(node.second); + auto tmp = getCandidates(node.second); if(tmp.empty()) { @@ -196,7 +171,7 @@ std::unique_ptr DominatorTree::createDominatorTree(ControlFlowGra logging::logLazy(logging::Level::DEBUG, [&]() { auto nameFunc = [](const CFGNode* node) -> std::string { return node->key->to_string(); }; DebugGraph::dumpGraph( - *tree, "/tmp/vc4c-dominators.dot", nameFunc); + *tree, "/tmp/vc4c-" + treeName + ".dot", nameFunc); }); LCOV_EXCL_STOP #endif @@ -204,3 +179,64 @@ std::unique_ptr DominatorTree::createDominatorTree(ControlFlowGra PROFILE_END(createDominatorTree); return tree; } + +static FastSet getDominatorCandidates(const CFGNode& node) +{ + // check all incoming edges that are not back edges or bidirectional (e.g. for small loops) + FastSet possibleDominators; + std::size_t numIncomingEdges = 0; + node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool { + ++numIncomingEdges; + if(!edge.data.isBackEdge(predecessor.key) && !edge.data.isWorkGroupLoop && + edge.getDirection() != Direction::BOTH) + possibleDominators.emplace(&predecessor); + return true; + }); + + // if there is only exactly 1 incoming edge, this is our dominator, even if we jump back to it at some point + if(numIncomingEdges == 1) + node.forAllIncomingEdges([&](const CFGNode& predecessor, const CFGEdge& edge) -> bool { + possibleDominators.emplace(&predecessor); + return true; + }); + + // don't use the node itself as dominator (e.g. for single-block loop) + possibleDominators.erase(&node); + + return possibleDominators; +} + +std::unique_ptr DominatorTree::createDominatorTree(ControlFlowGraph& cfg) +{ + return createTreeInner(cfg, getDominatorCandidates, "dominators"); +} + +static FastSet getPostdominatorCandidates(const CFGNode& node) +{ + // check all outgoing edges that are not back edges or bidirectional (e.g. for small loops) + FastSet possiblePostdominators; + std::size_t numOutgoingEdges = 0; + node.forAllOutgoingEdges([&](const CFGNode& successor, const CFGEdge& edge) -> bool { + ++numOutgoingEdges; + if(!edge.data.isBackEdge(node.key) && !edge.data.isWorkGroupLoop && edge.getDirection() != Direction::BOTH) + possiblePostdominators.emplace(&successor); + return true; + }); + + // if there is only exactly 1 outgoing edge, this is our postdominator, even if we jump back to it at some point + if(numOutgoingEdges == 1) + node.forAllOutgoingEdges([&](const CFGNode& successor, const CFGEdge& edge) -> bool { + possiblePostdominators.emplace(&successor); + return true; + }); + + // don't use the node itself as postdominator (e.g. for single-block loop) + possiblePostdominators.erase(&node); + + return possiblePostdominators; +} + +std::unique_ptr DominatorTree::createPostdominatorTree(ControlFlowGraph& cfg) +{ + return createTreeInner(cfg, getPostdominatorCandidates, "postdominators"); +} diff --git a/src/analysis/DominatorTree.h b/src/analysis/DominatorTree.h index 44978da1..72efbc2a 100644 --- a/src/analysis/DominatorTree.h +++ b/src/analysis/DominatorTree.h @@ -51,6 +51,7 @@ namespace vc4c explicit DominatorTree(std::size_t numNodes) : Graph(numNodes) {} static std::unique_ptr createDominatorTree(ControlFlowGraph& cfg); + static std::unique_ptr createPostdominatorTree(ControlFlowGraph& cfg); }; } // namespace analysis diff --git a/src/intermediate/IntermediateInstruction.h b/src/intermediate/IntermediateInstruction.h index 4f2ff901..38848d75 100644 --- a/src/intermediate/IntermediateInstruction.h +++ b/src/intermediate/IntermediateInstruction.h @@ -154,10 +154,9 @@ namespace vc4c const; virtual bool readsLocal(const Local* local) const; virtual bool writesLocal(const Local* local) const; - virtual void replaceLocal( - const Local* oldLocal, const Local* newLocal, LocalUse::Type type = LocalUse::Type::BOTH); - virtual void replaceLocal( - const Local* oldLocal, const Value& newValue, LocalUse::Type type = LocalUse::Type::BOTH); + void replaceLocal(const Local* oldLocal, const Local* newLocal, LocalUse::Type type = LocalUse::Type::BOTH); + void replaceLocal(const Local* oldLocal, const Value& newValue, LocalUse::Type type = LocalUse::Type::BOTH); + virtual bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type); /* * Whether this instructions reads the given register @@ -324,8 +323,6 @@ namespace vc4c */ virtual PrecalculatedValue precalculate(std::size_t numIterations = 1) const; - bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type); - /* Determine constant instruction, such as * - load immediate instruction * - instruction whose all arguments are immediate value and which has output without side effect (its @@ -749,7 +746,7 @@ namespace vc4c const override; bool readsLocal(const Local* local) const override; bool writesLocal(const Local* local) const override; - void replaceLocal(const Local* oldLocal, const Local* newLocal, LocalUse::Type type) override; + bool replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type) override; std::string to_string() const override; diff --git a/src/intermediate/Operations.cpp b/src/intermediate/Operations.cpp index 97ed1ecc..c3ac3d91 100644 --- a/src/intermediate/Operations.cpp +++ b/src/intermediate/Operations.cpp @@ -852,10 +852,11 @@ bool CombinedOperation::writesLocal(const Local* local) const return (op1 && op1->writesLocal(local)) || (op2 && op2->writesLocal(local)); } -void CombinedOperation::replaceLocal(const Local* oldLocal, const Local* newLocal, const LocalUse::Type type) +bool CombinedOperation::replaceValue(const Value& oldValue, const Value& newValue, LocalUse::Type type) { - op1->replaceLocal(oldLocal, newLocal, type); - op2->replaceLocal(oldLocal, newLocal, type); + bool replaced1 = op1->replaceValue(oldValue, newValue, type); + bool replaced2 = op2->replaceValue(oldValue, newValue, type); + return replaced1 | replaced2; } LCOV_EXCL_START diff --git a/src/intermediate/VectorHelper.cpp b/src/intermediate/VectorHelper.cpp index fbdc3779..7fef7eed 100644 --- a/src/intermediate/VectorHelper.cpp +++ b/src/intermediate/VectorHelper.cpp @@ -188,7 +188,7 @@ InstructionWalker intermediate::insertVectorInsertion( it, value, index, tmp, intermediate::Direction::UP, value.type.isScalarType()); } // 2) insert element(s) into container - if(value.type.isScalarType()) + if(value.type.isScalarType() || value.type.getPointerType()) { // single element -> create condition only met in given index auto cond = assignNop(it) = selectSIMDElement(index); @@ -256,11 +256,11 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle( Value resultTmp = method.addNewLocal(source.type.getElementType(), "%shuffle_tmp"); // Rotate into temporary, because of "An instruction that does a vector rotate by r5 must not immediately follow // an instruction that writes to r5." - Broadcom Specification, page 37 - it = insertVectorRotation(it, mask, Value(Literal(i), TYPE_INT8), offsetTmp0, Direction::DOWN); + it = insertVectorRotation(it, mask, Value(SmallImmediate(i), TYPE_INT8), offsetTmp0, Direction::DOWN); // TODO reuse insertVectorInsertion/Extraction, but only if 2 rotations by variable + constant are optimized // into one (like is done here) // pos 3 -> 1 => rotate up by -2 (14), pos 1 -> 3 => rotate up by 2 - Value offsetTmp1 = assign(it, TYPE_INT8, "%shuffle_offset") = Value(Literal(i), TYPE_INT8) - offsetTmp0; + Value offsetTmp1 = assign(it, TYPE_INT8, "%shuffle_offset") = Value(SmallImmediate(i), TYPE_INT8) - offsetTmp0; it = insertVectorRotation(it, source, offsetTmp1, resultTmp, Direction::UP); if(i == 0) @@ -296,7 +296,7 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle2Vectors(Instructio { // Rotate mask vector index to element 0 Value offsetTmp0 = method.addNewLocal(TYPE_INT8, "%shuffle_offset"); - it = insertVectorRotation(it, mask, Value(Literal(index), TYPE_INT8), offsetTmp0, Direction::DOWN); + it = insertVectorRotation(it, mask, Value(SmallImmediate(index), TYPE_INT8), offsetTmp0, Direction::DOWN); // Only consider 0th element: zero set if first source, zero clear otherwise auto offsetTmp1 = assign(it, TYPE_INT8, "%vector_selection") = offsetTmp0 / Literal(static_cast(NATIVE_VECTOR_SIZE)); @@ -311,12 +311,12 @@ static NODISCARD InstructionWalker insertDynamicVectorShuffle2Vectors(Instructio // Extract value from source vector at position determined from mask vector and insert in destination vector Value valTmp = method.addNewLocal(destination.type.getElementType(), "%vector_shuffle"); it = insertVectorExtraction(it, method, sourceTmp, offsetTmp0, valTmp); - it = insertVectorInsertion(it, method, destination, Value(Literal(index), TYPE_INT8), valTmp); + it = insertVectorInsertion(it, method, destination, Value(SmallImmediate(index), TYPE_INT8), valTmp); } return it; } -static uint32_t toLowestIndex(uint32_t mask) +static uint8_t toLowestIndex(uint32_t mask) { uint32_t index = 0; while(mask != 0) @@ -325,7 +325,7 @@ static uint32_t toLowestIndex(uint32_t mask) ++index; } // bit set at index 0 is first/only iteration -> index is already incremented to 1 - return index - 1; + return static_cast(index - 1); } InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method& method, const Value& destination, @@ -401,7 +401,9 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method // if the index to be used is not 0, rotate to position 0 tmp = method.addNewLocal(source.type, "%vector_shuffle"); // allow per-quad rotation, since we only care about the 0th element, since we replicate this one - it = insertVectorRotation(it, source, Value(Literal(indexValue), TYPE_INT8), tmp, Direction::DOWN, true); + it = insertVectorRotation(it, source, + Value(SmallImmediate::fromInteger(static_cast(indexValue)).value(), TYPE_INT8), tmp, + Direction::DOWN, true); } return insertReplication(it, tmp, destination); } @@ -451,13 +453,14 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method if(sources.second.count() == destination.type.getVectorWidth()) { it = insertVectorRotation( - it, src, Value(Literal(sources.first.first), TYPE_INT8), destination, Direction::DOWN); + it, src, Value(SmallImmediate(sources.first.first), TYPE_INT8), destination, Direction::DOWN); } else { auto tmp = method.addNewLocal(destination.type, "%vector_shuffle"); // rotate source vector by the given offset - it = insertVectorRotation(it, src, Value(Literal(sources.first.first), TYPE_INT8), tmp, Direction::DOWN); + it = insertVectorRotation( + it, src, Value(SmallImmediate(sources.first.first), TYPE_INT8), tmp, Direction::DOWN); // set flags only for the selected elements ConditionCode cond = COND_NEVER; if(sources.second.count() == 1) @@ -465,7 +468,8 @@ InstructionWalker intermediate::insertVectorShuffle(InstructionWalker it, Method // for cosmetic purposes (and possible combination with other instructions), mask single elements via // xoring small immediates assign(it, NOP_REGISTER) = (ELEMENT_NUMBER_REGISTER ^ - Value(Literal(toLowestIndex(static_cast(sources.second.to_ulong()))), TYPE_INT8), + Value( + SmallImmediate(toLowestIndex(static_cast(sources.second.to_ulong()))), TYPE_INT8), SetFlag::SET_FLAGS); cond = COND_ZERO_SET; } @@ -497,11 +501,11 @@ NODISCARD InstructionWalker intermediate::insertVectorConcatenation( // rotate the second vector with the size of the first as offset Value tmpSource1 = method.addNewLocal(source1.type.toVectorType(16), "%vector_concat"); it = insertVectorRotation( - it, source1, Value(Literal(source0.type.getVectorWidth()), TYPE_INT8), tmpSource1, Direction::UP); + it, source1, Value(SmallImmediate(source0.type.getVectorWidth()), TYPE_INT8), tmpSource1, Direction::UP); // insert the elements of the second vector with an element-number of higher or equals the size of the first // vector into the result assign(it, NOP_REGISTER) = - (ELEMENT_NUMBER_REGISTER - Value(Literal(source0.type.getVectorWidth()), TYPE_INT8), SetFlag::SET_FLAGS); + (ELEMENT_NUMBER_REGISTER - Value(SmallImmediate(source0.type.getVectorWidth()), TYPE_INT8), SetFlag::SET_FLAGS); assign(it, dest) = (tmpSource1, COND_NEGATIVE_CLEAR, InstructionDecorations::ELEMENT_INSERTION); return it; } @@ -950,7 +954,7 @@ InstructionWalker intermediate::insertAssembleVector( uint8_t index = 0; for(auto& elem : elements) { - it = insertVectorInsertion(it, method, dest, Value(Literal(index), TYPE_INT8), elem); + it = insertVectorInsertion(it, method, dest, Value(SmallImmediate(index), TYPE_INT8), elem); ++index; } return it; @@ -998,7 +1002,7 @@ InstructionWalker intermediate::insertFoldVector(InstructionWalker it, Method& m auto tmpUp = method.addNewLocal(dest.type.toVectorType(halfSize), "%vector_fold.upper"); auto newTmpResult = method.addNewLocal(dest.type.toVectorType(halfSize), "%vector_fold"); - it = insertVectorRotation(it, tmpResult, Value(Literal(halfSize), TYPE_INT8), tmpUp, Direction::DOWN); + it = insertVectorRotation(it, tmpResult, Value(SmallImmediate(halfSize), TYPE_INT8), tmpUp, Direction::DOWN); it.emplace(new Operation(foldingOp, newTmpResult, tmpResult, tmpUp)); it->addDecorations(decorations);