Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Parallel router: net decomposition #2367

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "tatum/TimingGraphFwd.hpp"
#include "tatum/TimingConstraintsFwd.hpp"

#include "tatum/analyzers/AdaptiveSetupHoldTimingAnalyzer.hpp"
#include "tatum/graph_walkers.hpp"
#include "tatum/timing_analyzers.hpp"
#include "tatum/analyzers/full_timing_analyzers.hpp"
Expand Down Expand Up @@ -55,9 +56,9 @@ namespace tatum {
///Factory class to construct timing analyzers
///
///\tparam Visitor The analysis type visitor (e.g. SetupAnalysis)
///\tparam GraphWalker The graph walker to use (defaults to serial traversals)
///\tparam GraphWalker The graph walker to use
template<class Visitor,
class GraphWalker>
class... GraphWalkers>
struct AnalyzerFactory {

//We use the dependent_false template to detect if the un-specialized AnalyzerFactor
Expand Down Expand Up @@ -176,6 +177,21 @@ struct AnalyzerFactory<SetupHoldAnalysis,SerialIncrWalker> {
}
};

template<>
struct AnalyzerFactory<SetupHoldAnalysis,ParallelWalker,SerialIncrWalker> {

static std::unique_ptr<SetupHoldTimingAnalyzer> make(const TimingGraph& timing_graph,
const TimingConstraints& timing_constraints,
const DelayCalculator& delay_calc) {
return std::unique_ptr<SetupHoldTimingAnalyzer>(
new detail::AdaptiveSetupHoldTimingAnalyzer<ParallelWalker, SerialIncrWalker>(
timing_graph,
timing_constraints,
delay_calc)
);
}
};

} //namepsace

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace tatum {
///\tparam Visitor The analysis type visitor (e.g. SetupAnalysis)
///\tparam GraphWalkers The graph walker(s) to use
template<class Visitor,
class GraphWalker=SerialWalker>
class... GraphWalkers>
struct AnalyzerFactory;

} //namepsace
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#pragma once
#include <atomic>
#include <chrono>
#include <cstddef>
#include <string>

#include "tatum/TimingGraphFwd.hpp"
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/SetupHoldAnalysis.hpp"
#include "tatum/analyzers/SetupHoldTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
#include "tatum/graph_walkers/TimingGraphWalker.hpp"

namespace tatum { namespace detail {

/** Threshold for AdaptiveSetupHoldTimingAnalyzer to use full updates.
* Expressed as fraction of all edges in timing graph.
*
* The analyzer multiplies this by the timing graph's edge count to obtain the
* number of edge invalidations it accepts; once more edges than that have been
* invalidated it switches from incremental to full mode. */
constexpr float full_update_threshold = 0.1;

/**
 * A concrete implementation of a SetupHoldTimingAnalyzer.
 *
 * This is an adaptive analyzer: can do incremental updates if the number of invalidated
 * edges is small, falls back to a full update after a certain threshold to avoid the overhead.
 *
 * Both walkers drive the same SetupHoldAnalysis visitor. Profiling data is stored in
 * (and read back from) the full walker only.
 *
 * \tparam FullWalker The graph walker used for full updates
 * \tparam IncrWalker The graph walker used for incremental updates
 */
template<class FullWalker=SerialWalker, class IncrWalker=SerialIncrWalker>
class AdaptiveSetupHoldTimingAnalyzer : public SetupHoldTimingAnalyzer {
    public:
        /**
         * Stores references to the graph, constraints and delay calculator (they are not
         * copied), validates the constraints against the graph and zeroes the profiling counters.
         */
        AdaptiveSetupHoldTimingAnalyzer(const TimingGraph& timing_graph, const TimingConstraints& timing_constraints, const DelayCalculator& delay_calculator)
            : SetupHoldTimingAnalyzer()
            , timing_graph_(timing_graph)
            , timing_constraints_(timing_constraints)
            , delay_calculator_(delay_calculator)
            , setup_hold_visitor_(timing_graph_.nodes().size(), timing_graph_.edges().size())
            //Number of invalidated edges tolerated before switching to a full update.
            //The product is computed in float and truncated, made explicit by the cast.
            , max_modified_edges_(static_cast<size_t>(timing_graph_.edges().size() * full_update_threshold)) {
            validate_timing_graph_constraints(timing_graph_, timing_constraints_);

            //Initialize profiling data. Use full walker to store data for both
            full_walker_.set_profiling_data("total_analysis_sec", 0.);
            full_walker_.set_profiling_data("analysis_sec", 0.);
            full_walker_.set_profiling_data("num_full_updates", 0.);
            full_walker_.set_profiling_data("num_incr_updates", 0.);
        }

    protected:
        //Update both setup and hold simultaneously (this is more efficient than updating them sequentially)
        //
        //Runs the walker selected by the current mode, clears the recorded edge invalidations,
        //updates the profiling counters and finally switches back to incremental mode.
        virtual void update_timing_impl() override {
            auto start_time = Clock::now();

            if(mode_ == Mode::INCR)
                update_timing_incr_(setup_hold_visitor_);
            else
                update_timing_full_(setup_hold_visitor_);

            clear_timing_incr_();

            double analysis_sec = std::chrono::duration_cast<dsec>(Clock::now() - start_time).count();

            //Record profiling data (use full walker to store it) (arbitrary choice)
            double total_analysis_sec = analysis_sec + full_walker_.get_profiling_data("total_analysis_sec");
            full_walker_.set_profiling_data("total_analysis_sec", total_analysis_sec);
            full_walker_.set_profiling_data("analysis_sec", analysis_sec);
            if(mode_ == Mode::INCR)
                full_walker_.set_profiling_data("num_incr_updates", full_walker_.get_profiling_data("num_incr_updates") + 1);
            else
                full_walker_.set_profiling_data("num_full_updates", full_walker_.get_profiling_data("num_full_updates") + 1);

            mode_ = Mode::INCR; /* We did our update, try to use incr until too many edges are modified */
        }

        //Update only setup timing
        //
        //NOTE(review): unlike update_timing_impl() this neither clears the invalidated edges,
        //resets the mode nor records profiling data. Presumably this is so that a following
        //hold-only update still sees the same invalidations -- confirm.
        virtual void update_setup_timing_impl() override {
            auto& setup_visitor = setup_hold_visitor_.setup_visitor();

            if(mode_ == Mode::INCR)
                update_timing_incr_(setup_visitor);
            else
                update_timing_full_(setup_visitor);
        }

        //Update only hold timing
        //
        //NOTE(review): same caveat as update_setup_timing_impl() -- no clearing, mode reset
        //or profiling happens here; confirm this is intended.
        virtual void update_hold_timing_impl() override {
            auto& hold_visitor = setup_hold_visitor_.hold_visitor();

            if(mode_ == Mode::INCR)
                update_timing_incr_(hold_visitor);
            else
                update_timing_full_(hold_visitor);
        }

        //Records an invalidated edge with the incremental walker and switches to full mode
        //once more than max_modified_edges_ edges have been invalidated.
        //Once in full mode further invalidations are ignored (a full update redoes everything).
        virtual void invalidate_edge_impl(const EdgeId edge) override {
            if(mode_ == Mode::FULL)
                return;
            incr_walker_.invalidate_edge(edge);
            n_modified_edges_++;
            if(n_modified_edges_ > max_modified_edges_)
                mode_ = Mode::FULL;
        }

        //The queries below report results from whichever walker performed the most recent
        //update. mode_ cannot be used to pick the walker: update_timing_impl() resets it to
        //INCR as soon as an update finishes, and invalidate_edge_impl() may set it to FULL
        //before the full walker has actually run.
        virtual node_range modified_nodes_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.modified_nodes();
            else
                return incr_walker_.modified_nodes();
        }

        //Profiling data is always stored in the full walker (see constructor)
        double get_profiling_data_impl(std::string key) const override {
            return full_walker_.get_profiling_data(key);
        }

        size_t num_unconstrained_startpoints_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.num_unconstrained_startpoints();
            else
                return incr_walker_.num_unconstrained_startpoints();
        }

        size_t num_unconstrained_endpoints_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.num_unconstrained_endpoints();
            else
                return incr_walker_.num_unconstrained_endpoints();
        }

        //Tag/slack accessors: forwarded directly to the shared setup/hold visitor
        TimingTags::tag_range setup_tags_impl(NodeId node_id) const override { return setup_hold_visitor_.setup_tags(node_id); }
        TimingTags::tag_range setup_tags_impl(NodeId node_id, TagType type) const override { return setup_hold_visitor_.setup_tags(node_id, type); }
#ifdef TATUM_CALCULATE_EDGE_SLACKS
        TimingTags::tag_range setup_edge_slacks_impl(EdgeId edge_id) const override { return setup_hold_visitor_.setup_edge_slacks(edge_id); }
#endif
        TimingTags::tag_range setup_node_slacks_impl(NodeId node_id) const override { return setup_hold_visitor_.setup_node_slacks(node_id); }

        TimingTags::tag_range hold_tags_impl(NodeId node_id) const override { return setup_hold_visitor_.hold_tags(node_id); }
        TimingTags::tag_range hold_tags_impl(NodeId node_id, TagType type) const override { return setup_hold_visitor_.hold_tags(node_id, type); }
#ifdef TATUM_CALCULATE_EDGE_SLACKS
        TimingTags::tag_range hold_edge_slacks_impl(EdgeId edge_id) const override { return setup_hold_visitor_.hold_edge_slacks(edge_id); }
#endif
        TimingTags::tag_range hold_node_slacks_impl(NodeId node_id) const override { return setup_hold_visitor_.hold_node_slacks(node_id); }

    private:
        /** Update using the full walker */
        void update_timing_full_(GraphVisitor& visitor){
            full_walker_.do_reset(timing_graph_, visitor);

            full_walker_.do_arrival_pre_traversal(timing_graph_, timing_constraints_, visitor);
            full_walker_.do_arrival_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            full_walker_.do_required_pre_traversal(timing_graph_, timing_constraints_, visitor);
            full_walker_.do_required_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            full_walker_.do_update_slack(timing_graph_, delay_calculator_, visitor);

            last_update_mode_ = Mode::FULL;
        }

        /** Update using the incremental walker */
        void update_timing_incr_(GraphVisitor& visitor){
            if (never_updated_incr_) {
                //Invalidate all edges
                for (EdgeId edge : timing_graph_.edges()) {
                    incr_walker_.invalidate_edge(edge);
                }

                //Only need to pre-traverse the first update
                incr_walker_.do_arrival_pre_traversal(timing_graph_, timing_constraints_, visitor);
            }

            incr_walker_.do_arrival_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            if (never_updated_incr_) {
                //Only need to pre-traverse the first update
                incr_walker_.do_required_pre_traversal(timing_graph_, timing_constraints_, visitor);
            }

            incr_walker_.do_required_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            incr_walker_.do_update_slack(timing_graph_, delay_calculator_, visitor);

            last_update_mode_ = Mode::INCR;
        }

        /* Clear incremental timing info */
        void clear_timing_incr_(){
            incr_walker_.clear_invalidated_edges();

            n_modified_edges_ = 0;
            never_updated_incr_ = false;
        }

        const TimingGraph& timing_graph_;
        const TimingConstraints& timing_constraints_;
        const DelayCalculator& delay_calculator_;
        SetupHoldAnalysis setup_hold_visitor_;

        FullWalker full_walker_;
        IncrWalker incr_walker_;
        enum class Mode { FULL, INCR };
        Mode mode_ = Mode::INCR;             //Walker to use for the *next* update
        Mode last_update_mode_ = Mode::INCR; //Walker that performed the most recent update

        bool never_updated_incr_ = true;     //Cleared by the first call to clear_timing_incr_()
        size_t max_modified_edges_;          //Invalidation count above which mode_ becomes FULL
        std::atomic_size_t n_modified_edges_{0}; //Edges invalidated since the last clear_timing_incr_()

        using dsec = std::chrono::duration<double>;
        using Clock = std::chrono::high_resolution_clock;
};

}} //namepsace
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/HoldAnalysis.hpp"
#include "tatum/analyzers/HoldTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/SetupHoldAnalysis.hpp"
#include "tatum/analyzers/SetupHoldTimingAnalyzer.hpp"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/SetupAnalysis.hpp"
#include "tatum/analyzers/SetupTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace tatum {

/**
* A parallel timing analyzer which traveres the timing graph in a levelized
* A parallel timing analyzer which traverses the timing graph in a levelized
* manner. However nodes within each level are processed in parallel using
* Thread Building Blocks (TBB). If TBB is not available it operates serially and is
* equivalent to the SerialWalker.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ namespace tatum {
*
* If TATUM_INCR_BLOCK_INVALIDATION is defined:
* All of a node's tags associated with an invalidated edge are invalidated.
* This is a robust but pessimisitc approach (it invalidates more tags than
* This is a robust but pessimistic approach (it invalidates more tags than
* strictly required). As a result all nodes processed will report having been
* modified, meaning their descendants/predecessors will also be invalidated
* even if in reality the recalculated tags are identical to the previous ones
* (i.e. nothing has really changed).
*
* Otherwise, the analyzer performs edge invalidation:
* Only node tags which are dominanted by an invalidated edge are invalidated.
* Only node tags which are dominated by an invalidated edge are invalidated.
* This is a less pessimistic approach, and means when processed nodes which
* don't have any changed tags will report as being unmodified. This significantly
* prunes the amount of the timing graph which needs to be updated (as unmodified
Expand All @@ -37,7 +37,7 @@ namespace tatum {
* manner. Unlike SerialWalker it attempts to incrementally (rather than
* fully) update based on invalidated edges.
*
* To performan an incremental traversal, the st of invalidated edges
* To perform an incremental traversal, the set of invalidated edges
* is processed to identify nodes which will need to be re-evaluated for
* the arrival and/or required traversals.
*
Expand Down
5 changes: 0 additions & 5 deletions libs/librrgraph/src/base/rr_graph_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,11 +667,6 @@ class t_rr_graph_storage {
/** Reads the direction field stored for RR node `id` in `node_storage`.
 *
 * Issues VTR_LOG_ERROR when the node's type is neither CHANX nor CHANY; there is
 * no early return after that report, the stored field is read regardless. */
static inline Direction get_node_direction(
    vtr::array_view_id<RRNodeId, const t_rr_node_data> node_storage,
    RRNodeId id) {
    const auto& node = node_storage[id];

    const bool is_channel = (node.type_ == CHANX || node.type_ == CHANY);
    if (!is_channel) {
        VTR_LOG_ERROR("Attempted to access RR node 'direction' for non-channel type '%s'",
                      rr_node_typename[node.type_]);
    }

    return node.dir_side_.direction;
}

Expand Down
12 changes: 12 additions & 0 deletions libs/libvtrutil/src/vtr_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <map>
#include <vector>
#include <cmath>
#include <cstdint>

#include "vtr_assert.h"

Expand Down Expand Up @@ -163,6 +164,17 @@ bool isclose(T a, T b) {
return isclose<T>(a, b, DEFAULT_REL_TOL, DEFAULT_ABS_TOL);
}

/** Log2, round down.
 * From https://stackoverflow.com/a/51351885
 *
 * Returns floor(log2(x)) for x >= 1.
 * log2(0) is undefined; 0 is returned for x == 0 rather than handing 0 to the
 * count-leading-zeros builtin, whose result is undefined for a zero argument. */
static inline uint64_t log2_floor(uint64_t x) {
    if (x == 0) {
        return 0;
    }
    // __builtin_clzll takes unsigned long long (at least 64 bits). The 'l' variant
    // takes unsigned long, which is only 32 bits wide on LLP64 targets and would
    // silently truncate a uint64_t argument there.
    return 63U - static_cast<uint64_t>(__builtin_clzll(x));
}

/** Log2, round up.
 *
 * Returns ceil(log2(x)) for x >= 1 (so 0 for x == 1).
 * x == 0 also yields 0: without the guard, x - 1 would wrap around to UINT64_MAX,
 * and x == 1 would evaluate log2_floor(0). */
static inline uint64_t log2_ceil(uint64_t x) {
    if (x <= 1) {
        return 0;
    }
    return log2_floor(x - 1) + 1;
}

} // namespace vtr

#endif
17 changes: 10 additions & 7 deletions utils/route_diag/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,16 @@ static void do_one_route(const Netlist<>& net_list,
-1,
false,
std::unordered_map<RRNodeId, int>());
std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(),
sink_node,
cost_params,
bounding_box,
router_stats,
conn_params,
true);
std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(
tree.root(),
tree.root().inode,
sink_node,
cost_params,
bounding_box,
router_stats,
conn_params,
true
);

if (found_path) {
VTR_ASSERT(cheapest.index == sink_node);
Expand Down
3 changes: 3 additions & 0 deletions vpr/src/base/SetupVPR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ void SetupVPR(const t_options* Options,
/* Set seed for pseudo-random placement, default seed to 1 */
vtr::srandom(PlacerOpts->seed);

/* Make num_workers available to the router */
RouterOpts->num_workers = vpr_setup->num_workers;

{
vtr::ScopedStartFinishTimer t("Building complex block graph");
alloc_and_load_all_pb_graphs(PowerOpts->do_power, RouterOpts->flat_routing);
Expand Down
2 changes: 2 additions & 0 deletions vpr/src/base/read_netlist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,11 +1057,13 @@ static void load_external_nets_and_cb(ClusteredNetlist& clb_nlist) {
int logical_pin = clb_nlist.pin_logical_index(pin_id);
int physical_pin = get_physical_pin(tile_type, block_type, logical_pin);

/* XXX: Silence warning
if (tile_type->is_ignored_pin[physical_pin] != is_ignored_net) {
VTR_LOG_WARN(
"Netlist connects net %s to both global and non-global pins.\n",
clb_nlist.net_name(net_id).c_str());
}
*/
}
}

Expand Down
Loading
Loading