diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index efef48ed4c1..8a8135d1400 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -597,6 +597,9 @@ void SetupPackerOpts(const t_options& Options,
     PackerOpts->timing_update_type = Options.timing_update_type;
     PackerOpts->pack_num_moves = Options.pack_num_moves;
     PackerOpts->pack_move_type = Options.pack_move_type;
+    PackerOpts->use_partitioning_in_pack = Options.use_partitioning_in_pack;
+    PackerOpts->partitioner_path = Options.partitioner_path;
+    PackerOpts->number_of_molecules_in_partition = Options.number_of_molecules_in_partition;
 }
 
 static void SetupNetlistOpts(const t_options& Options, t_netlist_opts& NetlistOpts) {
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index ad935c44faa..a79e12f89f0 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1824,6 +1824,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& args
              "The available values are: randomSwap, semiDirectedSwap, semiDirectedSameTypeSwap")
         .default_value("semiDirectedSwap")
         .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.use_partitioning_in_pack, "--use_partitioning_in_pack")
+        .help("Whether to use hypergraph partitioning (Mt-KaHyPar) during packing.")
+        .default_value("off")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.partitioner_path, "--partitioner_path")
+        .help("The path to the partitioner (Mt-KaHyPar) executable.")
+        .default_value("~/bin/MtKaHyPar")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.number_of_molecules_in_partition, "--number_of_molecules_in_partition")
+        .help("Average number of molecules in each partition. Only used when --use_partitioning_in_pack is on.")
+        .default_value("64")
+        .show_in(argparse::ShowIn::HELP_ONLY);
 
     auto& place_grp = parser.add_argument_group("placement options");
 
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 97645367680..e0f4c0f6896 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -101,6 +101,10 @@ struct t_options {
    argparse::ArgValue<bool> use_attraction_groups;
    argparse::ArgValue<int> pack_num_moves;
    argparse::ArgValue<std::string> pack_move_type;
+   argparse::ArgValue<bool> use_partitioning_in_pack;
+   argparse::ArgValue<std::string> partitioner_path;
+   argparse::ArgValue<int> number_of_molecules_in_partition;
+
    /* Placement options */
    argparse::ArgValue<int> Seed;
    argparse::ArgValue<bool> ShowPlaceTiming;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 7b98cc2c0e0..e9dcca19401 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -917,6 +917,9 @@ struct t_packer_opts {
     bool use_attraction_groups;
     int pack_num_moves;
     std::string pack_move_type;
+    bool use_partitioning_in_pack;
+    std::string partitioner_path;
+    int number_of_molecules_in_partition;
 };
 
 /**
diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp
index 4f1382a990d..86391fcaf87 100644
--- a/vpr/src/pack/cluster.cpp
+++ b/vpr/src/pack/cluster.cpp
@@ -41,6 +41,8 @@
 #include
 #include
 
+#include
+
 #include "vtr_assert.h"
 #include "vtr_log.h"
 #include "vtr_math.h"
@@ -74,6 +76,8 @@
 #include "re_cluster_util.h"
 #include "constraints_report.h"
 
+#include "config.h"
+
 /*
  * When attraction groups are created, the purpose is to pack more densely by adding more molecules
  * from the cluster's attraction group to the cluster.
In a normal flow, (when attraction groups are @@ -234,147 +238,716 @@ std::map do_clustering(const t_packer_opts& pa * Clustering *****************************************************************/ - while (istart != nullptr) { - is_cluster_legal = false; - savedseedindex = seedindex; - for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - ClusterBlockId clb_index(helper_ctx.total_clb_num); - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); - - /*Used to store cluster's PartitionRegion as primitives are added to it. - * Since some of the primitives might fail legality, this structure temporarily - * stores PartitionRegion information while the cluster is packed*/ - PartitionRegion temp_cluster_pr; - - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, - clb_index, istart, - num_used_type_instances, - packer_opts.target_device_utilization, - num_models, helper_ctx.max_cluster_size, - arch, packer_opts.device_layout, - lb_type_rr_graphs, &router_data, - detailed_routing_stage, &cluster_ctx.clb_nlist, - primitive_candidate_block_types, - verbosity, - packer_opts.enable_pin_feasibility_filter, - balance_block_type_utilization, - packer_opts.feasible_block_array_size, - temp_cluster_pr); - - //initial molecule in cluster has been processed - cluster_stats.num_molecules_processed++; - cluster_stats.mols_since_last_print++; - print_pack_status(helper_ctx.total_clb_num, - cluster_stats.num_molecules, - cluster_stats.num_molecules_processed, - cluster_stats.mols_since_last_print, - device_ctx.grid.width(), - device_ctx.grid.height(), - attraction_groups); - - VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - cluster_ctx.clb_nlist.block_type(clb_index)->name); - VTR_LOGV(verbosity > 2, "."); - //Progress dot for seed-block - fflush(stdout); - - t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); - int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); - update_cluster_stats(istart, clb_index, - is_clock, //Set of clock nets - is_clock, //Set of global nets (currently all clocks) - packer_opts.global_clocks, - packer_opts.alpha, packer_opts.beta, - packer_opts.timing_driven, packer_opts.connection_driven, - high_fanout_threshold, - *timing_info, - attraction_groups, - net_output_feeds_driving_block_input); - helper_ctx.total_clb_num++; - - if (packer_opts.timing_driven) { - cluster_stats.blocks_since_last_analysis++; - /*it doesn't make sense to do a timing analysis here since there* - *is only one atom block clustered it would not change anything */ + // Used to check if the "for loop" will find a legal solution ("failed_for_loop" == true) + // (T.Besson) + // + bool failed_for_loop = false; + int max_nb_molecule; + int nb_packed_molecules = 0; + + if(packer_opts.use_partitioning_in_pack){ + + AtomNetlist myNetlist = g_vpr_ctx.atom().nlist; + + std::vector molecules; + int numberOfMolecules = 0; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + molecules.push_back(cur_molecule); + numberOfMolecules++; + } + + int numberOfNets = 0; + int numberOfAtoms = 0; + for (auto netId : myNetlist.nets()) { + numberOfNets++; + } + + for (auto blockId : 
myNetlist.blocks()) { + numberOfAtoms++; + } + + int* atomBlockIdToMoleculeId = new int[numberOfAtoms]; + + for (int i = 0; i < numberOfAtoms; i++) { + atomBlockIdToMoleculeId[i] = -1; + } + + { + int i = 0; + for (auto molecule : molecules) { + for (auto abid : molecule->atom_block_ids) { + int bid = size_t(abid); + if (bid < 0 || bid >= numberOfAtoms) continue; + atomBlockIdToMoleculeId[bid] = i; + } + i++; } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); - cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - cur_cluster_placement_stats_ptr, - clb_inter_blk_nets, - clb_index, - verbosity, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - primitive_candidate_block_types); - prev_molecule = istart; - - /* - * When attraction groups are created, the purpose is to pack more densely by adding more molecules - * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are - * not on), the cluster keeps being packed until the get_molecule routines return either a repeated - * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the - * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a - * large value. - */ - int max_num_repeated_molecules = 0; - if (attraction_groups.num_attraction_groups() > 0) { - max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; - } else { - max_num_repeated_molecules = 1; + } + + + + + std::ofstream hmetisFile; + hmetisFile.open("hmetis.txt", std::ofstream::out); + //hmetisFile << numberOfNets << " " << numberOfAtoms << std::endl; + + //std::vector> netLines; + std::vector uniqueLines; + std::vector lineCounts; + + //int nextIndexForMolecule = numberOfMolecules + 1; + + //bool addNewNodeAndUseWeightsInHmetis = false; + + for (auto netId : myNetlist.nets()) { + //VTR_LOG("NET ID: %d\n", size_t(netId)); + std::vector newLine; + AtomBlockId driverBlockId = myNetlist.net_driver_block(netId); + int moleculeId = atomBlockIdToMoleculeId[size_t(driverBlockId)]; + newLine.push_back(moleculeId + 1); + double maxCriticality = 0; + for (auto pin_id : myNetlist.net_pins(netId)) { + + double pinCriticality = timing_info->setup_pin_criticality(pin_id); + if(pinCriticality > maxCriticality){ + maxCriticality = pinCriticality; + } + + //VTR_LOG("pin criticality: %f\n", pinCriticality); + + + auto port_id = myNetlist.pin_port(pin_id); + auto blk_id = myNetlist.port_block(port_id); + if (blk_id == driverBlockId) continue; + //VTR_LOG("%d ", size_t(blk_id)); + //hmetisFile << size_t(blk_id)+1 << " "; + moleculeId = atomBlockIdToMoleculeId[size_t(blk_id)]; + if (std::find(newLine.begin(), newLine.end(), moleculeId + 1) == newLine.end()) { + newLine.push_back(moleculeId + 1); + } + } + int lineWeight = 1; + + if (newLine.size() > 1) { + + std::sort(newLine.begin(), newLine.end()); + std::string s = std::accumulate(newLine.begin() + 1, newLine.end(), std::to_string(newLine[0]), + [](const std::string& a, int b) { + return a + " " + std::to_string(b); + }); + auto pointer = std::find(uniqueLines.begin(), uniqueLines.end(), s); + if 
(pointer == uniqueLines.end()) { + //netLines.push_back(newLine); + lineCounts.push_back(lineWeight); + uniqueLines.push_back(s); + } else { + lineCounts[pointer - uniqueLines.begin()]+= lineWeight; + } + } - int num_repeated_molecules = 0; - - while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { - prev_molecule = next_molecule; - - try_fill_cluster(packer_opts, - cur_cluster_placement_stats_ptr, - prev_molecule, - next_molecule, - num_repeated_molecules, - helper_ctx.primitives_list, - cluster_stats, - helper_ctx.total_clb_num, - num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, - attraction_groups, - clb_inter_blk_nets, - allow_unrelated_clustering, - high_fanout_threshold, - is_clock, - timing_info, - router_data, - target_ext_pin_util, - temp_cluster_pr, - block_pack_status, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - net_output_feeds_driving_block_input, - primitive_candidate_block_types); + } + + hmetisFile << uniqueLines.size() << " " << numberOfMolecules << " " << 11 << std::endl; + { + int i = 0; + for (auto line : uniqueLines) { + hmetisFile << lineCounts[i] << " " << line << std::endl; + i++; } + for (auto molecule : molecules) { + hmetisFile << molecule->atom_block_ids.size() << std::endl; + } + } + + hmetisFile.close(); - is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + int numberOfClusters = ceil(numberOfMolecules *1.0 / packer_opts.number_of_molecules_in_partition); + if (numberOfClusters <= 1) numberOfClusters = 2; - if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seedindex, cluster_stats, router_data); - store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); + char* commandToExecute = new char[1000]; + unsigned num_cpus = std::thread::hardware_concurrency(); + int num_threads = (int)(num_cpus / 2) > 1? 
(int)(num_cpus / 2) : 1; + // sprintf(commandToExecute, "%s hmetis.txt %d 3 10 4 1 3 0 0 ", packer_opts.hmetis_path.c_str(), numberOfClusters); + sprintf(commandToExecute, + "%s -h hmetis.txt --preset-type=quality -t %d -k %d -e 3 -o soed --enable-progress-bar=true --show-detailed-timings=true --verbose=true --write-partition-file=true", + packer_opts.partitioner_path.c_str(), num_threads, numberOfClusters); + VTR_LOG("MtKaHPar COMMAND: %s\n", commandToExecute); + + int code = system(commandToExecute); + VTR_ASSERT_MSG(code == 0, "Running MtKaHyPar failed with code 0"); + + delete commandToExecute; + + char* newFilename = new char[100]; + // sprintf(newFilename, "hmetis.txt.part.%d", numberOfClusters); + sprintf(newFilename, "hmetis.txt.part%d.epsilon3..seed0.KaHyPar", numberOfClusters); + + int* atomBlockIdToCluster = new int[numberOfAtoms]; + + std::ifstream hmetisOutFile; + hmetisOutFile.open(newFilename); + + int* clusterOfMolecule = new int[numberOfMolecules]; + + for (int i = 0; i < numberOfMolecules; i++) { + int clusterId; + hmetisOutFile >> clusterId; + + clusterOfMolecule[i] = clusterId; + + for (AtomBlockId abid : molecules[i]->atom_block_ids) { + if (size_t(abid) >= numberOfAtoms) continue; + atomBlockIdToCluster[size_t(abid)] = clusterId; + } + + } + hmetisOutFile.close(); + + std::vector* clusterMoleculeOrder = new std::vector[numberOfClusters]; + + for (int i = 0; i < numberOfClusters; i++) { + clusterMoleculeOrder[i] = std::vector(); + } + + for (auto atom : seed_atoms) { + int id = size_t(atom); + int clusterId = atomBlockIdToCluster[id]; + if (clusterId < 0 || clusterId >= numberOfClusters) { + VTR_LOG("BAD CLUSTER ID: %d, atomBlockId: %d\n", clusterId, id); + clusterId = 1; + } + VTR_ASSERT(clusterId >= 0 && clusterId < numberOfClusters); + int moleculeId = atomBlockIdToMoleculeId[id]; + VTR_ASSERT(moleculeId >= 0 && moleculeId < numberOfMolecules); + clusterMoleculeOrder[clusterId].push_back(moleculeId); + } + + + for (int partId = 0; partId < numberOfClusters; partId++) { + int currentIndexOfBestMolecule = 0; + + for (int moldId = 0; moldId < numberOfMolecules; moldId++) { + if (clusterOfMolecule[moldId] == partId) { + molecules[moldId]->valid = true; + //istart = molecules[moldId]; + } else { + molecules[moldId]->valid = false; + } + } + + if (currentIndexOfBestMolecule >= clusterMoleculeOrder[partId].size()) { + istart = nullptr; } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, num_used_type_instances, helper_ctx.total_clb_num, seedindex); + istart = molecules[clusterMoleculeOrder[partId][currentIndexOfBestMolecule]]; + currentIndexOfBestMolecule++; + while (!istart->valid && currentIndexOfBestMolecule < clusterMoleculeOrder[partId].size()) { + istart = molecules[clusterMoleculeOrder[partId][currentIndexOfBestMolecule]]; + currentIndexOfBestMolecule++; + } + if (!istart->valid) { + istart = nullptr; + } } - free_router_data(router_data); - router_data = nullptr; + + while (istart != nullptr) { + + is_cluster_legal = false; + savedseedindex = seedindex; + + // If in the previous "for (detailed_routing_stage ..." call we failed all the + // time (e.g 'failed_for_loop' is false) then when we re-enter it but we reduce the + // calls to "try_fill_cluster" by half in order to expect a feasible solution. + // + // This case can happen with unit test case "mult_seq" where we stuck on a mode4/mode5 + // conflict because of the calls to "try_fill_cluster" that always fail. 
In this case the + // only feasible solution is to stick to the original "start_new_cluster" solution and + // avoid any call to "try_fill_cluster". To do this, "nb_max_molecule" needs to reduce + // to 0 to avoid calling "try_fill_cluster". That's why we decrease "max_nb_molecule" in + // the iteration process to get down to 0. We decrease it by half each time but it could + // have been another scheme to decrease it. This one looks to give good QoR results. + // (T.Besson, Rapid Silicon) + // + if (failed_for_loop) { + max_nb_molecule = max_nb_molecule / 1.4; // decrease by 1.4 looks to be a good strategy. + // The packer is hyper sensitive to this number. + // A change by 0.1 currently may generate big + // difference on some designs like "axil_crossbar". + // More investigation to understand why ? (T.Besson) + } else { + max_nb_molecule = 512; // important starting number. The packer is again sensitive + // to that number. This number needs to be high to guarantee some + // stable behavior. (T.Besson). + } + + // Expect that we will not find a legal solution in the below "for loop" + // (T.Besson) + // + failed_for_loop = true; + + for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; + !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; + detailed_routing_stage++) { + + ClusterBlockId clb_index(helper_ctx.total_clb_num); + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); + + /*Used to store cluster's PartitionRegion as primitives are added to it. + * Since some of the primitives might fail legality, this structure temporarily + * stores PartitionRegion information while the cluster is packed*/ + PartitionRegion temp_cluster_pr; + + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + clb_index, istart, + num_used_type_instances, + packer_opts.target_device_utilization, + num_models, helper_ctx.max_cluster_size, + arch, packer_opts.device_layout, + lb_type_rr_graphs, &router_data, + detailed_routing_stage, &cluster_ctx.clb_nlist, + primitive_candidate_block_types, + verbosity, + packer_opts.enable_pin_feasibility_filter, + balance_block_type_utilization, + packer_opts.feasible_block_array_size, + temp_cluster_pr); + + //initial molecule in cluster has been processed + cluster_stats.num_molecules_processed++; + cluster_stats.mols_since_last_print++; + print_pack_status(helper_ctx.total_clb_num, + cluster_stats.num_molecules, + cluster_stats.num_molecules_processed, + cluster_stats.mols_since_last_print, + device_ctx.grid.width(), + device_ctx.grid.height(), + attraction_groups); + + VTR_LOGV(verbosity > 2, + "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, + cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + cluster_ctx.clb_nlist.block_type(clb_index)->name); + VTR_LOGV(verbosity > 2, "."); + //Progress dot for seed-block + fflush(stdout); + + t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); + update_cluster_stats(istart, clb_index, + is_clock, //Set of clock nets + is_clock, //Set of global nets (currently all clocks) + packer_opts.global_clocks, + packer_opts.alpha, packer_opts.beta, + packer_opts.timing_driven, packer_opts.connection_driven, + high_fanout_threshold, + *timing_info, + attraction_groups, + net_output_feeds_driving_block_input); + helper_ctx.total_clb_num++; + + if 
(packer_opts.timing_driven) { + cluster_stats.blocks_since_last_analysis++; + /*it doesn't make sense to do a timing analysis here since there* + *is only one atom block clustered it would not change anything */ + } + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cluster_stats.num_unrelated_clustering_attempts = 0; + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, + verbosity, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + prev_molecule = istart; + + /* + * When attraction groups are created, the purpose is to pack more densely by adding more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. + */ + int max_num_repeated_molecules = 0; + if (attraction_groups.num_attraction_groups() > 0) { + max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; + } else { + max_num_repeated_molecules = 1; + } + int num_repeated_molecules = 0; + + int i = 0; + + while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { + prev_molecule = next_molecule; + + if (i == max_nb_molecule) { + break; + } + + try_fill_cluster(packer_opts, + cur_cluster_placement_stats_ptr, + prev_molecule, + next_molecule, + num_repeated_molecules, + helper_ctx.primitives_list, + cluster_stats, + helper_ctx.total_clb_num, + num_models, + helper_ctx.max_cluster_size, + clb_index, + detailed_routing_stage, + attraction_groups, + clb_inter_blk_nets, + allow_unrelated_clustering, + high_fanout_threshold, + is_clock, + timing_info, + router_data, + target_ext_pin_util, + temp_cluster_pr, + block_pack_status, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + net_output_feeds_driving_block_input, + primitive_candidate_block_types); + + i++; + + } + + max_nb_molecule = i; + + is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + + if (is_cluster_legal) { + + // Calls the extra check for "clb" only (may need to revisit this check) (T.Besson) + // + if (!strcmp(cluster_ctx.clb_nlist.block_type(clb_index)->name, "clb")) { + + // Temporary fix : make sure that the solution has no mode confict. This check is + // performed by initiating a first xml kind of output work. There may be a tricky + // conflict with some lb routing so we need to store temporary the lb nets in the + // cluster data structure to make the check inside "check_if_xml_mode_conflict". + // (T.Besson, Rapid Silicon) + // + (clustering_data.intra_lb_routing).push_back(router_data->saved_lb_nets); + + // Call the check as if we would output the final packing ... and see if there is any + // mode conflict. (T.Besson) + // 'is_cluster_legal' turns to false if there is a mode conflict. 
+ // + is_cluster_legal = check_if_xml_mode_conflict(packer_opts, arch, + clustering_data.intra_lb_routing); + + // Remove the previous pushed "intra_lb_routing_solution" to clean up + // the place. (T.Besson) + // + (clustering_data.intra_lb_routing).pop_back(); + + if (!is_cluster_legal && + (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM)) { + + VTR_LOGV(verbosity > 0, "Info: rejected cluster packing solution with modes conflict [%d]\n", + max_nb_molecule); + } + } + + if (is_cluster_legal) { + + istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, + seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, + seedindex, cluster_stats, router_data); + + store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, + le_count, clb_inter_blk_nets); + + nb_packed_molecules += max_nb_molecule; + + VTR_LOGV(verbosity > 0, "Successfully packed Logic Block [%d]\n", max_nb_molecule); + + failed_for_loop = false; // tell the outer loop that we succeeded within this loop + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + for (int index = 0; index < clusterMoleculeOrder[partId].size(); index++) { + istart = molecules[clusterMoleculeOrder[partId][index]]; + if (istart->valid) { + break; + } + } + if (!istart->valid) { + istart = nullptr; + } + + free_router_data(router_data); + router_data = nullptr; + } + + } + } // for(int partId=0;partId < numberOfClusters;partId++){ + }else{ + while (istart != nullptr) { + + is_cluster_legal = false; + savedseedindex = seedindex; + + // If in the previous "for (detailed_routing_stage ..." call we failed all the + // time (e.g 'failed_for_loop' is false) then when we re-enter it but we reduce the + // calls to "try_fill_cluster" by half in order to expect a feasible solution. + // + // This case can happen with unit test case "mult_seq" where we stuck on a mode4/mode5 + // conflict because of the calls to "try_fill_cluster" that always fail. In this case the + // only feasible solution is to stick to the original "start_new_cluster" solution and + // avoid any call to "try_fill_cluster". To do this, "nb_max_molecule" needs to reduce + // to 0 to avoid calling "try_fill_cluster". That's why we decrease "max_nb_molecule" in + // the iteration process to get down to 0. We decrease it by half each time but it could + // have been another scheme to decrease it. This one looks to give good QoR results. + // (T.Besson, Rapid Silicon) + // + if (failed_for_loop) { + max_nb_molecule = max_nb_molecule / 1.4; // decrease by 1.4 looks to be a good strategy. + // The packer is hyper sensitive to this number. + // A change by 0.1 currently may generate big + // difference on some designs like "axil_crossbar". + // More investigation to understand why ? (T.Besson) + } else { + max_nb_molecule = 512; // important starting number. The packer is again sensitive + // to that number. This number needs to be high to guarantee some + // stable behavior. (T.Besson). 
+ } + + // Expect that we will not find a legal solution in the below "for loop" + // (T.Besson) + // + failed_for_loop = true; + + for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; + !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; + detailed_routing_stage++) { + + ClusterBlockId clb_index(helper_ctx.total_clb_num); + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); + + /*Used to store cluster's PartitionRegion as primitives are added to it. + * Since some of the primitives might fail legality, this structure temporarily + * stores PartitionRegion information while the cluster is packed*/ + PartitionRegion temp_cluster_pr; + + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + clb_index, istart, + num_used_type_instances, + packer_opts.target_device_utilization, + num_models, helper_ctx.max_cluster_size, + arch, packer_opts.device_layout, + lb_type_rr_graphs, &router_data, + detailed_routing_stage, &cluster_ctx.clb_nlist, + primitive_candidate_block_types, + verbosity, + packer_opts.enable_pin_feasibility_filter, + balance_block_type_utilization, + packer_opts.feasible_block_array_size, + temp_cluster_pr); + + //initial molecule in cluster has been processed + cluster_stats.num_molecules_processed++; + cluster_stats.mols_since_last_print++; + print_pack_status(helper_ctx.total_clb_num, + cluster_stats.num_molecules, + cluster_stats.num_molecules_processed, + cluster_stats.mols_since_last_print, + device_ctx.grid.width(), + device_ctx.grid.height(), + attraction_groups); + + VTR_LOGV(verbosity > 2, + "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, + cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + cluster_ctx.clb_nlist.block_type(clb_index)->name); + VTR_LOGV(verbosity > 2, "."); + //Progress dot for seed-block + fflush(stdout); + + t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); + update_cluster_stats(istart, clb_index, + is_clock, //Set of clock nets + is_clock, //Set of global nets (currently all clocks) + packer_opts.global_clocks, + packer_opts.alpha, packer_opts.beta, + packer_opts.timing_driven, packer_opts.connection_driven, + high_fanout_threshold, + *timing_info, + attraction_groups, + net_output_feeds_driving_block_input); + helper_ctx.total_clb_num++; + + if (packer_opts.timing_driven) { + cluster_stats.blocks_since_last_analysis++; + /*it doesn't make sense to do a timing analysis here since there* + *is only one atom block clustered it would not change anything */ + } + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cluster_stats.num_unrelated_clustering_attempts = 0; + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, + verbosity, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + prev_molecule = istart; + + /* + * When attraction groups are created, the purpose is to pack more densely by adding 
more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. + */ + int max_num_repeated_molecules = 0; + if (attraction_groups.num_attraction_groups() > 0) { + max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; + } else { + max_num_repeated_molecules = 1; + } + int num_repeated_molecules = 0; + + int i = 0; + + while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { + prev_molecule = next_molecule; + + if (i == max_nb_molecule) { + break; + } + + try_fill_cluster(packer_opts, + cur_cluster_placement_stats_ptr, + prev_molecule, + next_molecule, + num_repeated_molecules, + helper_ctx.primitives_list, + cluster_stats, + helper_ctx.total_clb_num, + num_models, + helper_ctx.max_cluster_size, + clb_index, + detailed_routing_stage, + attraction_groups, + clb_inter_blk_nets, + allow_unrelated_clustering, + high_fanout_threshold, + is_clock, + timing_info, + router_data, + target_ext_pin_util, + temp_cluster_pr, + block_pack_status, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + net_output_feeds_driving_block_input, + primitive_candidate_block_types); + + i++; + + } + + max_nb_molecule = i; + + is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + + if (is_cluster_legal) { + + // Calls the extra check for "clb" only (may need to revisit this check) (T.Besson) + // + if (!strcmp(cluster_ctx.clb_nlist.block_type(clb_index)->name, "clb")) { + + // Temporary fix : make sure that the solution has no mode confict. This check is + // performed by initiating a first xml kind of output work. There may be a tricky + // conflict with some lb routing so we need to store temporary the lb nets in the + // cluster data structure to make the check inside "check_if_xml_mode_conflict". + // (T.Besson, Rapid Silicon) + // + (clustering_data.intra_lb_routing).push_back(router_data->saved_lb_nets); + + // Call the check as if we would output the final packing ... and see if there is any + // mode conflict. (T.Besson) + // 'is_cluster_legal' turns to false if there is a mode conflict. + // + is_cluster_legal = check_if_xml_mode_conflict(packer_opts, arch, + clustering_data.intra_lb_routing); + + // Remove the previous pushed "intra_lb_routing_solution" to clean up + // the place. 
(T.Besson) + // + (clustering_data.intra_lb_routing).pop_back(); + + if (!is_cluster_legal && + (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM)) { + + VTR_LOGV(verbosity > 0, "Info: rejected cluster packing solution with modes conflict [%d]\n", + max_nb_molecule); + } + } + + if (is_cluster_legal) { + + istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, + seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, + seedindex, cluster_stats, router_data); + + store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, + le_count, clb_inter_blk_nets); + + nb_packed_molecules += max_nb_molecule; + + VTR_LOGV(verbosity > 0, "Successfully packed Logic Block [%d]\n", max_nb_molecule); + + failed_for_loop = false; // tell the outer loop that we succeeded within this loop + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + free_router_data(router_data); + router_data = nullptr; + } + } } diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 0e12305dc70..73bb6e55360 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -7,6 +7,26 @@ #include "vtr_math.h" #include "SetupGrid.h" +// #include "vtr_assert.h" +// #include "vtr_log.h" +// #include "vtr_digest.h" +// #include "vtr_memory.h" + +// #include "vpr_types.h" +// #include "vpr_error.h" + +// #include "pugixml.hpp" + +#include "globals.h" +#include "atom_netlist.h" +#include "pack_types.h" +#include "pb_type_graph.h" +#include "output_clustering.h" +#include "read_xml_arch_file.h" +#include "vpr_utils.h" +#include "pack.h" + + /**********************************/ /* Global variables in clustering */ /**********************************/ @@ -268,6 +288,16 @@ void check_and_output_clustering(const t_packer_opts& packer_opts, VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); } +bool check_if_xml_mode_conflict(const t_packer_opts& packer_opts, + const t_arch* arch, + const vtr::vector*>& intra_lb_routing) { + + bool legal = check_output_clustering(intra_lb_routing, arch->architecture_id, packer_opts.output_file.c_str()); + + return legal; +} + + void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth) { auto& device_ctx = g_vpr_ctx.mutable_device(); @@ -1639,7 +1669,9 @@ t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& pac router_data->saved_lb_nets = nullptr; //Pick a new seed - next_seed = get_highest_gain_seed_molecule(&seedindex, seed_atoms); + if(!packer_opts.use_partitioning_in_pack){ + next_seed = get_highest_gain_seed_molecule(&seedindex, seed_atoms); + } if (packer_opts.timing_driven) { if (num_blocks_hill_added > 0) { @@ -3688,4 +3720,4 @@ void init_clb_atoms_lookup(vtr::vector*>& intra_lb_routing); +bool check_if_xml_mode_conflict(const t_packer_opts& packer_opts, + const t_arch* arch, + const vtr::vector*>& intra_lb_routing); + + void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth); @@ -452,4 +458,4 @@ bool cleanup_pb(t_pb* pb); void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); -#endif \ No newline at end of file +#endif diff --git 
a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index 084898322b9..dbf9732af3f 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -38,7 +38,7 @@ static void print_clustering_stats(char* block_name, int num_block_type, float n /* Prints out one cluster (clb). Both the external pins and the * * internal connections are printed out. */ -static void print_stats() { +void print_stats() { int ipin; unsigned int itype; int total_nets_absorbed; @@ -136,7 +136,7 @@ static void print_stats() { } static void print_clustering_stats_header() { - VTR_LOG("Final Clustering Statistics: \n"); + VTR_LOG("Clustering Statistics: \n"); VTR_LOG("---------- -------- ------------------------------------ --------------------------\n"); VTR_LOG("Block Type # Blocks Avg. # of input clocks and pins used Avg. # of output pins used\n"); VTR_LOG("---------- -------- ------------------------------------ --------------------------\n"); @@ -238,10 +238,13 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc /* Determine mode if applicable */ port_index = 0; for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { VTR_ASSERT(!pb_type->ports[i].is_clock); + for (j = 0; j < pb_type->ports[i].num_pins; j++) { const t_pb_graph_pin* pin = &pb_graph_node->output_pins[port_index][j]; + node_index = pin->pin_count_in_cluster; if (pb_type->num_modes > 0 && pb_route.count(node_index) && pb_route[node_index].atom_net_id) { prev_node = pb_route[node_index].driver_pb_pin_id; @@ -250,6 +253,7 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc VTR_ASSERT(edge != nullptr); mode_of_edge = edge->interconnect->parent_mode_index; + if (mode != nullptr && &pb_type->modes[mode_of_edge] != mode) { VPR_FATAL_ERROR(VPR_ERROR_PACK, "Differing modes for block. 
Got %s previously and %s for edge %d (interconnect %s).", @@ -259,6 +263,8 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } VTR_ASSERT(mode == nullptr || &pb_type->modes[mode_of_edge] == mode); mode = &pb_type->modes[mode_of_edge]; + + } else { } } port_index++; @@ -361,6 +367,171 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } } + +static bool check_clustering_xml_open_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, + t_pb_graph_node* pb_graph_node, int pb_index, + bool is_used, const t_pb_routes& pb_route) { + int i, j, k, m; + const t_pb_type *pb_type, *child_pb_type; + t_mode* mode = nullptr; + int prev_node; + int mode_of_edge, port_index, node_index; + + mode_of_edge = UNDEFINED; + + pb_type = pb_graph_node->pb_type; + + pugi::xml_node block_node = parent_node.append_child("block"); + block_node.append_attribute("name") = "open"; + block_node.append_attribute("instance") = vtr::string_fmt("%s[%d]", pb_graph_node->pb_type->name, pb_index).c_str(); + std::vector block_modes; + + if (is_used) { + /* Determine mode if applicable */ + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + const t_pb_graph_pin* pin = &pb_graph_node->output_pins[port_index][j]; + + node_index = pin->pin_count_in_cluster; + if (pb_type->num_modes > 0 && pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + prev_node = pb_route[node_index].driver_pb_pin_id; + const t_pb_graph_pin* prev_pin = pb_graph_pin_lookup_from_index_by_type.pb_gpin(type->index, prev_node); + const t_pb_graph_edge* edge = get_edge_between_pins(prev_pin, pin); + + VTR_ASSERT(edge != nullptr); + mode_of_edge = edge->interconnect->parent_mode_index; + + if (mode != nullptr && &pb_type->modes[mode_of_edge] != mode) { + + // we return false because we do see a mode conflict in the final solution. + // Ideally we would need to fix the root cause of this mode conflict. + // Will do it when more time (Rapid Silicon, T.Besson). 
+ // + + // Handle the error message at the caller level +#if 0 + VTR_LOG("Info: modes conflict : pb mode = %s, edge mode = %s!\n", mode->name, + pb_type->modes[mode_of_edge].name); +#endif + return false; + } + VTR_ASSERT(mode == nullptr || &pb_type->modes[mode_of_edge] == mode); + mode = &pb_type->modes[mode_of_edge]; + + } else { + } + } + port_index++; + } + } + + VTR_ASSERT(mode != nullptr && mode_of_edge != UNDEFINED); + + block_node.append_attribute("mode") = mode->name; + block_node.append_attribute("pb_type_num_modes") = pb_type->num_modes; + + pugi::xml_node inputs_node = block_node.append_child("inputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = inputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_type->parent_mode == nullptr) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node outputs_node = block_node.append_child("outputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + pugi::xml_node port_node = outputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->output_pins[port_index][j].pin_count_in_cluster; + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node clock_node = block_node.append_child("clocks"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = clock_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->clock_pins[port_index][j].pin_count_in_cluster; + if (pb_type->parent_mode == nullptr) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + if (pb_type->num_modes > 0) { + for (i = 0; i < mode->num_pb_type_children; i++) { + child_pb_type = &mode->pb_type_children[i]; + for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { + port_index = 0; + is_used = false; + for (k = 0; k < child_pb_type->num_ports && !is_used; k++) { + if (child_pb_type->ports[k].type == OUT_PORT) { + for (m = 0; m < child_pb_type->ports[k].num_pins; m++) { + node_index = pb_graph_node->child_pb_graph_nodes[mode_of_edge][i][j].output_pins[port_index][m].pin_count_in_cluster; + if 
(pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + is_used = true; + break; + } + } + port_index++; + } + } + bool legal = check_clustering_xml_open_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, + &pb_graph_node->child_pb_graph_nodes[mode_of_edge][i][j], + j, is_used, pb_route); + if (!legal) { + return false; + } + } + } + } + } + + return true; // everything is fine +} + /* outputs a block that is used (i.e. has configuration) and all of its child blocks */ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { int i, j, k, m; @@ -559,6 +730,256 @@ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_typ } } +/* outputs a block that is used (i.e. has configuration) and all of its child blocks */ +static bool check_clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { + int i, j, k, m; + const t_pb_type *pb_type, *child_pb_type; + t_pb_graph_node* pb_graph_node; + t_mode* mode; + int port_index, node_index; + bool is_used; + + pb_type = pb->pb_graph_node->pb_type; + pb_graph_node = pb->pb_graph_node; + mode = &pb_type->modes[pb->mode]; + + pugi::xml_node block_node = parent_node.append_child("block"); + block_node.append_attribute("name") = pb->name; + block_node.append_attribute("instance") = vtr::string_fmt("%s[%d]", pb_type->name, pb_index).c_str(); + + if (pb_type->num_modes > 0) { + block_node.append_attribute("mode") = mode->name; + } else { + const auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId atom_blk = atom_ctx.nlist.find_block(pb->name); + VTR_ASSERT(atom_blk); + + pugi::xml_node attrs_node = block_node.append_child("attributes"); + for (const auto& attr : atom_ctx.nlist.block_attrs(atom_blk)) { + pugi::xml_node attr_node = attrs_node.append_child("attribute"); + attr_node.append_attribute("name") = attr.first.c_str(); + attr_node.text().set(attr.second.c_str()); + } + + pugi::xml_node params_node = block_node.append_child("parameters"); + for (const auto& param : atom_ctx.nlist.block_params(atom_blk)) { + pugi::xml_node param_node = params_node.append_child("parameter"); + param_node.append_attribute("name") = param.first.c_str(); + param_node.text().set(param.second.c_str()); + } + } + + pugi::xml_node inputs_node = block_node.append_child("inputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = inputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_type->parent_mode == nullptr) { + if (pb_route.count(node_index)) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_net_text(AtomNetId::INVALID())); + } + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + + //The cluster router may have rotated equivalent pins (e.g. 
LUT inputs), + //record the resulting rotation here so it can be unambigously mapped + //back to the atom netlist + if (pb_type->ports[i].equivalent != PortEquivalence::NONE && pb_type->parent_mode != nullptr && pb_type->num_modes == 0) { + //This is a primitive with equivalent inputs + + auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId atom_blk = atom_ctx.nlist.find_block(pb->name); + VTR_ASSERT(atom_blk); + + AtomPortId atom_port = atom_ctx.nlist.find_atom_port(atom_blk, pb_type->ports[i].model_port); + + if (atom_port) { //Port exists (some LUTs may have no input and hence no port in the atom netlist) + + pugi::xml_node port_rotation_node = inputs_node.append_child("port_rotation_map"); + port_rotation_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::set recorded_pins; + std::vector pin_map_list; + + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_route.count(node_index)) { + AtomNetId atom_net = pb_route[node_index].atom_net_id; + + VTR_ASSERT(atom_net); + + //This physical pin is in use, find the original pin in the atom netlist + AtomPinId orig_pin; + for (AtomPinId atom_pin : atom_ctx.nlist.port_pins(atom_port)) { + if (recorded_pins.count(atom_pin)) continue; //Don't add pins twice + + AtomNetId atom_pin_net = atom_ctx.nlist.pin_net(atom_pin); + + if (atom_pin_net == atom_net) { + recorded_pins.insert(atom_pin); + orig_pin = atom_pin; + break; + } + } + + VTR_ASSERT(orig_pin); + //The physical pin j, maps to a pin in the atom netlist + pin_map_list.push_back(vtr::string_fmt("%d", atom_ctx.nlist.pin_port_bit(orig_pin))); + } else { + //The physical pin is disconnected + pin_map_list.push_back("open"); + } + } + port_rotation_node.text().set(vtr::join(pin_map_list.begin(), pin_map_list.end(), " ").c_str()); + } + } + + port_index++; + } + } + + pugi::xml_node outputs_node = block_node.append_child("outputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + pugi::xml_node port_node = outputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->output_pins[port_index][j].pin_count_in_cluster; + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node clock_node = block_node.append_child("clocks"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = clock_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->clock_pins[port_index][j].pin_count_in_cluster; + if (pb_type->parent_mode == nullptr) { + if (pb_route.count(node_index)) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_net_text(AtomNetId::INVALID())); + } + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + 
port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + if (pb_type->num_modes > 0) { + for (i = 0; i < mode->num_pb_type_children; i++) { + for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { + /* If child pb is not used but routing is used, I must print things differently */ + if ((pb->child_pbs[i] != nullptr) && (pb->child_pbs[i][j].name != nullptr)) { + bool legal = check_clustering_xml_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, &pb->child_pbs[i][j], j, pb_route); + if (!legal) { + return false; + } + } else { + is_used = false; + child_pb_type = &mode->pb_type_children[i]; + port_index = 0; + + for (k = 0; k < child_pb_type->num_ports && !is_used; k++) { + if (child_pb_type->ports[k].type == OUT_PORT) { + for (m = 0; m < child_pb_type->ports[k].num_pins; m++) { + node_index = pb_graph_node->child_pb_graph_nodes[pb->mode][i][j].output_pins[port_index][m].pin_count_in_cluster; + if (pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + is_used = true; + break; + } + } + port_index++; + } + } + bool legal = check_clustering_xml_open_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, + &pb_graph_node->child_pb_graph_nodes[pb->mode][i][j], + j, is_used, pb_route); + if (!legal) { + return false; + } + } + } + } + } + + return true; +} + +bool check_output_clustering(const vtr::vector*>& intra_lb_routing, + const std::string& architecture_id, const char* out_fname) { + + auto& device_ctx = g_vpr_ctx.device(); + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + + // work around : pick up the last block ID to check only the last created block + // + ClusterBlockId last_id; + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + last_id = blk_id; + } + + if (!intra_lb_routing.empty()) { + cluster_ctx.clb_nlist.block_pb(last_id)->pb_route = alloc_and_load_pb_route(intra_lb_routing[last_id], + cluster_ctx.clb_nlist.block_pb(last_id)->pb_graph_node); + } + + IntraLbPbPinLookup pb_graph_pin_lookup_from_index_by_type(device_ctx.logical_block_types); + + pugi::xml_document out_xml; + + pugi::xml_node block_node = out_xml.append_child("block"); + block_node.append_attribute("name") = out_fname; + block_node.append_attribute("instance") = "FPGA_packed_netlist[0]"; + block_node.append_attribute("architecture_id") = architecture_id.c_str(); + block_node.append_attribute("atom_netlist_id") = atom_ctx.nlist.netlist_id().c_str(); + + + // Check only the last_id block + // + if (!check_clustering_xml_block(block_node, cluster_ctx.clb_nlist.block_type(last_id), pb_graph_pin_lookup_from_index_by_type, + cluster_ctx.clb_nlist.block_pb(last_id), size_t(last_id), cluster_ctx.clb_nlist.block_pb(last_id)->pb_route)) { + return false; + } + + if (!intra_lb_routing.empty()) { + cluster_ctx.clb_nlist.block_pb(last_id)->pb_route.clear(); + } + + return true; // check was legal +} + /* This routine dumps out the output netlist in a format suitable for * * input to vpr. This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. */ diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 509690e4934..7b1819a179f 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -9,4 +9,8 @@ void output_clustering(const vtr::vector*>& intra_lb_routing, const std::string& architecture_id, const char* out_fname); + +void print_stats(); + #endif
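
For reference, the hypergraph that the new code in cluster.cpp exports for Mt-KaHyPar follows the weighted hMETIS input convention: a header line with the hyperedge count, vertex count, and format code 11, then one line per hyperedge that starts with the hyperedge weight and lists 1-based vertex ids, then one vertex weight per line. The minimal standalone sketch below is not part of the patch; the file name and toy numbers are made up, but the layout mirrors what the patch writes (vertices stand in for pack molecules, hyperedge weights are the merged-net multiplicities, vertex weights are molecule atom counts).

    // Illustrative only: writes a toy hypergraph in the weighted hMETIS format
    // (fmt = 11: hyperedge weights plus vertex weights) handed to Mt-KaHyPar.
    #include <fstream>
    #include <utility>
    #include <vector>

    int main() {
        // Each hyperedge is a (weight, member molecule ids) pair. In the patch the
        // weight counts how many identical molecule-level nets were merged into one line.
        std::vector<std::pair<int, std::vector<int>>> hyperedges = {
            {2, {1, 2, 3}},
            {1, {2, 4}},
            {1, {1, 4}},
        };
        // One weight per vertex; in the patch this is the molecule's atom count.
        std::vector<int> vertex_weights = {3, 1, 2, 1};

        std::ofstream out("hmetis_example.txt");
        // Header: <#hyperedges> <#vertices> <fmt>. fmt 11 means hyperedge weights
        // lead each net line and vertex weights follow after all net lines.
        out << hyperedges.size() << " " << vertex_weights.size() << " " << 11 << "\n";
        for (const auto& edge : hyperedges) {
            out << edge.first;               // hyperedge weight
            for (int v : edge.second) {
                out << " " << v;             // 1-based vertex (molecule) ids
            }
            out << "\n";
        }
        for (int w : vertex_weights) {
            out << w << "\n";                // one vertex weight per line
        }
        return 0;
    }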
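
Assuming a standard VPR command line, the new options would be exercised with an invocation along these lines (architecture and circuit names are placeholders):

    vpr my_arch.xml my_circuit.blif --pack --use_partitioning_in_pack on --partitioner_path ~/bin/MtKaHyPar --number_of_molecules_in_partition 64

With these settings the packer requests roughly ceil(#molecules / 64) partitions, runs the executable named by --partitioner_path on the exported hmetis.txt, and reads the partition file that Mt-KaHyPar writes back into the working directory to pick the seed and candidate molecules for each partition in turn.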