diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index efef48ed4c1..8a8135d1400 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -597,6 +597,9 @@ void SetupPackerOpts(const t_options& Options,
     PackerOpts->timing_update_type = Options.timing_update_type;
     PackerOpts->pack_num_moves = Options.pack_num_moves;
     PackerOpts->pack_move_type = Options.pack_move_type;
+    PackerOpts->use_partitioning_in_pack = Options.use_partitioning_in_pack;
+    PackerOpts->partitioner_path = Options.partitioner_path;
+    PackerOpts->number_of_molecules_in_partition = Options.number_of_molecules_in_partition;
 }
 
 static void SetupNetlistOpts(const t_options& Options, t_netlist_opts& NetlistOpts) {
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index ad935c44faa..a79e12f89f0 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1824,6 +1824,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& args
              "The available values are: randomSwap, semiDirectedSwap, semiDirectedSameTypeSwap")
         .default_value("semiDirectedSwap")
         .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.use_partitioning_in_pack, "--use_partitioning_in_pack")
+        .help("Whether to use hypergraph partitioning (Mt-KaHyPar) during packing.")
+        .default_value("off")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.partitioner_path, "--partitioner_path")
+        .help("The path to the partitioner (Mt-KaHyPar) executable.")
+        .default_value("~/bin/MtKaHyPar")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
+    pack_grp.add_argument(args.number_of_molecules_in_partition, "--number_of_molecules_in_partition")
+        .help("Average number of molecules in each partition. Only used when --use_partitioning_in_pack is on.")
+        .default_value("64")
+        .show_in(argparse::ShowIn::HELP_ONLY);
 
     auto& place_grp = parser.add_argument_group("placement options");
 
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 97645367680..e0f4c0f6896 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -101,6 +101,10 @@ struct t_options {
    argparse::ArgValue<bool> use_attraction_groups;
    argparse::ArgValue<int> pack_num_moves;
    argparse::ArgValue<std::string> pack_move_type;
+   argparse::ArgValue<bool> use_partitioning_in_pack;
+   argparse::ArgValue<std::string> partitioner_path;
+   argparse::ArgValue<int> number_of_molecules_in_partition;
+
    /* Placement options */
    argparse::ArgValue<int> Seed;
    argparse::ArgValue<bool> ShowPlaceTiming;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 7b98cc2c0e0..e9dcca19401 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -917,6 +917,9 @@ struct t_packer_opts {
     bool use_attraction_groups;
     int pack_num_moves;
     std::string pack_move_type;
+    bool use_partitioning_in_pack;
+    std::string partitioner_path;
+    int number_of_molecules_in_partition;
 };
 
 /**
diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp
index 4f1382a990d..86391fcaf87 100644
--- a/vpr/src/pack/cluster.cpp
+++ b/vpr/src/pack/cluster.cpp
@@ -41,6 +41,8 @@
 #include
 #include
 
+#include
+
 #include "vtr_assert.h"
 #include "vtr_log.h"
 #include "vtr_math.h"
@@ -74,6 +76,8 @@
 #include "re_cluster_util.h"
 #include "constraints_report.h"
 
+#include "config.h"
+
 /*
  * When attraction groups are created, the purpose is to pack more densely by adding more molecules
  * from the cluster's attraction group to the cluster.
In a normal flow, (when attraction groups are @@ -234,147 +238,716 @@ std::map do_clustering(const t_packer_opts& pa * Clustering *****************************************************************/ - while (istart != nullptr) { - is_cluster_legal = false; - savedseedindex = seedindex; - for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; detailed_routing_stage++) { - ClusterBlockId clb_index(helper_ctx.total_clb_num); - - VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); - - /*Used to store cluster's PartitionRegion as primitives are added to it. - * Since some of the primitives might fail legality, this structure temporarily - * stores PartitionRegion information while the cluster is packed*/ - PartitionRegion temp_cluster_pr; - - start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, - clb_index, istart, - num_used_type_instances, - packer_opts.target_device_utilization, - num_models, helper_ctx.max_cluster_size, - arch, packer_opts.device_layout, - lb_type_rr_graphs, &router_data, - detailed_routing_stage, &cluster_ctx.clb_nlist, - primitive_candidate_block_types, - verbosity, - packer_opts.enable_pin_feasibility_filter, - balance_block_type_utilization, - packer_opts.feasible_block_array_size, - temp_cluster_pr); - - //initial molecule in cluster has been processed - cluster_stats.num_molecules_processed++; - cluster_stats.mols_since_last_print++; - print_pack_status(helper_ctx.total_clb_num, - cluster_stats.num_molecules, - cluster_stats.num_molecules_processed, - cluster_stats.mols_since_last_print, - device_ctx.grid.width(), - device_ctx.grid.height(), - attraction_groups); - - VTR_LOGV(verbosity > 2, - "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, - cluster_ctx.clb_nlist.block_name(clb_index).c_str(), - cluster_ctx.clb_nlist.block_type(clb_index)->name); - VTR_LOGV(verbosity > 2, "."); - //Progress dot for seed-block - fflush(stdout); - - t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); - int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); - update_cluster_stats(istart, clb_index, - is_clock, //Set of clock nets - is_clock, //Set of global nets (currently all clocks) - packer_opts.global_clocks, - packer_opts.alpha, packer_opts.beta, - packer_opts.timing_driven, packer_opts.connection_driven, - high_fanout_threshold, - *timing_info, - attraction_groups, - net_output_feeds_driving_block_input); - helper_ctx.total_clb_num++; - - if (packer_opts.timing_driven) { - cluster_stats.blocks_since_last_analysis++; - /*it doesn't make sense to do a timing analysis here since there* - *is only one atom block clustered it would not change anything */ + // Used to check if the "for loop" will find a legal solution ("failed_for_loop" == true) + // (T.Besson) + // + bool failed_for_loop = false; + int max_nb_molecule; + int nb_packed_molecules = 0; + + if(packer_opts.use_partitioning_in_pack){ + + AtomNetlist myNetlist = g_vpr_ctx.atom().nlist; + + std::vector molecules; + int numberOfMolecules = 0; + for (auto cur_molecule = molecule_head; cur_molecule != nullptr; cur_molecule = cur_molecule->next) { + molecules.push_back(cur_molecule); + numberOfMolecules++; + } + + int numberOfNets = 0; + int numberOfAtoms = 0; + for (auto netId : myNetlist.nets()) { + numberOfNets++; + } + + for (auto blockId : 
myNetlist.blocks()) { + numberOfAtoms++; + } + + int* atomBlockIdToMoleculeId = new int[numberOfAtoms]; + + for (int i = 0; i < numberOfAtoms; i++) { + atomBlockIdToMoleculeId[i] = -1; + } + + { + int i = 0; + for (auto molecule : molecules) { + for (auto abid : molecule->atom_block_ids) { + int bid = size_t(abid); + if (bid < 0 || bid >= numberOfAtoms) continue; + atomBlockIdToMoleculeId[bid] = i; + } + i++; } - cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); - cluster_stats.num_unrelated_clustering_attempts = 0; - next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), - attraction_groups, - allow_unrelated_clustering, - packer_opts.prioritize_transitive_connectivity, - packer_opts.transitive_fanout_threshold, - packer_opts.feasible_block_array_size, - &cluster_stats.num_unrelated_clustering_attempts, - cur_cluster_placement_stats_ptr, - clb_inter_blk_nets, - clb_index, - verbosity, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - primitive_candidate_block_types); - prev_molecule = istart; - - /* - * When attraction groups are created, the purpose is to pack more densely by adding more molecules - * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are - * not on), the cluster keeps being packed until the get_molecule routines return either a repeated - * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the - * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a - * large value. - */ - int max_num_repeated_molecules = 0; - if (attraction_groups.num_attraction_groups() > 0) { - max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; - } else { - max_num_repeated_molecules = 1; + } + + + + + std::ofstream hmetisFile; + hmetisFile.open("hmetis.txt", std::ofstream::out); + //hmetisFile << numberOfNets << " " << numberOfAtoms << std::endl; + + //std::vector> netLines; + std::vector uniqueLines; + std::vector lineCounts; + + //int nextIndexForMolecule = numberOfMolecules + 1; + + //bool addNewNodeAndUseWeightsInHmetis = false; + + for (auto netId : myNetlist.nets()) { + //VTR_LOG("NET ID: %d\n", size_t(netId)); + std::vector newLine; + AtomBlockId driverBlockId = myNetlist.net_driver_block(netId); + int moleculeId = atomBlockIdToMoleculeId[size_t(driverBlockId)]; + newLine.push_back(moleculeId + 1); + double maxCriticality = 0; + for (auto pin_id : myNetlist.net_pins(netId)) { + + double pinCriticality = timing_info->setup_pin_criticality(pin_id); + if(pinCriticality > maxCriticality){ + maxCriticality = pinCriticality; + } + + //VTR_LOG("pin criticality: %f\n", pinCriticality); + + + auto port_id = myNetlist.pin_port(pin_id); + auto blk_id = myNetlist.port_block(port_id); + if (blk_id == driverBlockId) continue; + //VTR_LOG("%d ", size_t(blk_id)); + //hmetisFile << size_t(blk_id)+1 << " "; + moleculeId = atomBlockIdToMoleculeId[size_t(blk_id)]; + if (std::find(newLine.begin(), newLine.end(), moleculeId + 1) == newLine.end()) { + newLine.push_back(moleculeId + 1); + } + } + int lineWeight = 1; + + if (newLine.size() > 1) { + + std::sort(newLine.begin(), newLine.end()); + std::string s = std::accumulate(newLine.begin() + 1, newLine.end(), std::to_string(newLine[0]), + [](const std::string& a, int b) { + return a + " " + std::to_string(b); + }); + auto pointer = std::find(uniqueLines.begin(), uniqueLines.end(), s); + if 
(pointer == uniqueLines.end()) { + //netLines.push_back(newLine); + lineCounts.push_back(lineWeight); + uniqueLines.push_back(s); + } else { + lineCounts[pointer - uniqueLines.begin()]+= lineWeight; + } + } - int num_repeated_molecules = 0; - - while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { - prev_molecule = next_molecule; - - try_fill_cluster(packer_opts, - cur_cluster_placement_stats_ptr, - prev_molecule, - next_molecule, - num_repeated_molecules, - helper_ctx.primitives_list, - cluster_stats, - helper_ctx.total_clb_num, - num_models, - helper_ctx.max_cluster_size, - clb_index, - detailed_routing_stage, - attraction_groups, - clb_inter_blk_nets, - allow_unrelated_clustering, - high_fanout_threshold, - is_clock, - timing_info, - router_data, - target_ext_pin_util, - temp_cluster_pr, - block_pack_status, - clustering_data.unclustered_list_head, - unclustered_list_head_size, - net_output_feeds_driving_block_input, - primitive_candidate_block_types); + } + + hmetisFile << uniqueLines.size() << " " << numberOfMolecules << " " << 11 << std::endl; + { + int i = 0; + for (auto line : uniqueLines) { + hmetisFile << lineCounts[i] << " " << line << std::endl; + i++; } + for (auto molecule : molecules) { + hmetisFile << molecule->atom_block_ids.size() << std::endl; + } + } + + hmetisFile.close(); - is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + int numberOfClusters = ceil(numberOfMolecules *1.0 / packer_opts.number_of_molecules_in_partition); + if (numberOfClusters <= 1) numberOfClusters = 2; - if (is_cluster_legal) { - istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, seedindex, cluster_stats, router_data); - store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, le_count, clb_inter_blk_nets); + char* commandToExecute = new char[1000]; + unsigned num_cpus = std::thread::hardware_concurrency(); + int num_threads = (int)(num_cpus / 2) > 1? 
(int)(num_cpus / 2) : 1; + // sprintf(commandToExecute, "%s hmetis.txt %d 3 10 4 1 3 0 0 ", packer_opts.hmetis_path.c_str(), numberOfClusters); + sprintf(commandToExecute, + "%s -h hmetis.txt --preset-type=quality -t %d -k %d -e 3 -o soed --enable-progress-bar=true --show-detailed-timings=true --verbose=true --write-partition-file=true", + packer_opts.partitioner_path.c_str(), num_threads, numberOfClusters); + VTR_LOG("MtKaHPar COMMAND: %s\n", commandToExecute); + + int code = system(commandToExecute); + VTR_ASSERT_MSG(code == 0, "Running MtKaHyPar failed with code 0"); + + delete commandToExecute; + + char* newFilename = new char[100]; + // sprintf(newFilename, "hmetis.txt.part.%d", numberOfClusters); + sprintf(newFilename, "hmetis.txt.part%d.epsilon3..seed0.KaHyPar", numberOfClusters); + + int* atomBlockIdToCluster = new int[numberOfAtoms]; + + std::ifstream hmetisOutFile; + hmetisOutFile.open(newFilename); + + int* clusterOfMolecule = new int[numberOfMolecules]; + + for (int i = 0; i < numberOfMolecules; i++) { + int clusterId; + hmetisOutFile >> clusterId; + + clusterOfMolecule[i] = clusterId; + + for (AtomBlockId abid : molecules[i]->atom_block_ids) { + if (size_t(abid) >= numberOfAtoms) continue; + atomBlockIdToCluster[size_t(abid)] = clusterId; + } + + } + hmetisOutFile.close(); + + std::vector* clusterMoleculeOrder = new std::vector[numberOfClusters]; + + for (int i = 0; i < numberOfClusters; i++) { + clusterMoleculeOrder[i] = std::vector(); + } + + for (auto atom : seed_atoms) { + int id = size_t(atom); + int clusterId = atomBlockIdToCluster[id]; + if (clusterId < 0 || clusterId >= numberOfClusters) { + VTR_LOG("BAD CLUSTER ID: %d, atomBlockId: %d\n", clusterId, id); + clusterId = 1; + } + VTR_ASSERT(clusterId >= 0 && clusterId < numberOfClusters); + int moleculeId = atomBlockIdToMoleculeId[id]; + VTR_ASSERT(moleculeId >= 0 && moleculeId < numberOfMolecules); + clusterMoleculeOrder[clusterId].push_back(moleculeId); + } + + + for (int partId = 0; partId < numberOfClusters; partId++) { + int currentIndexOfBestMolecule = 0; + + for (int moldId = 0; moldId < numberOfMolecules; moldId++) { + if (clusterOfMolecule[moldId] == partId) { + molecules[moldId]->valid = true; + //istart = molecules[moldId]; + } else { + molecules[moldId]->valid = false; + } + } + + if (currentIndexOfBestMolecule >= clusterMoleculeOrder[partId].size()) { + istart = nullptr; } else { - free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, num_used_type_instances, helper_ctx.total_clb_num, seedindex); + istart = molecules[clusterMoleculeOrder[partId][currentIndexOfBestMolecule]]; + currentIndexOfBestMolecule++; + while (!istart->valid && currentIndexOfBestMolecule < clusterMoleculeOrder[partId].size()) { + istart = molecules[clusterMoleculeOrder[partId][currentIndexOfBestMolecule]]; + currentIndexOfBestMolecule++; + } + if (!istart->valid) { + istart = nullptr; + } } - free_router_data(router_data); - router_data = nullptr; + + while (istart != nullptr) { + + is_cluster_legal = false; + savedseedindex = seedindex; + + // If in the previous "for (detailed_routing_stage ..." call we failed all the + // time (e.g 'failed_for_loop' is false) then when we re-enter it but we reduce the + // calls to "try_fill_cluster" by half in order to expect a feasible solution. + // + // This case can happen with unit test case "mult_seq" where we stuck on a mode4/mode5 + // conflict because of the calls to "try_fill_cluster" that always fail. 
In this case the + // only feasible solution is to stick to the original "start_new_cluster" solution and + // avoid any call to "try_fill_cluster". To do this, "nb_max_molecule" needs to reduce + // to 0 to avoid calling "try_fill_cluster". That's why we decrease "max_nb_molecule" in + // the iteration process to get down to 0. We decrease it by half each time but it could + // have been another scheme to decrease it. This one looks to give good QoR results. + // (T.Besson, Rapid Silicon) + // + if (failed_for_loop) { + max_nb_molecule = max_nb_molecule / 1.4; // decrease by 1.4 looks to be a good strategy. + // The packer is hyper sensitive to this number. + // A change by 0.1 currently may generate big + // difference on some designs like "axil_crossbar". + // More investigation to understand why ? (T.Besson) + } else { + max_nb_molecule = 512; // important starting number. The packer is again sensitive + // to that number. This number needs to be high to guarantee some + // stable behavior. (T.Besson). + } + + // Expect that we will not find a legal solution in the below "for loop" + // (T.Besson) + // + failed_for_loop = true; + + for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; + !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; + detailed_routing_stage++) { + + ClusterBlockId clb_index(helper_ctx.total_clb_num); + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); + + /*Used to store cluster's PartitionRegion as primitives are added to it. + * Since some of the primitives might fail legality, this structure temporarily + * stores PartitionRegion information while the cluster is packed*/ + PartitionRegion temp_cluster_pr; + + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + clb_index, istart, + num_used_type_instances, + packer_opts.target_device_utilization, + num_models, helper_ctx.max_cluster_size, + arch, packer_opts.device_layout, + lb_type_rr_graphs, &router_data, + detailed_routing_stage, &cluster_ctx.clb_nlist, + primitive_candidate_block_types, + verbosity, + packer_opts.enable_pin_feasibility_filter, + balance_block_type_utilization, + packer_opts.feasible_block_array_size, + temp_cluster_pr); + + //initial molecule in cluster has been processed + cluster_stats.num_molecules_processed++; + cluster_stats.mols_since_last_print++; + print_pack_status(helper_ctx.total_clb_num, + cluster_stats.num_molecules, + cluster_stats.num_molecules_processed, + cluster_stats.mols_since_last_print, + device_ctx.grid.width(), + device_ctx.grid.height(), + attraction_groups); + + VTR_LOGV(verbosity > 2, + "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, + cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + cluster_ctx.clb_nlist.block_type(clb_index)->name); + VTR_LOGV(verbosity > 2, "."); + //Progress dot for seed-block + fflush(stdout); + + t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); + update_cluster_stats(istart, clb_index, + is_clock, //Set of clock nets + is_clock, //Set of global nets (currently all clocks) + packer_opts.global_clocks, + packer_opts.alpha, packer_opts.beta, + packer_opts.timing_driven, packer_opts.connection_driven, + high_fanout_threshold, + *timing_info, + attraction_groups, + net_output_feeds_driving_block_input); + helper_ctx.total_clb_num++; + + if 
(packer_opts.timing_driven) { + cluster_stats.blocks_since_last_analysis++; + /*it doesn't make sense to do a timing analysis here since there* + *is only one atom block clustered it would not change anything */ + } + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cluster_stats.num_unrelated_clustering_attempts = 0; + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, + verbosity, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + prev_molecule = istart; + + /* + * When attraction groups are created, the purpose is to pack more densely by adding more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. + */ + int max_num_repeated_molecules = 0; + if (attraction_groups.num_attraction_groups() > 0) { + max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; + } else { + max_num_repeated_molecules = 1; + } + int num_repeated_molecules = 0; + + int i = 0; + + while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { + prev_molecule = next_molecule; + + if (i == max_nb_molecule) { + break; + } + + try_fill_cluster(packer_opts, + cur_cluster_placement_stats_ptr, + prev_molecule, + next_molecule, + num_repeated_molecules, + helper_ctx.primitives_list, + cluster_stats, + helper_ctx.total_clb_num, + num_models, + helper_ctx.max_cluster_size, + clb_index, + detailed_routing_stage, + attraction_groups, + clb_inter_blk_nets, + allow_unrelated_clustering, + high_fanout_threshold, + is_clock, + timing_info, + router_data, + target_ext_pin_util, + temp_cluster_pr, + block_pack_status, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + net_output_feeds_driving_block_input, + primitive_candidate_block_types); + + i++; + + } + + max_nb_molecule = i; + + is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + + if (is_cluster_legal) { + + // Calls the extra check for "clb" only (may need to revisit this check) (T.Besson) + // + if (!strcmp(cluster_ctx.clb_nlist.block_type(clb_index)->name, "clb")) { + + // Temporary fix : make sure that the solution has no mode confict. This check is + // performed by initiating a first xml kind of output work. There may be a tricky + // conflict with some lb routing so we need to store temporary the lb nets in the + // cluster data structure to make the check inside "check_if_xml_mode_conflict". + // (T.Besson, Rapid Silicon) + // + (clustering_data.intra_lb_routing).push_back(router_data->saved_lb_nets); + + // Call the check as if we would output the final packing ... and see if there is any + // mode conflict. (T.Besson) + // 'is_cluster_legal' turns to false if there is a mode conflict. 
+ // + is_cluster_legal = check_if_xml_mode_conflict(packer_opts, arch, + clustering_data.intra_lb_routing); + + // Remove the previous pushed "intra_lb_routing_solution" to clean up + // the place. (T.Besson) + // + (clustering_data.intra_lb_routing).pop_back(); + + if (!is_cluster_legal && + (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM)) { + + VTR_LOGV(verbosity > 0, "Info: rejected cluster packing solution with modes conflict [%d]\n", + max_nb_molecule); + } + } + + if (is_cluster_legal) { + + istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, + seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, + seedindex, cluster_stats, router_data); + + store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, + le_count, clb_inter_blk_nets); + + nb_packed_molecules += max_nb_molecule; + + VTR_LOGV(verbosity > 0, "Successfully packed Logic Block [%d]\n", max_nb_molecule); + + failed_for_loop = false; // tell the outer loop that we succeeded within this loop + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + for (int index = 0; index < clusterMoleculeOrder[partId].size(); index++) { + istart = molecules[clusterMoleculeOrder[partId][index]]; + if (istart->valid) { + break; + } + } + if (!istart->valid) { + istart = nullptr; + } + + free_router_data(router_data); + router_data = nullptr; + } + + } + } // for(int partId=0;partId < numberOfClusters;partId++){ + }else{ + while (istart != nullptr) { + + is_cluster_legal = false; + savedseedindex = seedindex; + + // If in the previous "for (detailed_routing_stage ..." call we failed all the + // time (e.g 'failed_for_loop' is false) then when we re-enter it but we reduce the + // calls to "try_fill_cluster" by half in order to expect a feasible solution. + // + // This case can happen with unit test case "mult_seq" where we stuck on a mode4/mode5 + // conflict because of the calls to "try_fill_cluster" that always fail. In this case the + // only feasible solution is to stick to the original "start_new_cluster" solution and + // avoid any call to "try_fill_cluster". To do this, "nb_max_molecule" needs to reduce + // to 0 to avoid calling "try_fill_cluster". That's why we decrease "max_nb_molecule" in + // the iteration process to get down to 0. We decrease it by half each time but it could + // have been another scheme to decrease it. This one looks to give good QoR results. + // (T.Besson, Rapid Silicon) + // + if (failed_for_loop) { + max_nb_molecule = max_nb_molecule / 1.4; // decrease by 1.4 looks to be a good strategy. + // The packer is hyper sensitive to this number. + // A change by 0.1 currently may generate big + // difference on some designs like "axil_crossbar". + // More investigation to understand why ? (T.Besson) + } else { + max_nb_molecule = 512; // important starting number. The packer is again sensitive + // to that number. This number needs to be high to guarantee some + // stable behavior. (T.Besson). 
+ } + + // Expect that we will not find a legal solution in the below "for loop" + // (T.Besson) + // + failed_for_loop = true; + + for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; + !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_INVALID; + detailed_routing_stage++) { + + ClusterBlockId clb_index(helper_ctx.total_clb_num); + + VTR_LOGV(verbosity > 2, "Complex block %d:\n", helper_ctx.total_clb_num); + + /*Used to store cluster's PartitionRegion as primitives are added to it. + * Since some of the primitives might fail legality, this structure temporarily + * stores PartitionRegion information while the cluster is packed*/ + PartitionRegion temp_cluster_pr; + + start_new_cluster(helper_ctx.cluster_placement_stats, helper_ctx.primitives_list, + clb_index, istart, + num_used_type_instances, + packer_opts.target_device_utilization, + num_models, helper_ctx.max_cluster_size, + arch, packer_opts.device_layout, + lb_type_rr_graphs, &router_data, + detailed_routing_stage, &cluster_ctx.clb_nlist, + primitive_candidate_block_types, + verbosity, + packer_opts.enable_pin_feasibility_filter, + balance_block_type_utilization, + packer_opts.feasible_block_array_size, + temp_cluster_pr); + + //initial molecule in cluster has been processed + cluster_stats.num_molecules_processed++; + cluster_stats.mols_since_last_print++; + print_pack_status(helper_ctx.total_clb_num, + cluster_stats.num_molecules, + cluster_stats.num_molecules_processed, + cluster_stats.mols_since_last_print, + device_ctx.grid.width(), + device_ctx.grid.height(), + attraction_groups); + + VTR_LOGV(verbosity > 2, + "Complex block %d: '%s' (%s) ", helper_ctx.total_clb_num, + cluster_ctx.clb_nlist.block_name(clb_index).c_str(), + cluster_ctx.clb_nlist.block_type(clb_index)->name); + VTR_LOGV(verbosity > 2, "."); + //Progress dot for seed-block + fflush(stdout); + + t_ext_pin_util target_ext_pin_util = ext_pin_util_targets.get_pin_util(cluster_ctx.clb_nlist.block_type(clb_index)->name); + int high_fanout_threshold = high_fanout_thresholds.get_threshold(cluster_ctx.clb_nlist.block_type(clb_index)->name); + update_cluster_stats(istart, clb_index, + is_clock, //Set of clock nets + is_clock, //Set of global nets (currently all clocks) + packer_opts.global_clocks, + packer_opts.alpha, packer_opts.beta, + packer_opts.timing_driven, packer_opts.connection_driven, + high_fanout_threshold, + *timing_info, + attraction_groups, + net_output_feeds_driving_block_input); + helper_ctx.total_clb_num++; + + if (packer_opts.timing_driven) { + cluster_stats.blocks_since_last_analysis++; + /*it doesn't make sense to do a timing analysis here since there* + *is only one atom block clustered it would not change anything */ + } + cur_cluster_placement_stats_ptr = &(helper_ctx.cluster_placement_stats[cluster_ctx.clb_nlist.block_type(clb_index)->index]); + cluster_stats.num_unrelated_clustering_attempts = 0; + next_molecule = get_molecule_for_cluster(cluster_ctx.clb_nlist.block_pb(clb_index), + attraction_groups, + allow_unrelated_clustering, + packer_opts.prioritize_transitive_connectivity, + packer_opts.transitive_fanout_threshold, + packer_opts.feasible_block_array_size, + &cluster_stats.num_unrelated_clustering_attempts, + cur_cluster_placement_stats_ptr, + clb_inter_blk_nets, + clb_index, + verbosity, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + primitive_candidate_block_types); + prev_molecule = istart; + + /* + * When attraction groups are created, the purpose is to pack more densely by adding 
more molecules + * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are + * not on), the cluster keeps being packed until the get_molecule routines return either a repeated + * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the + * cluster until a nullptr is returned. So, the number of repeated molecules allowed is increased to a + * large value. + */ + int max_num_repeated_molecules = 0; + if (attraction_groups.num_attraction_groups() > 0) { + max_num_repeated_molecules = ATTRACTION_GROUPS_MAX_REPEATED_MOLECULES; + } else { + max_num_repeated_molecules = 1; + } + int num_repeated_molecules = 0; + + int i = 0; + + while (next_molecule != nullptr && num_repeated_molecules < max_num_repeated_molecules) { + prev_molecule = next_molecule; + + if (i == max_nb_molecule) { + break; + } + + try_fill_cluster(packer_opts, + cur_cluster_placement_stats_ptr, + prev_molecule, + next_molecule, + num_repeated_molecules, + helper_ctx.primitives_list, + cluster_stats, + helper_ctx.total_clb_num, + num_models, + helper_ctx.max_cluster_size, + clb_index, + detailed_routing_stage, + attraction_groups, + clb_inter_blk_nets, + allow_unrelated_clustering, + high_fanout_threshold, + is_clock, + timing_info, + router_data, + target_ext_pin_util, + temp_cluster_pr, + block_pack_status, + clustering_data.unclustered_list_head, + unclustered_list_head_size, + net_output_feeds_driving_block_input, + primitive_candidate_block_types); + + i++; + + } + + max_nb_molecule = i; + + is_cluster_legal = check_cluster_legality(verbosity, detailed_routing_stage, router_data); + + if (is_cluster_legal) { + + // Calls the extra check for "clb" only (may need to revisit this check) (T.Besson) + // + if (!strcmp(cluster_ctx.clb_nlist.block_type(clb_index)->name, "clb")) { + + // Temporary fix : make sure that the solution has no mode confict. This check is + // performed by initiating a first xml kind of output work. There may be a tricky + // conflict with some lb routing so we need to store temporary the lb nets in the + // cluster data structure to make the check inside "check_if_xml_mode_conflict". + // (T.Besson, Rapid Silicon) + // + (clustering_data.intra_lb_routing).push_back(router_data->saved_lb_nets); + + // Call the check as if we would output the final packing ... and see if there is any + // mode conflict. (T.Besson) + // 'is_cluster_legal' turns to false if there is a mode conflict. + // + is_cluster_legal = check_if_xml_mode_conflict(packer_opts, arch, + clustering_data.intra_lb_routing); + + // Remove the previous pushed "intra_lb_routing_solution" to clean up + // the place. 
(T.Besson) + // + (clustering_data.intra_lb_routing).pop_back(); + + if (!is_cluster_legal && + (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM)) { + + VTR_LOGV(verbosity > 0, "Info: rejected cluster packing solution with modes conflict [%d]\n", + max_nb_molecule); + } + } + + if (is_cluster_legal) { + + istart = save_cluster_routing_and_pick_new_seed(packer_opts, helper_ctx.total_clb_num, + seed_atoms, num_blocks_hill_added, clustering_data.intra_lb_routing, + seedindex, cluster_stats, router_data); + + store_cluster_info_and_free(packer_opts, clb_index, logic_block_type, le_pb_type, + le_count, clb_inter_blk_nets); + + nb_packed_molecules += max_nb_molecule; + + VTR_LOGV(verbosity > 0, "Successfully packed Logic Block [%d]\n", max_nb_molecule); + + failed_for_loop = false; // tell the outer loop that we succeeded within this loop + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + + } else { + free_data_and_requeue_used_mols_if_illegal(clb_index, savedseedindex, + num_used_type_instances, helper_ctx.total_clb_num, seedindex); + } + + free_router_data(router_data); + router_data = nullptr; + } + } } diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 0e12305dc70..73bb6e55360 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -7,6 +7,26 @@ #include "vtr_math.h" #include "SetupGrid.h" +// #include "vtr_assert.h" +// #include "vtr_log.h" +// #include "vtr_digest.h" +// #include "vtr_memory.h" + +// #include "vpr_types.h" +// #include "vpr_error.h" + +// #include "pugixml.hpp" + +#include "globals.h" +#include "atom_netlist.h" +#include "pack_types.h" +#include "pb_type_graph.h" +#include "output_clustering.h" +#include "read_xml_arch_file.h" +#include "vpr_utils.h" +#include "pack.h" + + /**********************************/ /* Global variables in clustering */ /**********************************/ @@ -268,6 +288,16 @@ void check_and_output_clustering(const t_packer_opts& packer_opts, VTR_ASSERT(cluster_ctx.clb_nlist.blocks().size() == intra_lb_routing.size()); } +bool check_if_xml_mode_conflict(const t_packer_opts& packer_opts, + const t_arch* arch, + const vtr::vector*>& intra_lb_routing) { + + bool legal = check_output_clustering(intra_lb_routing, arch->architecture_id, packer_opts.output_file.c_str()); + + return legal; +} + + void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth) { auto& device_ctx = g_vpr_ctx.mutable_device(); @@ -1639,7 +1669,9 @@ t_pack_molecule* save_cluster_routing_and_pick_new_seed(const t_packer_opts& pac router_data->saved_lb_nets = nullptr; //Pick a new seed - next_seed = get_highest_gain_seed_molecule(&seedindex, seed_atoms); + if(!packer_opts.use_partitioning_in_pack){ + next_seed = get_highest_gain_seed_molecule(&seedindex, seed_atoms); + } if (packer_opts.timing_driven) { if (num_blocks_hill_added > 0) { @@ -3688,4 +3720,4 @@ void init_clb_atoms_lookup(vtr::vector*>& intra_lb_routing); +bool check_if_xml_mode_conflict(const t_packer_opts& packer_opts, + const t_arch* arch, + const vtr::vector*>& intra_lb_routing); + + void get_max_cluster_size_and_pb_depth(int& max_cluster_size, int& max_pb_depth); @@ -452,4 +458,4 @@ bool cleanup_pb(t_pb* pb); void alloc_and_load_pb_stats(t_pb* pb, const int feasible_block_array_size); void init_clb_atoms_lookup(vtr::vector>& atoms_lookup); -#endif \ No newline at end of file +#endif diff --git 
a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index 084898322b9..dbf9732af3f 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -38,7 +38,7 @@ static void print_clustering_stats(char* block_name, int num_block_type, float n /* Prints out one cluster (clb). Both the external pins and the * * internal connections are printed out. */ -static void print_stats() { +void print_stats() { int ipin; unsigned int itype; int total_nets_absorbed; @@ -136,7 +136,7 @@ static void print_stats() { } static void print_clustering_stats_header() { - VTR_LOG("Final Clustering Statistics: \n"); + VTR_LOG("Clustering Statistics: \n"); VTR_LOG("---------- -------- ------------------------------------ --------------------------\n"); VTR_LOG("Block Type # Blocks Avg. # of input clocks and pins used Avg. # of output pins used\n"); VTR_LOG("---------- -------- ------------------------------------ --------------------------\n"); @@ -238,10 +238,13 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc /* Determine mode if applicable */ port_index = 0; for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { VTR_ASSERT(!pb_type->ports[i].is_clock); + for (j = 0; j < pb_type->ports[i].num_pins; j++) { const t_pb_graph_pin* pin = &pb_graph_node->output_pins[port_index][j]; + node_index = pin->pin_count_in_cluster; if (pb_type->num_modes > 0 && pb_route.count(node_index) && pb_route[node_index].atom_net_id) { prev_node = pb_route[node_index].driver_pb_pin_id; @@ -250,6 +253,7 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc VTR_ASSERT(edge != nullptr); mode_of_edge = edge->interconnect->parent_mode_index; + if (mode != nullptr && &pb_type->modes[mode_of_edge] != mode) { VPR_FATAL_ERROR(VPR_ERROR_PACK, "Differing modes for block. 
Got %s previously and %s for edge %d (interconnect %s).", @@ -259,6 +263,8 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } VTR_ASSERT(mode == nullptr || &pb_type->modes[mode_of_edge] == mode); mode = &pb_type->modes[mode_of_edge]; + + } else { } } port_index++; @@ -361,6 +367,171 @@ static void clustering_xml_open_block(pugi::xml_node parent_node, t_logical_bloc } } + +static bool check_clustering_xml_open_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, + const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, + t_pb_graph_node* pb_graph_node, int pb_index, + bool is_used, const t_pb_routes& pb_route) { + int i, j, k, m; + const t_pb_type *pb_type, *child_pb_type; + t_mode* mode = nullptr; + int prev_node; + int mode_of_edge, port_index, node_index; + + mode_of_edge = UNDEFINED; + + pb_type = pb_graph_node->pb_type; + + pugi::xml_node block_node = parent_node.append_child("block"); + block_node.append_attribute("name") = "open"; + block_node.append_attribute("instance") = vtr::string_fmt("%s[%d]", pb_graph_node->pb_type->name, pb_index).c_str(); + std::vector block_modes; + + if (is_used) { + /* Determine mode if applicable */ + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + const t_pb_graph_pin* pin = &pb_graph_node->output_pins[port_index][j]; + + node_index = pin->pin_count_in_cluster; + if (pb_type->num_modes > 0 && pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + prev_node = pb_route[node_index].driver_pb_pin_id; + const t_pb_graph_pin* prev_pin = pb_graph_pin_lookup_from_index_by_type.pb_gpin(type->index, prev_node); + const t_pb_graph_edge* edge = get_edge_between_pins(prev_pin, pin); + + VTR_ASSERT(edge != nullptr); + mode_of_edge = edge->interconnect->parent_mode_index; + + if (mode != nullptr && &pb_type->modes[mode_of_edge] != mode) { + + // we return false because we do see a mode conflict in the final solution. + // Ideally we would need to fix the root cause of this mode conflict. + // Will do it when more time (Rapid Silicon, T.Besson). 
+ // + + // Handle the error message at the caller level +#if 0 + VTR_LOG("Info: modes conflict : pb mode = %s, edge mode = %s!\n", mode->name, + pb_type->modes[mode_of_edge].name); +#endif + return false; + } + VTR_ASSERT(mode == nullptr || &pb_type->modes[mode_of_edge] == mode); + mode = &pb_type->modes[mode_of_edge]; + + } else { + } + } + port_index++; + } + } + + VTR_ASSERT(mode != nullptr && mode_of_edge != UNDEFINED); + + block_node.append_attribute("mode") = mode->name; + block_node.append_attribute("pb_type_num_modes") = pb_type->num_modes; + + pugi::xml_node inputs_node = block_node.append_child("inputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = inputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_type->parent_mode == nullptr) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node outputs_node = block_node.append_child("outputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + pugi::xml_node port_node = outputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->output_pins[port_index][j].pin_count_in_cluster; + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node clock_node = block_node.append_child("clocks"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = clock_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb_graph_node->clock_pins[port_index][j].pin_count_in_cluster; + if (pb_type->parent_mode == nullptr) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + if (pb_type->num_modes > 0) { + for (i = 0; i < mode->num_pb_type_children; i++) { + child_pb_type = &mode->pb_type_children[i]; + for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { + port_index = 0; + is_used = false; + for (k = 0; k < child_pb_type->num_ports && !is_used; k++) { + if (child_pb_type->ports[k].type == OUT_PORT) { + for (m = 0; m < child_pb_type->ports[k].num_pins; m++) { + node_index = pb_graph_node->child_pb_graph_nodes[mode_of_edge][i][j].output_pins[port_index][m].pin_count_in_cluster; + if 
(pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + is_used = true; + break; + } + } + port_index++; + } + } + bool legal = check_clustering_xml_open_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, + &pb_graph_node->child_pb_graph_nodes[mode_of_edge][i][j], + j, is_used, pb_route); + if (!legal) { + return false; + } + } + } + } + } + + return true; // everything is fine +} + /* outputs a block that is used (i.e. has configuration) and all of its child blocks */ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { int i, j, k, m; @@ -559,6 +730,256 @@ static void clustering_xml_block(pugi::xml_node parent_node, t_logical_block_typ } } +/* outputs a block that is used (i.e. has configuration) and all of its child blocks */ +static bool check_clustering_xml_block(pugi::xml_node parent_node, t_logical_block_type_ptr type, const IntraLbPbPinLookup& pb_graph_pin_lookup_from_index_by_type, t_pb* pb, int pb_index, const t_pb_routes& pb_route) { + int i, j, k, m; + const t_pb_type *pb_type, *child_pb_type; + t_pb_graph_node* pb_graph_node; + t_mode* mode; + int port_index, node_index; + bool is_used; + + pb_type = pb->pb_graph_node->pb_type; + pb_graph_node = pb->pb_graph_node; + mode = &pb_type->modes[pb->mode]; + + pugi::xml_node block_node = parent_node.append_child("block"); + block_node.append_attribute("name") = pb->name; + block_node.append_attribute("instance") = vtr::string_fmt("%s[%d]", pb_type->name, pb_index).c_str(); + + if (pb_type->num_modes > 0) { + block_node.append_attribute("mode") = mode->name; + } else { + const auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId atom_blk = atom_ctx.nlist.find_block(pb->name); + VTR_ASSERT(atom_blk); + + pugi::xml_node attrs_node = block_node.append_child("attributes"); + for (const auto& attr : atom_ctx.nlist.block_attrs(atom_blk)) { + pugi::xml_node attr_node = attrs_node.append_child("attribute"); + attr_node.append_attribute("name") = attr.first.c_str(); + attr_node.text().set(attr.second.c_str()); + } + + pugi::xml_node params_node = block_node.append_child("parameters"); + for (const auto& param : atom_ctx.nlist.block_params(atom_blk)) { + pugi::xml_node param_node = params_node.append_child("parameter"); + param_node.append_attribute("name") = param.first.c_str(); + param_node.text().set(param.second.c_str()); + } + } + + pugi::xml_node inputs_node = block_node.append_child("inputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = inputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_type->parent_mode == nullptr) { + if (pb_route.count(node_index)) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_net_text(AtomNetId::INVALID())); + } + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + + //The cluster router may have rotated equivalent pins (e.g. 
LUT inputs), + //record the resulting rotation here so it can be unambigously mapped + //back to the atom netlist + if (pb_type->ports[i].equivalent != PortEquivalence::NONE && pb_type->parent_mode != nullptr && pb_type->num_modes == 0) { + //This is a primitive with equivalent inputs + + auto& atom_ctx = g_vpr_ctx.atom(); + AtomBlockId atom_blk = atom_ctx.nlist.find_block(pb->name); + VTR_ASSERT(atom_blk); + + AtomPortId atom_port = atom_ctx.nlist.find_atom_port(atom_blk, pb_type->ports[i].model_port); + + if (atom_port) { //Port exists (some LUTs may have no input and hence no port in the atom netlist) + + pugi::xml_node port_rotation_node = inputs_node.append_child("port_rotation_map"); + port_rotation_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::set recorded_pins; + std::vector pin_map_list; + + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->input_pins[port_index][j].pin_count_in_cluster; + + if (pb_route.count(node_index)) { + AtomNetId atom_net = pb_route[node_index].atom_net_id; + + VTR_ASSERT(atom_net); + + //This physical pin is in use, find the original pin in the atom netlist + AtomPinId orig_pin; + for (AtomPinId atom_pin : atom_ctx.nlist.port_pins(atom_port)) { + if (recorded_pins.count(atom_pin)) continue; //Don't add pins twice + + AtomNetId atom_pin_net = atom_ctx.nlist.pin_net(atom_pin); + + if (atom_pin_net == atom_net) { + recorded_pins.insert(atom_pin); + orig_pin = atom_pin; + break; + } + } + + VTR_ASSERT(orig_pin); + //The physical pin j, maps to a pin in the atom netlist + pin_map_list.push_back(vtr::string_fmt("%d", atom_ctx.nlist.pin_port_bit(orig_pin))); + } else { + //The physical pin is disconnected + pin_map_list.push_back("open"); + } + } + port_rotation_node.text().set(vtr::join(pin_map_list.begin(), pin_map_list.end(), " ").c_str()); + } + } + + port_index++; + } + } + + pugi::xml_node outputs_node = block_node.append_child("outputs"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].type == OUT_PORT) { + VTR_ASSERT(!pb_type->ports[i].is_clock); + + pugi::xml_node port_node = outputs_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->output_pins[port_index][j].pin_count_in_cluster; + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + pugi::xml_node clock_node = block_node.append_child("clocks"); + + port_index = 0; + for (i = 0; i < pb_type->num_ports; i++) { + if (pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) { + pugi::xml_node port_node = clock_node.append_child("port"); + port_node.append_attribute("name") = pb_graph_node->pb_type->ports[i].name; + + std::vector pins; + for (j = 0; j < pb_type->ports[i].num_pins; j++) { + node_index = pb->pb_graph_node->clock_pins[port_index][j].pin_count_in_cluster; + if (pb_type->parent_mode == nullptr) { + if (pb_route.count(node_index)) { + pins.push_back(clustering_xml_net_text(pb_route[node_index].atom_net_id)); + } else { + pins.push_back(clustering_xml_net_text(AtomNetId::INVALID())); + } + } else { + pins.push_back(clustering_xml_interconnect_text(type, pb_graph_pin_lookup_from_index_by_type, node_index, pb_route)); + } + } + 
port_node.text().set(vtr::join(pins.begin(), pins.end(), " ").c_str()); + port_index++; + } + } + + if (pb_type->num_modes > 0) { + for (i = 0; i < mode->num_pb_type_children; i++) { + for (j = 0; j < mode->pb_type_children[i].num_pb; j++) { + /* If child pb is not used but routing is used, I must print things differently */ + if ((pb->child_pbs[i] != nullptr) && (pb->child_pbs[i][j].name != nullptr)) { + bool legal = check_clustering_xml_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, &pb->child_pbs[i][j], j, pb_route); + if (!legal) { + return false; + } + } else { + is_used = false; + child_pb_type = &mode->pb_type_children[i]; + port_index = 0; + + for (k = 0; k < child_pb_type->num_ports && !is_used; k++) { + if (child_pb_type->ports[k].type == OUT_PORT) { + for (m = 0; m < child_pb_type->ports[k].num_pins; m++) { + node_index = pb_graph_node->child_pb_graph_nodes[pb->mode][i][j].output_pins[port_index][m].pin_count_in_cluster; + if (pb_route.count(node_index) && pb_route[node_index].atom_net_id) { + is_used = true; + break; + } + } + port_index++; + } + } + bool legal = check_clustering_xml_open_block(block_node, type, pb_graph_pin_lookup_from_index_by_type, + &pb_graph_node->child_pb_graph_nodes[pb->mode][i][j], + j, is_used, pb_route); + if (!legal) { + return false; + } + } + } + } + } + + return true; +} + +bool check_output_clustering(const vtr::vector*>& intra_lb_routing, + const std::string& architecture_id, const char* out_fname) { + + auto& device_ctx = g_vpr_ctx.device(); + auto& atom_ctx = g_vpr_ctx.atom(); + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + + // work around : pick up the last block ID to check only the last created block + // + ClusterBlockId last_id; + for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + last_id = blk_id; + } + + if (!intra_lb_routing.empty()) { + cluster_ctx.clb_nlist.block_pb(last_id)->pb_route = alloc_and_load_pb_route(intra_lb_routing[last_id], + cluster_ctx.clb_nlist.block_pb(last_id)->pb_graph_node); + } + + IntraLbPbPinLookup pb_graph_pin_lookup_from_index_by_type(device_ctx.logical_block_types); + + pugi::xml_document out_xml; + + pugi::xml_node block_node = out_xml.append_child("block"); + block_node.append_attribute("name") = out_fname; + block_node.append_attribute("instance") = "FPGA_packed_netlist[0]"; + block_node.append_attribute("architecture_id") = architecture_id.c_str(); + block_node.append_attribute("atom_netlist_id") = atom_ctx.nlist.netlist_id().c_str(); + + + // Check only the last_id block + // + if (!check_clustering_xml_block(block_node, cluster_ctx.clb_nlist.block_type(last_id), pb_graph_pin_lookup_from_index_by_type, + cluster_ctx.clb_nlist.block_pb(last_id), size_t(last_id), cluster_ctx.clb_nlist.block_pb(last_id)->pb_route)) { + return false; + } + + if (!intra_lb_routing.empty()) { + cluster_ctx.clb_nlist.block_pb(last_id)->pb_route.clear(); + } + + return true; // check was legal +} + /* This routine dumps out the output netlist in a format suitable for * * input to vpr. This routine also dumps out the internal structure of * * the cluster, in essentially a graph based format. */ diff --git a/vpr/src/pack/output_clustering.h b/vpr/src/pack/output_clustering.h index 509690e4934..7b1819a179f 100644 --- a/vpr/src/pack/output_clustering.h +++ b/vpr/src/pack/output_clustering.h @@ -9,4 +9,8 @@ void output_clustering(const vtr::vector*>& intra_lb_routing, const std::string& architecture_id, const char* out_fname); + +void print_stats(); + #endif
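
For reference, the hypergraph that the new code in cluster.cpp exports for Mt-KaHyPar follows the weighted hMETIS input convention: a header line with the hyperedge count, vertex count, and format code 11, then one line per hyperedge that starts with the hyperedge weight and lists 1-based vertex ids, then one vertex weight per line. The minimal standalone sketch below is not part of the patch; the file name and toy numbers are made up, but the layout mirrors what the patch writes (vertices stand in for pack molecules, hyperedge weights are the merged-net multiplicities, vertex weights are molecule atom counts).

    // Illustrative only: writes a toy hypergraph in the weighted hMETIS format
    // (fmt = 11: hyperedge weights plus vertex weights) handed to Mt-KaHyPar.
    #include <fstream>
    #include <utility>
    #include <vector>

    int main() {
        // Each hyperedge is a (weight, member molecule ids) pair. In the patch the
        // weight counts how many identical molecule-level nets were merged into one line.
        std::vector<std::pair<int, std::vector<int>>> hyperedges = {
            {2, {1, 2, 3}},
            {1, {2, 4}},
            {1, {1, 4}},
        };
        // One weight per vertex; in the patch this is the molecule's atom count.
        std::vector<int> vertex_weights = {3, 1, 2, 1};

        std::ofstream out("hmetis_example.txt");
        // Header: <#hyperedges> <#vertices> <fmt>. fmt 11 means hyperedge weights
        // lead each net line and vertex weights follow after all net lines.
        out << hyperedges.size() << " " << vertex_weights.size() << " " << 11 << "\n";
        for (const auto& edge : hyperedges) {
            out << edge.first;               // hyperedge weight
            for (int v : edge.second) {
                out << " " << v;             // 1-based vertex (molecule) ids
            }
            out << "\n";
        }
        for (int w : vertex_weights) {
            out << w << "\n";                // one vertex weight per line
        }
        return 0;
    }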
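
Assuming a standard VPR command line, the new options would be exercised with an invocation along these lines (architecture and circuit names are placeholders):

    vpr my_arch.xml my_circuit.blif --pack --use_partitioning_in_pack on --partitioner_path ~/bin/MtKaHyPar --number_of_molecules_in_partition 64

With these settings the packer requests roughly ceil(#molecules / 64) partitions, runs the executable named by --partitioner_path on the exported hmetis.txt, and reads the partition file that Mt-KaHyPar writes back into the working directory to pick the seed and candidate molecules for each partition in turn.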