From c90b6819c85aa2fa7700ce4d008622544a39a7dd Mon Sep 17 00:00:00 2001
From: Krzysztof Bieganski <kbieganski@antmicro.com>
Date: Fri, 7 Jul 2023 18:33:24 +0200
Subject: [PATCH] Format preprocessed token stream in multiple passes

Currently, the formatter doesn't handle many scenarios involving preprocessor
`ifdef`s/`endif`s interleaved with `begin`s, module headers, etc.
This patch attempts to solve this problem by performing multiple passes of the
formatting on preprocessed variants of the source. Each of these variants has a
different set of preprocessor branches enabled. Together, they should cover the
entire source (though that doesn't work in all cases yet). After several
formatting passes for different variants of the AST, a correct and properly
formatted file is produced.

Signed-off-by: Krzysztof Bieganski <kbieganski@antmicro.com>
---
 common/formatting/tree_unwrapper.cc           |  18 +-
 common/formatting/tree_unwrapper.h            |   6 +-
 common/text/text_structure.cc                 |  25 ++
 common/text/text_structure.h                  |   9 +
 common/text/token_info.h                      |   5 +
 verilog/CST/expression_test.cc                |   2 +-
 verilog/analysis/BUILD                        |   3 +
 verilog/analysis/extractors_test.cc           |   2 +-
 verilog/analysis/flow_tree.cc                 |  84 +++-
 verilog/analysis/flow_tree.h                  |  13 +
 verilog/analysis/verilog_analyzer.cc          |  16 +-
 verilog/analysis/verilog_analyzer_test.cc     |   6 +-
 verilog/analysis/verilog_project.cc           |   2 +-
 verilog/formatting/BUILD                      |   1 +
 verilog/formatting/align.cc                   |  59 ++-
 verilog/formatting/formatter.cc               | 405 ++++++++++++------
 verilog/formatting/formatter.h                |  16 +-
 verilog/formatting/formatter_test.cc          | 363 +++++++++++++++-
 verilog/formatting/token_annotator.cc         |  14 +
 verilog/formatting/tree_unwrapper.cc          | 154 +++++--
 verilog/formatting/tree_unwrapper.h           |   4 +-
 verilog/formatting/tree_unwrapper_test.cc     |  51 ++-
 verilog/preprocessor/verilog_preprocess.cc    |  24 +-
 verilog/preprocessor/verilog_preprocess.h     |  11 +-
 .../preprocessor/verilog_preprocess_test.cc   |  26 +-
 verilog/tools/formatter/verilog_format.cc     |  11 +-
 .../preprocessor/verilog_preprocessor.cc      |   7 +-
 27 files changed, 1054 insertions(+), 283 deletions(-)
diff --git a/common/formatting/tree_unwrapper.cc b/common/formatting/tree_unwrapper.cc
index a1e46d0cf6..6ba6144801 100644
--- a/common/formatting/tree_unwrapper.cc
+++ b/common/formatting/tree_unwrapper.cc
@@ -70,7 +70,21 @@ TreeUnwrapper::TreeUnwrapper(
   // array, and be able to 'extend' into the array of preformatted_tokens_.
 }
 
-const TokenPartitionTree* TreeUnwrapper::Unwrap() {
+TreeUnwrapper::TreeUnwrapper(TreeUnwrapper&& other) noexcept
+    : TreeContextVisitor(std::move(other)),
+      text_structure_view_(other.text_structure_view_),
+      preformatted_tokens_(other.preformatted_tokens_),
+      next_unfiltered_token_(other.next_unfiltered_token_),
+      current_indentation_spaces_(other.current_indentation_spaces_),
+      unwrapped_lines_(std::move(other.unwrapped_lines_)) {
+  if (other.active_unwrapped_lines_ == &other.unwrapped_lines_) {
+    active_unwrapped_lines_ = &unwrapped_lines_;
+  } else {
+    active_unwrapped_lines_ = other.active_unwrapped_lines_;
+  }
+}
+
+void TreeUnwrapper::Unwrap() {
   // Collect tokens that appear before first syntax tree leaf, e.g. comments.
   CollectLeadingFilteredTokens();
 
@@ -105,8 +119,6 @@ const TokenPartitionTree* TreeUnwrapper::Unwrap() {
   }
 
   VerifyFullTreeFormatTokenRanges();
-
-  return &unwrapped_lines_;
 }
 
 std::vector<UnwrappedLine> TreeUnwrapper::FullyPartitionedUnwrappedLines()
diff --git a/common/formatting/tree_unwrapper.h b/common/formatting/tree_unwrapper.h
index 9e0db40b46..9008dd7e19 100644
--- a/common/formatting/tree_unwrapper.h
+++ b/common/formatting/tree_unwrapper.h
@@ -48,7 +48,7 @@ class TreeUnwrapper : public TreeContextVisitor {
   // Deleted standard interfaces:
   TreeUnwrapper() = delete;
   TreeUnwrapper(const TreeUnwrapper&) = delete;
-  TreeUnwrapper(TreeUnwrapper&&) = delete;
+  TreeUnwrapper(TreeUnwrapper&&) noexcept;
   TreeUnwrapper& operator=(const TreeUnwrapper&) = delete;
   TreeUnwrapper& operator=(TreeUnwrapper&&) = delete;
 
@@ -57,7 +57,7 @@ class TreeUnwrapper : public TreeContextVisitor {
   // Partitions the token stream (in text_structure_view_) into
   // unwrapped_lines_ by traversing the syntax tree representation.
   // TODO(fangism): rename this Partition.
-  const TokenPartitionTree* Unwrap();
+  void Unwrap();
 
   // Returns a flattened copy of all of the deepest nodes in the tree of
   // unwrapped lines, which represents maximal partitioning into the smallest
@@ -81,6 +81,8 @@ class TreeUnwrapper : public TreeContextVisitor {
     return active_unwrapped_lines_;
   }
 
+  TokenPartitionTree* UnwrappedLines() { return &unwrapped_lines_; }
+
   // Returns text spanned by the syntax tree being traversed.
   absl::string_view FullText() const { return text_structure_view_.Contents(); }
 
diff --git a/common/text/text_structure.cc b/common/text/text_structure.cc
index 9ce7949810..c284d2d696 100644
--- a/common/text/text_structure.cc
+++ b/common/text/text_structure.cc
@@ -525,6 +525,14 @@ TextStructure::~TextStructure() {
   CHECK(status.ok()) << status.message() << " (in dtor)";
 }
 
+void TextStructure::RebaseTokens(const TextStructure& other) {
+  CHECK_EQ(contents_->AsStringView().length(),
+           other.contents_->AsStringView().length());
+  MutableData().RebaseTokensToSuperstring(other.contents_->AsStringView(),
+                                          contents_->AsStringView(), 0);
+  contents_ = other.contents_;
+}
+
 void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
   TokenSequence combined_tokens;
   // Gather indices and reconstruct iterators after there are no more
@@ -560,6 +568,23 @@ void TextStructureView::ExpandSubtrees(NodeExpansionMap* expansions) {
   CalculateFirstTokensPerLine();
 }
 
+void TextStructureView::ColorStreamViewTokens(size_t color) {
+  auto view_it = GetTokenStreamView().begin();
+  auto stream_it = MutableTokenStream().begin();
+  while (view_it != GetTokenStreamView().end() &&
+         stream_it != MutableTokenStream().end()) {
+    if (*view_it == stream_it) {
+      stream_it->color = color;
+      ++view_it;
+      ++stream_it;
+    } else if (*view_it < stream_it) {
+      ++view_it;
+    } else {
+      ++stream_it;
+    }
+  }
+}
+
 absl::Status TextStructure::StringViewConsistencyCheck() const {
   const absl::string_view contents = data_.Contents();
   if (!contents.empty() && !IsSubRange(contents, contents_->AsStringView())) {
diff --git a/common/text/text_structure.h b/common/text/text_structure.h
index 816ef24247..d4ad073076 100644
--- a/common/text/text_structure.h
+++ b/common/text/text_structure.h
@@ -79,6 +79,7 @@ class TextStructureView {
 
   // Do not copy/assign.  This contains pointers/iterators to internals.
   TextStructureView(const TextStructureView&) = delete;
+  TextStructureView(TextStructureView&&) = default;
   TextStructureView& operator=(const TextStructureView&) = delete;
 
   absl::string_view Contents() const { return contents_; }
@@ -172,6 +173,9 @@ class TextStructureView {
   // by this function.
   void ExpandSubtrees(NodeExpansionMap* expansions);
 
+  // Sets the 'color' of all tokens in the token stream view.
+  void ColorStreamViewTokens(size_t color);
+
   // All of this class's consistency checks combined.
   absl::Status InternalConsistencyCheck() const;
 
@@ -283,6 +287,11 @@ class TextStructure {
   // DeferredExpansion::subanalysis requires this destructor to be virtual.
   virtual ~TextStructure();
 
+  // Update tokens to point their text into the contents of the given
+  // TextStructure. The contents_ pointer in this TextStructure will point to
+  // the other TextStructure's contents.
+  void RebaseTokens(const TextStructure& other);
+
   const TextStructureView& Data() const { return data_; }
 
   TextStructureView& MutableData() { return data_; }
diff --git a/common/text/token_info.h b/common/text/token_info.h
index 7147a677ff..4c497a3772 100644
--- a/common/text/token_info.h
+++ b/common/text/token_info.h
@@ -157,6 +157,11 @@ class TokenInfo {
 
   bool isEOF() const { return token_enum_ == TK_EOF; }
 
+  // The color is used for differentiating tokens from different preprocess
+  // variants. If color equals 0, the token is part of preprocessor-disabled
+  // code. Only used in the formatter.
+  int color = 0;  // FIXME: do this differently
+
  protected:  // protected, as ExpectedTokenInfo accesses it.
   int token_enum_;
 
diff --git a/verilog/CST/expression_test.cc b/verilog/CST/expression_test.cc
index e4d254bf0b..e683c1b2c9 100644
--- a/verilog/CST/expression_test.cc
+++ b/verilog/CST/expression_test.cc
@@ -32,7 +32,7 @@
 namespace verilog {
 namespace {
 
-static constexpr VerilogPreprocess::Config kDefaultPreprocess;
+static VerilogPreprocess::Config kDefaultPreprocess;
 
 using verible::SyntaxTreeSearchTestCase;
 using verible::TextStructureView;
diff --git a/verilog/analysis/BUILD b/verilog/analysis/BUILD
index f628ae249b..9a816a8250 100644
--- a/verilog/analysis/BUILD
+++ b/verilog/analysis/BUILD
@@ -79,10 +79,13 @@ cc_library(
     hdrs = ["flow_tree.h"],
     deps = [
         "//common/text:token-stream-view",
+        "//common/util:interval-set",
         "//common/util:logging",
         "//common/util:status-macros",
         "//verilog/parser:verilog-token-enum",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/status",
+        "@com_google_absl//absl/status:statusor",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/verilog/analysis/extractors_test.cc b/verilog/analysis/extractors_test.cc
index 7df644efa0..ded7c77a08 100644
--- a/verilog/analysis/extractors_test.cc
+++ b/verilog/analysis/extractors_test.cc
@@ -26,7 +26,7 @@
 namespace verilog {
 namespace analysis {
 namespace {
-static constexpr VerilogPreprocess::Config kDefaultPreprocess;
+static VerilogPreprocess::Config kDefaultPreprocess;
 
 TEST(CollectInterfaceNamesTest, NonModuleTests) {
   const std::pair<absl::string_view, std::set<std::string>> kTestCases[] = {
diff --git a/verilog/analysis/flow_tree.cc b/verilog/analysis/flow_tree.cc
index 68bdf0dd55..1de10ae39c 100644
--- a/verilog/analysis/flow_tree.cc
+++ b/verilog/analysis/flow_tree.cc
@@ -21,6 +21,7 @@
 #include "absl/status/status.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/string_view.h"
+#include "common/util/interval_set.h"
 #include "common/util/logging.h"
 #include "common/util/status_macros.h"
 #include "verilog/parser/verilog_token_enum.h"
@@ -125,7 +126,7 @@ absl::Status FlowTree::MacroFollows(
       conditional_iterator->token_enum() != PP_elsif) {
     return absl::InvalidArgumentError("Error macro name can't be extracted.");
   }
-  auto macro_iterator = conditional_iterator + 1;
+  auto macro_iterator = conditional_iterator + 2;
   if (macro_iterator->token_enum() != PP_Identifier) {
     return absl::InvalidArgumentError("Expected identifier for macro name.");
   }
@@ -141,7 +142,7 @@ absl::Status FlowTree::AddMacroOfConditional(
     return absl::InvalidArgumentError(
         "Error no macro follows the conditional directive.");
   }
-  auto macro_iterator = conditional_iterator + 1;
+  auto macro_iterator = conditional_iterator + 2;
   auto macro_identifier = macro_iterator->text();
   if (conditional_macro_id_.find(macro_identifier) ==
       conditional_macro_id_.end()) {
@@ -161,7 +162,7 @@ int FlowTree::GetMacroIDOfConditional(
     // TODO(karimtera): add a better error handling.
     return -1;
   }
-  auto macro_iterator = conditional_iterator + 1;
+  auto macro_iterator = conditional_iterator + 2;
   auto macro_identifier = macro_iterator->text();
   // It is always assumed that the macro already exists in the map.
   return conditional_macro_id_[macro_identifier];
@@ -176,6 +177,83 @@ absl::Status FlowTree::GenerateVariants(const VariantReceiver &receiver) {
   return DepthFirstSearch(receiver, source_sequence_.begin());
 }
 
+absl::StatusOr<FlowTree::DefineVariants> FlowTree::MinCoverDefineVariants() {
+  auto status = GenerateControlFlowTree();
+  if (!status.ok()) return status;
+  verible::IntervalSet<int64_t> covered;       // Tokens covered by
+                                               // MinCoverDefineVariants.
+  verible::IntervalSet<int64_t> last_covered;  // Tokens covered
+                                               // by the previous iterations.
+  DefineVariants define_variants;  // The result – all define variants that
+                                   // should cover the entire source
+  DefineSet visited;  // Visited defines are ones that are assumed to be defined
+                      // or undefined (decided in a previous iteration)
+  const int64_t tok_count = static_cast<int64_t>(source_sequence_.size());
+  while (!covered.Contains({0, tok_count})) {
+    DefineSet defines;  // Define sets are moved into the define variants list,
+                        // so we make a new one each iteration
+    visited.clear();    // We keep the visited set to avoid unnecessary
+                        // allocations, but clear it each iteration
+    TokenSequenceConstIterator tok_it = source_sequence_.begin();
+    while (tok_it < source_sequence_.end()) {
+      covered.Add(std::distance(source_sequence_.begin(), tok_it));
+      if (tok_it->token_enum() == PP_ifdef ||
+          tok_it->token_enum() == PP_ifndef ||
+          tok_it->token_enum() == PP_elsif) {
+        const auto macro_id_it = tok_it + 2;
+        auto macro_text = macro_id_it->text();
+        bool negated = tok_it->token_enum() == PP_ifndef;
+        // If this macro was already visited (either defined/undefined), we
+        // to stick to the same branch. TODO: handle `defines
+        if (visited.contains(macro_text)) {
+          bool assume_condition_is_true =
+              (negated ^ defines.contains(macro_text));
+          tok_it = edges_[tok_it][assume_condition_is_true ? 0 : 1];
+        } else {
+          // First time we see this macro; mark as visited
+          visited.insert(macro_text);
+          const auto if_it = edges_[tok_it][0];
+          const auto if_idx = std::distance(source_sequence_.begin(), if_it);
+          const auto else_it = edges_[tok_it][1];
+          const auto else_idx =
+              std::distance(source_sequence_.begin(), else_it);
+          if (!covered.Contains({if_idx, else_idx})) {
+            // If the `ifdef is not covered, we assume the condition is true
+            if (!negated) defines.insert(macro_text);
+            tok_it = if_it;
+          } else {
+            // Else we assume the condition is false
+            if (negated) defines.insert(macro_text);
+            tok_it = else_it;
+          }
+        }
+      } else {
+        const auto it = edges_.find(tok_it);
+        if (it == edges_.end() || it->second.empty()) {
+          // If there's no outgoing edge, just move to the next token.
+          tok_it++;
+        } else {
+          // Else jump
+          tok_it = edges_[tok_it][0];
+        }
+      }
+    }
+    define_variants.push_back(std::move(defines));
+    // To prevent an infinite loop, if nothing new was covered, break.
+    if (last_covered == covered) {
+      // TODO: If there are nested `ifdefs that contradict each other early in
+      // the source, this will prevent us from traversing the rest of the flow
+      // tree. It would be better to detect this case, assume that the
+      // contradicting part is covered, and continue the analysis.
+      VLOG(4) << "Giving up on finding all define variants";
+      break;  // Perhaps we should error?
+    }
+    last_covered = covered;
+  }
+  VLOG(4) << "Done generating define variants. Coverage: " << covered;
+  return define_variants;
+}
+
 // Constructs the control flow tree, which determines the edge from each node
 // (token index) to the next possible childs, And save edge_from_iterator in
 // edges_.
diff --git a/verilog/analysis/flow_tree.h b/verilog/analysis/flow_tree.h
index c5ed13a8e3..848ebfef5d 100644
--- a/verilog/analysis/flow_tree.h
+++ b/verilog/analysis/flow_tree.h
@@ -20,7 +20,9 @@
 #include <string>
 #include <vector>
 
+#include "absl/container/flat_hash_set.h"
 #include "absl/status/status.h"
+#include "absl/status/statusor.h"
 #include "common/text/token_stream_view.h"
 
 namespace verilog {
@@ -80,6 +82,17 @@ class FlowTree {
   // Generates all possible variants.
   absl::Status GenerateVariants(const VariantReceiver &receiver);
 
+  // Set of macro name defines.
+  using DefineSet = absl::flat_hash_set<absl::string_view>;
+
+  // A list of macro name sets; each set represents a variant of the source;
+  // together they should cover the entire source.
+  using DefineVariants = std::vector<DefineSet>;
+
+  // Returns the minimum set of defines needed to generate token stream variants
+  // that cover the entire source.
+  absl::StatusOr<DefineVariants> MinCoverDefineVariants();
+
   // Returns all the used macros in conditionals, ordered with the same ID as
   // used in BitSets.
   const std::vector<TokenSequenceConstIterator> &GetUsedMacros() {
diff --git a/verilog/analysis/verilog_analyzer.cc b/verilog/analysis/verilog_analyzer.cc
index cbb0ccb861..696b4a3449 100644
--- a/verilog/analysis/verilog_analyzer.cc
+++ b/verilog/analysis/verilog_analyzer.cc
@@ -248,17 +248,13 @@ absl::Status VerilogAnalyzer::Analyze() {
   // Lex into tokens.
   RETURN_IF_ERROR(Tokenize());
 
-  // Here would be one place to analyze the raw token stream.
-  FilterTokensForSyntaxTree();
-
-  // Disambiguate tokens using lexical context.
-  ContextualizeTokens();
-
   // pseudo-preprocess token stream.
   //   Not all analyses will want to preprocess.
   {
     VerilogPreprocess preprocessor(preprocess_config_);
-    preprocessor_data_ = preprocessor.ScanStream(Data().GetTokenStreamView());
+    verible::TokenStreamView lexed_streamview;
+    InitTokenStreamView(Data().TokenStream(), &lexed_streamview);
+    preprocessor_data_ = preprocessor.ScanStream(lexed_streamview);
     if (!preprocessor_data_.errors.empty()) {
       for (const auto& error : preprocessor_data_.errors) {
         rejected_tokens_.push_back(verible::RejectedToken{
@@ -288,6 +284,12 @@ absl::Status VerilogAnalyzer::Analyze() {
     // TODO(fangism): could we just move, swap, or directly reference?
   }
 
+  // Here would be one place to analyze the raw token stream.
+  FilterTokensForSyntaxTree();
+
+  // Disambiguate tokens using lexical context.
+  ContextualizeTokens();
+
   auto generator = MakeTokenViewer(Data().GetTokenStreamView());
   VerilogParser parser(&generator, filename_);
   parse_status_ = FileAnalyzer::Parse(&parser);
diff --git a/verilog/analysis/verilog_analyzer_test.cc b/verilog/analysis/verilog_analyzer_test.cc
index a56f5ec982..1da34b33c0 100644
--- a/verilog/analysis/verilog_analyzer_test.cc
+++ b/verilog/analysis/verilog_analyzer_test.cc
@@ -62,7 +62,7 @@ using verible::SyntaxTreeLeaf;
 using verible::TokenInfo;
 using verible::TokenInfoTestData;
 
-static constexpr verilog::VerilogPreprocess::Config kDefaultPreprocess;
+static verilog::VerilogPreprocess::Config kDefaultPreprocess;
 
 bool TreeContainsToken(const ConcreteSyntaxTree& tree, const TokenInfo& token) {
   const auto* matching_leaf =
@@ -509,10 +509,10 @@ TEST(AnalyzeVerilogAutomaticMode, InferredModuleBodyMode) {
 }
 
 TEST(AnalyzeVerilogAutomaticMode, AutomaticWithFallback) {
-  static constexpr verilog::VerilogPreprocess::Config kNoBranchFilter{
+  static verilog::VerilogPreprocess::Config kNoBranchFilter{
       .filter_branches = false,
   };
-  static constexpr verilog::VerilogPreprocess::Config kWithBranchFilter{
+  static verilog::VerilogPreprocess::Config kWithBranchFilter{
       .filter_branches = true,
   };
 
diff --git a/verilog/analysis/verilog_project.cc b/verilog/analysis/verilog_project.cc
index 43b30f6e7a..ef21435d48 100644
--- a/verilog/analysis/verilog_project.cc
+++ b/verilog/analysis/verilog_project.cc
@@ -36,7 +36,7 @@ namespace verilog {
 // All files we process with the verilog project, essentially applications that
 // build a symbol table (project-tool, kythe-indexer) only benefit from
 // processing the same sequence of tokens a synthesis tool sees.
-static constexpr verilog::VerilogPreprocess::Config kPreprocessConfig{
+static verilog::VerilogPreprocess::Config kPreprocessConfig{
     .filter_branches = true,
 };
 
diff --git a/verilog/formatting/BUILD b/verilog/formatting/BUILD
index b105c9e9ab..b9050fb2b8 100644
--- a/verilog/formatting/BUILD
+++ b/verilog/formatting/BUILD
@@ -163,6 +163,7 @@ cc_library(
         "//common/util:vector-tree-iterators",
         "//verilog/CST:declaration",
         "//verilog/CST:module",
+        "//verilog/analysis:flow-tree",
         "//verilog/analysis:verilog-analyzer",
         "//verilog/analysis:verilog-equivalence",
         "//verilog/parser:verilog-token-enum",
diff --git a/verilog/formatting/align.cc b/verilog/formatting/align.cc
index 5997e5caea..737adedcee 100644
--- a/verilog/formatting/align.cc
+++ b/verilog/formatting/align.cc
@@ -584,13 +584,22 @@ enum class AlignableSyntaxSubtype {
   kDistItem,  // Distribution items.
 };
 
+int EncodeAlignableSyntaxSubtype(AlignableSyntaxSubtype subtype, int color) {
+  return color << 16 | static_cast<int>(subtype);
+}
+
+AlignableSyntaxSubtype DecodeAlignableSyntaxSubtype(int encoded) {
+  return static_cast<AlignableSyntaxSubtype>(encoded & 0xFF);
+}
+
 static AlignedPartitionClassification AlignClassify(
     AlignmentGroupAction match,
-    AlignableSyntaxSubtype subtype = AlignableSyntaxSubtype::kDontCare) {
+    AlignableSyntaxSubtype subtype = AlignableSyntaxSubtype::kDontCare,
+    int color = 0) {
   if (match == AlignmentGroupAction::kMatch) {
     CHECK(subtype != AlignableSyntaxSubtype::kDontCare);
   }
-  return {match, static_cast<int>(subtype)};
+  return {match, EncodeAlignableSyntaxSubtype(subtype, color)};
 }
 
 static std::vector<TaggedTokenPartitionRange> GetConsecutiveModuleItemGroups(
@@ -609,13 +618,17 @@ static std::vector<TaggedTokenPartitionRange> GetConsecutiveModuleItemGroups(
         const SyntaxTreeNode& node = verible::SymbolCastToNode(*origin);
         // Align net/variable declarations.
         if (IsAlignableDeclaration(node)) {
-          return AlignClassify(AlignmentGroupAction::kMatch,
-                               AlignableSyntaxSubtype::kDataDeclaration);
+          return AlignClassify(
+              AlignmentGroupAction::kMatch,
+              AlignableSyntaxSubtype::kDataDeclaration,
+              partition.Value().TokensRange().front().token->color);
         }
         // Align continuous assignment, like "assign foo = bar;"
         if (node.MatchesTag(NodeEnum::kContinuousAssignmentStatement)) {
-          return AlignClassify(AlignmentGroupAction::kMatch,
-                               AlignableSyntaxSubtype::kContinuousAssignment);
+          return AlignClassify(
+              AlignmentGroupAction::kMatch,
+              AlignableSyntaxSubtype::kContinuousAssignment,
+              partition.Value().TokensRange().front().token->color);
         }
         return AlignClassify(AlignmentGroupAction::kNoMatch);
       });
@@ -636,10 +649,11 @@ static std::vector<TaggedTokenPartitionRange> GetConsecutiveClassItemGroups(
         }
         const SyntaxTreeNode& node = verible::SymbolCastToNode(*origin);
         // Align class member variables.
-        return AlignClassify(IsAlignableDeclaration(node)
-                                 ? AlignmentGroupAction::kMatch
-                                 : AlignmentGroupAction::kNoMatch,
-                             AlignableSyntaxSubtype::kClassMemberVariables);
+        return AlignClassify(
+            IsAlignableDeclaration(node) ? AlignmentGroupAction::kMatch
+                                         : AlignmentGroupAction::kNoMatch,
+            AlignableSyntaxSubtype::kClassMemberVariables,
+            partition.Value().TokensRange().front().token->color);
       });
 }
 
@@ -659,19 +673,25 @@ static std::vector<TaggedTokenPartitionRange> GetAlignableStatementGroups(
         const SyntaxTreeNode& node = verible::SymbolCastToNode(*origin);
         // Align local variable declarations.
         if (IsAlignableDeclaration(node)) {
-          return AlignClassify(AlignmentGroupAction::kMatch,
-                               AlignableSyntaxSubtype::kDataDeclaration);
+          return AlignClassify(
+              AlignmentGroupAction::kMatch,
+              AlignableSyntaxSubtype::kDataDeclaration,
+              partition.Value().TokensRange().front().token->color);
         }
         // Align blocking assignments.
         if (node.MatchesTagAnyOf({NodeEnum::kBlockingAssignmentStatement,
                                   NodeEnum::kNetVariableAssignment})) {
-          return AlignClassify(AlignmentGroupAction::kMatch,
-                               AlignableSyntaxSubtype::kBlockingAssignment);
+          return AlignClassify(
+              AlignmentGroupAction::kMatch,
+              AlignableSyntaxSubtype::kBlockingAssignment,
+              partition.Value().TokensRange().front().token->color);
         }
         // Align nonblocking assignments.
         if (node.MatchesTag(NodeEnum::kNonblockingAssignmentStatement)) {
-          return AlignClassify(AlignmentGroupAction::kMatch,
-                               AlignableSyntaxSubtype::kNonBlockingAssignment);
+          return AlignClassify(
+              AlignmentGroupAction::kMatch,
+              AlignableSyntaxSubtype::kNonBlockingAssignment,
+              partition.Value().TokensRange().front().token->color);
         }
         return AlignClassify(AlignmentGroupAction::kNoMatch);
       });
@@ -1416,7 +1436,7 @@ static const AlignmentHandlerMapType& AlignmentHandlerLibrary() {
 static verible::AlignmentCellScannerFunction AlignmentColumnScannerSelector(
     const FormatStyle& vstyle, int subtype) {
   static const auto& handler_map = AlignmentHandlerLibrary();
-  const auto iter = handler_map.find(AlignableSyntaxSubtype(subtype));
+  const auto iter = handler_map.find(DecodeAlignableSyntaxSubtype(subtype));
   CHECK(iter != handler_map.end()) << "subtype: " << subtype;
   return iter->second.column_scanner_func(vstyle);
 }
@@ -1424,8 +1444,8 @@ static verible::AlignmentCellScannerFunction AlignmentColumnScannerSelector(
 static verible::AlignmentPolicy AlignmentPolicySelector(
     const FormatStyle& vstyle, int subtype) {
   static const auto& handler_map = AlignmentHandlerLibrary();
-  const auto iter = handler_map.find(AlignableSyntaxSubtype(subtype));
-  CHECK(iter != handler_map.end()) << "subtype: " << subtype;
+  const auto iter = handler_map.find(DecodeAlignableSyntaxSubtype(subtype));
+  CHECK(iter != handler_map.end()) << "subtype: " << (0xFF & subtype);
   return iter->second.policy_func(vstyle);
 }
 
@@ -1580,6 +1600,7 @@ void TabularAlignTokenPartitions(const FormatStyle& style,
   const ExtractAlignmentGroupsFunction extract_alignment_groups =
       std::bind(alignment_partitioner, std::placeholders::_1, style);
 
+  VLOG(4) << "===> " << partition.Value() << std::endl;
   verible::TabularAlignTokens(style.column_limit, full_text,
                               disabled_byte_ranges, extract_alignment_groups,
                               &partition);
diff --git a/verilog/formatting/formatter.cc b/verilog/formatting/formatter.cc
index dafe89a799..ee2a4469d8 100644
--- a/verilog/formatting/formatter.cc
+++ b/verilog/formatting/formatter.cc
@@ -20,6 +20,7 @@
 #include <iterator>
 #include <vector>
 
+#include "absl/algorithm/container.h"
 #include "absl/status/status.h"
 #include "absl/status/statusor.h"
 #include "common/formatting/format_token.h"
@@ -46,6 +47,7 @@
 #include "common/util/vector_tree_iterators.h"
 #include "verilog/CST/declaration.h"
 #include "verilog/CST/module.h"
+#include "verilog/analysis/flow_tree.h"
 #include "verilog/analysis/verilog_analyzer.h"
 #include "verilog/analysis/verilog_equivalence.h"
 #include "verilog/formatting/align.h"
@@ -53,8 +55,9 @@
 #include "verilog/formatting/format_style.h"
 #include "verilog/formatting/token_annotator.h"
 #include "verilog/formatting/tree_unwrapper.h"
+#include "verilog/parser/verilog_lexer.h"
+#include "verilog/parser/verilog_token_classifications.h"
 #include "verilog/parser/verilog_token_enum.h"
-#include "verilog/preprocessor/verilog_preprocess.h"
 
 namespace verilog {
 namespace formatter {
@@ -73,32 +76,40 @@ using verible::VectorTreeLeavesIterator;
 
 using partition_node_type = VectorTree<TreeViewNodeInfo<TokenPartitionTree>>;
 
+using PreprocessConfigVariants = std::vector<VerilogPreprocess::Config>;
+
 // Takes a TextStructureView and FormatStyle, and formats UnwrappedLines.
 class Formatter {
  public:
-  Formatter(const verible::TextStructureView& text_structure,
-            const FormatStyle& style)
-      : text_structure_(text_structure), style_(style) {}
+  Formatter(const absl::string_view text, const FormatStyle& style,
+            const PreprocessConfigVariants& preproc_config_variants_ = {{}})
+      : text_(text),
+        preproc_config_variants_(preproc_config_variants_),
+        style_(style) {}
 
   // Formats the source code
-  Status Format(const ExecutionControl&);
-
-  Status Format() { return Format(ExecutionControl()); }
+  Status Format(const ExecutionControl&, const verible::LineNumberSet&);
 
-  void SelectLines(const LineNumberSet& lines);
+  Status Format() { return Format(ExecutionControl(), {}); }
 
   // Outputs all of the FormattedExcerpt lines to stream.
   // If "include_disabled" is false, does not contain the disabled ranges.
   void Emit(bool include_disabled, std::ostream& stream) const;
 
  private:
-  // Contains structural information about the code to format, such as
-  // TokenSequence from lexing, and ConcreteSyntaxTree from parsing
-  const verible::TextStructureView& text_structure_;
+  // The original text to format
+  absl::string_view text_;
+
+  // Preprocessor configuration variants to use for formatting.
+  std::vector<VerilogPreprocess::Config> preproc_config_variants_;
 
   // The style configuration for the formatter
   FormatStyle style_;
 
+  // Contains structural information about the code to format, such as
+  // TokenSequence from lexing, and ConcreteSyntaxTree from parsing
+  std::vector<std::unique_ptr<verible::TextStructure>> text_structures_;
+
   // Ranges of text where formatter is disabled (by comment directives).
   ByteOffsetSet disabled_ranges_;
 
@@ -107,48 +118,47 @@ class Formatter {
 };
 
 // TODO(b/148482625): make this public/re-usable for general content comparison.
-Status VerifyFormatting(const verible::TextStructureView& text_structure,
-                        absl::string_view formatted_output,
-                        absl::string_view filename) {
+Status VerifyFormatting(
+    const absl::string_view text, const absl::string_view formatted_output,
+    absl::string_view filename,
+    const PreprocessConfigVariants& preproc_config_variants = {{}}) {
   // Verify that the formatted output creates the same lexical
   // stream (filtered) as the original.  If any tokens were lost, fall back to
   // printing the original source unformatted.
   // Note: We cannot just Tokenize() and compare because Analyze()
   // performs additional transformations like expanding MacroArgs to
   // expression subtrees.
-  const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
-      formatted_output, filename, verilog::VerilogPreprocess::Config());
-  const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
-  const auto reparse_status = reanalyzer->ParseStatus();
-
-  if (!relex_status.ok() || !reparse_status.ok()) {
-    const auto& token_errors = reanalyzer->TokenErrorMessages();
-    // Only print the first error.
-    if (!token_errors.empty()) {
-      return absl::DataLossError(
-          absl::StrCat("Error lex/parsing-ing formatted output.  "
-                       "Please file a bug.\nFirst error: ",
-                       token_errors.front()));
+  for (const VerilogPreprocess::Config& config : preproc_config_variants) {
+    {
+      const auto reanalyzer = VerilogAnalyzer::AnalyzeAutomaticMode(
+          formatted_output, filename, config);
+      const auto relex_status = ABSL_DIE_IF_NULL(reanalyzer)->LexStatus();
+      const auto reparse_status = reanalyzer->ParseStatus();
+
+      if (!relex_status.ok() || !reparse_status.ok()) {
+        const auto& token_errors = reanalyzer->TokenErrorMessages();
+        // Only print the first error.
+        if (!token_errors.empty()) {
+          return absl::DataLossError(
+              absl::StrCat("Error lexing/parsing formatted output.  "
+                           "Please file a bug.\nFirst error: ",
+                           token_errors.front()));
+        }
+      }
     }
-  }
 
-  {
-    // Filter out only whitespaces and compare.
-    // First difference will be printed to cerr for debugging.
-    std::ostringstream errstream;
-    // Note: text_structure.TokenStream() and reanalyzer->Data().TokenStream()
-    // contain already lexed tokens, so this comparison check is repeating the
-    // work done by the lexers.
-    // Should performance be a concern, we could pass in those tokens to
-    // avoid lexing twice, but for now, using plain strings as an interface
-    // to comparator functions is simpler and more intuitive.
-    // See analysis/verilog_equivalence.cc implementation.
-    if (verilog::FormatEquivalent(text_structure.Contents(), formatted_output,
-                                  &errstream) != DiffStatus::kEquivalent) {
-      return absl::DataLossError(absl::StrCat(
-          "Formatted output is lexically different from the input.    "
-          "Please file a bug.  Details:\n",
-          errstream.str()));
+    {
+      // Filter out only whitespaces and compare.
+      // First difference will be printed to cerr for debugging.
+      std::ostringstream errstream;
+      // See analysis/verilog_equivalence.cc implementation.
+      if (verilog::FormatEquivalent(text, formatted_output, &errstream) !=
+          DiffStatus::kEquivalent) {
+        return absl::DataLossError(absl::StrCat(
+            "Formatted output is lexically different from the input.    "
+            "Please file a bug.  Details:\n",
+            errstream.str()));
+      }
     }
   }
 
@@ -160,7 +170,8 @@ static Status ReformatVerilogIncrementally(absl::string_view original_text,
                                            absl::string_view filename,
                                            const FormatStyle& style,
                                            std::ostream& reformat_stream,
-                                           const ExecutionControl& control) {
+                                           const ExecutionControl& control,
+                                           bool preprocess) {
   // Differences from the first formatting.
   const verible::LineDiffs formatting_diffs(original_text, formatted_text);
   // Added lines will be re-applied to incremental re-formatting.
@@ -173,16 +184,14 @@ static Status ReformatVerilogIncrementally(absl::string_view original_text,
   formatted_lines.Add(formatting_diffs.after_lines.size() + 1);
   VLOG(1) << "formatted changed lines: " << formatted_lines;
   return FormatVerilog(formatted_text, filename, style, reformat_stream,
-                       formatted_lines, control);
+                       formatted_lines, control, preprocess);
 }
 
-static Status ReformatVerilog(absl::string_view original_text,
-                              absl::string_view formatted_text,
-                              absl::string_view filename,
-                              const FormatStyle& style,
-                              std::ostream& reformat_stream,
-                              const LineNumberSet& lines,
-                              const ExecutionControl& control) {
+static Status ReformatVerilog(
+    absl::string_view original_text, absl::string_view formatted_text,
+    absl::string_view filename, const FormatStyle& style,
+    std::ostream& reformat_stream, const LineNumberSet& lines,
+    const ExecutionControl& control, bool preprocess) {
   // Disable reformat check to terminate recursion.
   ExecutionControl convergence_control(control);
   convergence_control.verify_convergence = false;
@@ -190,19 +199,19 @@ static Status ReformatVerilog(absl::string_view original_text,
   if (lines.empty()) {
     // format whole file
     return FormatVerilog(formatted_text, filename, style, reformat_stream,
-                         lines, convergence_control);
+                         lines, convergence_control, preprocess);
   }
   // reformat incrementally
   return ReformatVerilogIncrementally(original_text, formatted_text, filename,
                                       style, reformat_stream,
-                                      convergence_control);
+                                      convergence_control, preprocess);
 }
 
 static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
-    absl::string_view text, absl::string_view filename) {
+    absl::string_view text, absl::string_view filename,
+    const verilog::VerilogPreprocess::Config& preprocess_config = {}) {
   std::unique_ptr<VerilogAnalyzer> analyzer =
-      VerilogAnalyzer::AnalyzeAutomaticMode(
-          text, filename, verilog::VerilogPreprocess::Config());
+      VerilogAnalyzer::AnalyzeAutomaticMode(text, filename, preprocess_config);
   {
     // Lex and parse code.  Exit on failure.
     const auto lex_status = ABSL_DIE_IF_NULL(analyzer)->LexStatus();
@@ -222,16 +231,16 @@ static absl::StatusOr<std::unique_ptr<VerilogAnalyzer>> ParseWithStatus(
   return analyzer;
 }
 
-absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
-                           absl::string_view filename, const FormatStyle& style,
-                           std::string* formatted_text,
-                           const verible::LineNumberSet& lines,
-                           const ExecutionControl& control) {
-  Formatter fmt(text_structure, style);
-  fmt.SelectLines(lines);
+absl::Status FormatVerilog(
+    const absl::string_view text, absl::string_view filename,
+    const FormatStyle& style, std::string* formatted_text,
+    const verible::LineNumberSet& lines,
+    std::vector<VerilogPreprocess::Config>& preproc_config_variants,
+    const ExecutionControl& control) {
+  Formatter fmt(text, style, preproc_config_variants);
 
   // Format code.
-  Status format_status = fmt.Format(control);
+  Status format_status = fmt.Format(control, lines);
   if (!format_status.ok()) {
     if (format_status.code() != StatusCode::kResourceExhausted) {
       // Some more fatal error, halt immediately.
@@ -252,8 +261,7 @@ absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
   *formatted_text = output_buffer.str();
 
   // For now, unconditionally verify.
-  if (Status verify_status =
-          VerifyFormatting(text_structure, *formatted_text, filename);
+  if (Status verify_status = VerifyFormatting(text, *formatted_text, filename);
       !verify_status.ok()) {
     return verify_status;
   }
@@ -261,17 +269,54 @@ absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
   return format_status;
 }
 
+// Returns the minimum set of filtering preprocess configs needed to generate
+// token stream variants that cover the entire source.
+absl::StatusOr<PreprocessConfigVariants> GetMinCoverPreprocessConfigs(
+    VerilogAnalyzer* analyzer) {
+  if (Status tokenize_status = analyzer->Tokenize(); !tokenize_status.ok()) {
+    return tokenize_status;
+  }
+  FlowTree control_flow_tree(analyzer->Data().TokenStream());
+  auto define_variants = control_flow_tree.MinCoverDefineVariants();
+  if (!define_variants.ok()) return define_variants.status();
+
+  std::vector<VerilogPreprocess::Config> config_variants;
+  config_variants.reserve(define_variants->size());
+  for (const FlowTree::DefineSet& defines : *define_variants) {
+    VerilogPreprocess::Config config_variant{.filter_branches = true};
+    for (auto define : defines) {
+      config_variant.macro_definitions.emplace(define, std::nullopt);
+    }
+    config_variants.push_back(config_variant);
+  }
+  return config_variants;
+}
+
 Status FormatVerilog(absl::string_view text, absl::string_view filename,
                      const FormatStyle& style, std::ostream& formatted_stream,
                      const LineNumberSet& lines,
-                     const ExecutionControl& control) {
-  const auto analyzer = ParseWithStatus(text, filename);
-  if (!analyzer.ok()) return analyzer.status();
+                     const ExecutionControl& control, bool preprocess) {
+  // Prepare preprocess config variants
+  std::vector<VerilogPreprocess::Config> preproc_config_variants;
+  // TODO: Make VerilogAnalyzer movable and make this an std::optional
+  std::unique_ptr<VerilogAnalyzer> preprocess_variants_analyzer;
+  // Keep this analyzer alive, as the defines in preprocess_config_variants
+  // refer to its text contents
+  if (preprocess) {
+    preprocess_variants_analyzer =
+        std::make_unique<VerilogAnalyzer>(text, filename);
+    auto variants_or =
+        GetMinCoverPreprocessConfigs(&*preprocess_variants_analyzer);
+    if (!variants_or.ok()) return variants_or.status();
+    preproc_config_variants = std::move(*variants_or);
+  } else {
+    preproc_config_variants.push_back(VerilogPreprocess::Config());
+  }
 
-  const verible::TextStructureView& text_structure = analyzer->get()->Data();
   std::string formatted_text;
-  Status format_status = FormatVerilog(text_structure, filename, style,
-                                       &formatted_text, lines, control);
+  Status format_status = FormatVerilog(text, filename, style, &formatted_text,
+                                       lines, preproc_config_variants, control);
+
   // Commit formatted text to the output stream independent of status.
   formatted_stream << formatted_text;
   if (!format_status.ok()) return format_status;
@@ -283,7 +328,7 @@ Status FormatVerilog(absl::string_view text, absl::string_view filename,
     std::ostringstream reformat_stream;
     if (auto reformat_status =
             ReformatVerilog(text, formatted_text, filename, style,
-                            reformat_stream, lines, control);
+                            reformat_stream, lines, control, preprocess);
         !reformat_status.ok()) {
       return reformat_status;
     }
@@ -303,11 +348,10 @@ absl::Status FormatVerilogRange(const verible::TextStructureView& structure,
     return absl::OkStatus();
   }
 
-  Formatter fmt(structure, style);
-  fmt.SelectLines({line_range});
+  Formatter fmt(structure.Contents(), style);
 
   // Format code.
-  Status format_status = fmt.Format(control);
+  Status format_status = fmt.Format(control, {line_range});
   if (!format_status.ok()) return format_status;
 
   // In any diagnostic mode, proceed no further.
@@ -603,11 +647,6 @@ std::ostream& ExecutionControl::Stream() const {
   return (stream != nullptr) ? *stream : std::cout;
 }
 
-void Formatter::SelectLines(const LineNumberSet& lines) {
-  disabled_ranges_ = EnabledLinesToDisabledByteRanges(
-      lines, text_structure_.GetLineColumnMap());
-}
-
 // Given control flags and syntax tree, selectively disable some ranges
 // of text from formatting.  This provides an easy way to preserve spacing on
 // selected syntax subtrees to reduce formatter harm while allowing
@@ -786,45 +825,154 @@ class ContinuationCommentAligner {
   int formatted_column_ = kInvalidColumn;
 };
 
-Status Formatter::Format(const ExecutionControl& control) {
-  const absl::string_view full_text(text_structure_.Contents());
-  const auto& token_stream(text_structure_.TokenStream());
+// Merges the second partition tree into the first one. Whenever there is a
+// discrepancy, it chooses the more granular partition.
+void MergePartitionTrees(verible::TokenPartitionTree* target,
+                         verible::TokenPartitionTree& source) {
+  const auto tokens_begin = [](const TokenPartitionTree& node) {
+    return node.Value().TokensRange().front().token->text().begin();
+  };
+  const auto tokens_end = [](const TokenPartitionTree& node) {
+    return node.Value().TokensRange().back().token->text().end();
+  };
+  auto target_it = VectorTreeLeavesIterator(target);
+  auto target_end = ++VectorTreeLeavesIterator(&RightmostDescendant(*target));
+  auto source_it = VectorTreeLeavesIterator(&source);
+  const auto source_end =
+      ++VectorTreeLeavesIterator(&verible::RightmostDescendant(source));
+  while (target_it != target_end && source_it != source_end) {
+    VLOG(1) << "Merging partitions.";
+    VLOG(1) << "Source: " << *source_it;
+    VLOG(1) << "Target: " << *target_it;
+    if (tokens_begin(*target_it) == tokens_begin(*source_it) &&
+        tokens_end(*target_it) == tokens_end(*source_it)) {
+      // [-- SOURCE --]
+      // [-- TARGET --]
+      VLOG(1) << "Exact match of partitions";
+      if (!target_it->Value().Origin() && source_it->Value().Origin()) {
+        *target_it = *source_it;
+      }
+      target_it++;
+      source_it++;
+    } else if (tokens_begin(*target_it) < tokens_begin(*source_it) &&
+               tokens_end(*target_it) == tokens_end(*source_it)) {
+      //   [-- SOURCE --]
+      // [-- TARGET --]
+      VLOG(1) << "Source partition has more leaves";
+      target_it->Children().push_back(*source_it);
+      auto parent = target_it->Parent();
+      size_t i = &*target_it - &parent->Children().front();
+      verible::FlattenOneChild(*parent, i);
+      target_it = VectorTreeLeavesIterator(&parent->Children()[i]);
+      target_end = ++VectorTreeLeavesIterator(&RightmostDescendant(*target));
+      source_it++;
+    } else if (tokens_end(*target_it) <= tokens_end(*source_it)) {
+      //   [-- SOURCE --]    [-- SOURCE --]    [-- SOURCE --]    [-- SOURCE --]
+      // [-- TARGET --]   or [- TARGET -]   or  [- TARGET -]  or [- TARGET -]
+      VLOG(1) << "Target is behind";
+      if (tokens_end(*target_it) == tokens_end(*source_it)) source_it++;
+      target_it++;
+    } else if (tokens_end(*source_it) < tokens_end(*target_it)) {
+      // [-- SOURCE --]      [- SOURCE -]       [- SOURCE -]
+      //   [-- TARGET --] or [-- TARGET --] or [-- TARGET --]
+      VLOG(1) << "Source is behind";
+      target_it->Children().push_back(*source_it);
+      source_it++;
+    } else {
+      LOG(FATAL) << "Unreachable.";
+    }
+  }
+}
+
+Status Formatter::Format(const ExecutionControl& control,
+                         const verible::LineNumberSet& lines) {
+  struct FormatVariant {
+    FormatVariant(const FormatVariant&) = delete;
+    FormatVariant(FormatVariant&&) = default;
+
+    const verible::TextStructureView& text_structure;
+    std::unique_ptr<UnwrapperData> unwrapper_data;
+    TreeUnwrapper tree_unwrapper;
+
+    FormatVariant(const verible::TextStructureView& text_struct,
+                  const FormatStyle& style)
+        : text_structure(text_struct),
+          unwrapper_data{
+              std::make_unique<UnwrapperData>(text_structure.TokenStream())},
+          tree_unwrapper{text_structure, style,
+                         unwrapper_data->preformatted_tokens} {}
+  };
+
+  std::vector<FormatVariant> format_variants;
+  format_variants.reserve(preproc_config_variants_.size());
+  size_t color = 0;
+  for (const VerilogPreprocess::Config& config : preproc_config_variants_) {
+    color++;
+
+    const auto analyzer = ParseWithStatus(text_, "", config);
+    if (!analyzer.ok()) return analyzer.status();
+    auto text_structure = (*analyzer)->ReleaseTextStructure();
+
+    if (!text_structures_.empty()) {
+      text_structure->RebaseTokens(*text_structures_.front());
+    }
+
+    text_structure->MutableData().ColorStreamViewTokens(color);
+    format_variants.push_back({text_structure->Data(), style_});
 
-  // Initialize auxiliary data needed for TreeUnwrapper.
-  UnwrapperData unwrapper_data(token_stream);
+    text_structures_.push_back(std::move(text_structure));
+  }
+  disabled_ranges_ = EnabledLinesToDisabledByteRanges(
+      lines, text_structures_.front()->Data().GetLineColumnMap());
+  // Update text, as it may have been modified by the analyzer.
+  text_ = text_structures_.front()->Data().Contents();
 
   // Partition input token stream into hierarchical set of UnwrappedLines.
-  TreeUnwrapper tree_unwrapper(text_structure_, style_,
-                               unwrapper_data.preformatted_tokens);
 
-  const TokenPartitionTree* format_tokens_partitions = nullptr;
   // TODO(fangism): The following block could be parallelized because
   // full-partitioning does not depend on format annotations.
-  {
+  for (FormatVariant& format_variant : format_variants) {
     // Annotate inter-token information between all adjacent PreFormatTokens.
     // This must be done before any decisions about ExpandableTreeView
     // can be made because they depend on minimum-spacing, and must-break.
-    AnnotateFormattingInformation(style_, text_structure_,
-                                  &unwrapper_data.preformatted_tokens);
+    AnnotateFormattingInformation(
+        style_, format_variant.text_structure,
+        &format_variant.unwrapper_data->preformatted_tokens);
 
     // Determine ranges of disabling the formatter, based on comment controls.
-    disabled_ranges_.Union(DisableFormattingRanges(full_text, token_stream));
+    disabled_ranges_.Union(DisableFormattingRanges(
+        text_, format_variant.text_structure.TokenStream()));
 
     // Find disabled formatting ranges for specific syntax tree node types.
     // These are typically temporary workarounds for sections that users
     // habitually prefer to format themselves.
-    if (const auto& root = text_structure_.SyntaxTree()) {
-      DisableSyntaxBasedRanges(&disabled_ranges_, *root, style_, full_text);
+    if (const auto& root = format_variant.text_structure.SyntaxTree()) {
+      DisableSyntaxBasedRanges(&disabled_ranges_, *root, style_, text_);
     }
 
     // Disable formatting ranges.
     verible::PreserveSpacesOnDisabledTokenRanges(
-        &unwrapper_data.preformatted_tokens, disabled_ranges_, full_text);
+        &format_variant.unwrapper_data->preformatted_tokens, disabled_ranges_,
+        text_);
 
     // Partition PreFormatTokens into candidate unwrapped lines.
-    format_tokens_partitions = tree_unwrapper.Unwrap();
+    format_variant.tree_unwrapper.Unwrap();
+  }
+
+  // Merge partition trees from all format variants into the first one.
+  for (auto it = format_variants.begin() + 1; it != format_variants.end();
+       ++it) {
+    MergePartitionTrees(
+        format_variants.front().tree_unwrapper.CurrentTokenPartition(),
+        *it->tree_unwrapper.CurrentTokenPartition());
   }
 
+  const verible::TextStructureView& text_structure =
+      format_variants.front().text_structure;
+  TreeUnwrapper& tree_unwrapper = format_variants.front().tree_unwrapper;
+  verible::TokenPartitionTree* format_tokens_partitions =
+      tree_unwrapper.UnwrappedLines();
+
   {
     // For debugging only: identify largest leaf partitions, and stop.
     if (control.show_token_partition_tree) {
@@ -837,7 +985,7 @@ Status Formatter::Format(const ExecutionControl& control) {
     if (control.show_largest_token_partitions != 0) {
       PrintLargestPartitions(control.Stream(), *format_tokens_partitions,
                              control.show_largest_token_partitions,
-                             text_structure_.GetLineColumnMap(), full_text);
+                             text_structure.GetLineColumnMap(), text_);
     }
     if (control.AnyStop()) {
       return absl::OkStatus();
@@ -863,10 +1011,9 @@ Status Formatter::Format(const ExecutionControl& control) {
           break;
         case PartitionPolicyEnum::kTabularAlignment:
           // TODO(b/145170750): Adjust inter-token spacing to achieve alignment,
-          // but leave partitioning intact.
-          // This relies on inter-token spacing having already been annotated.
-          TabularAlignTokenPartitions(style_, full_text, disabled_ranges_,
-                                      &node);
+          // but leave partitioning intact. This relies on
+          // inter-token spacing having already been annotated.
+          TabularAlignTokenPartitions(style_, text_, disabled_ranges_, &node);
           break;
         default:
           break;
@@ -874,10 +1021,13 @@ Status Formatter::Format(const ExecutionControl& control) {
     });
   }
 
+  verible::TokenPartitionTree* const root =
+      tree_unwrapper.CurrentTokenPartition();
+  const auto unwrapper_data = std::move(format_variants.front().unwrapper_data);
+
   // Apply token spacing from partitions to tokens. This is permanent, so it
   // must be done after all reshaping is done.
   {
-    auto* root = tree_unwrapper.CurrentTokenPartition();
     auto node_iter = VectorTreeLeavesIterator(&LeftmostDescendant(*root));
     const auto end = ++VectorTreeLeavesIterator(&RightmostDescendant(*root));
 
@@ -888,7 +1038,7 @@ Status Formatter::Format(const ExecutionControl& control) {
 
       if (partition_policy == PartitionPolicyEnum::kAlreadyFormatted) {
         verible::ApplyAlreadyFormattedPartitionPropertiesToTokens(
-            &(*node_iter), &unwrapper_data.preformatted_tokens);
+            &(*node_iter), &unwrapper_data->preformatted_tokens);
       } else if (partition_policy == PartitionPolicyEnum::kInline) {
         auto* parent = node_iter->Parent();
         CHECK_NOTNULL(parent);
@@ -897,7 +1047,7 @@ Status Formatter::Format(const ExecutionControl& control) {
         // This removes the node pointed to by node_iter (and all other
         // siblings)
         verible::ApplyAlreadyFormattedPartitionPropertiesToTokens(
-            parent, &unwrapper_data.preformatted_tokens);
+            parent, &unwrapper_data->preformatted_tokens);
         // Move to the parent which is now a leaf
         node_iter = verible::VectorTreeLeavesIterator(parent);
       }
@@ -906,8 +1056,8 @@ Status Formatter::Format(const ExecutionControl& control) {
 
   // Produce sequence of independently operable UnwrappedLines.
   const auto unwrapped_lines = MakeUnwrappedLinesWorklist(
-      style_, full_text, disabled_ranges_, *format_tokens_partitions,
-      &unwrapper_data.preformatted_tokens);
+      style_, text_, disabled_ranges_, *format_tokens_partitions,
+      &unwrapper_data->preformatted_tokens);
 
   // For each UnwrappedLine: minimize total penalty of wrap/break decisions.
   // TODO(fangism): This could be parallelized if results are written
@@ -915,7 +1065,7 @@ Status Formatter::Format(const ExecutionControl& control) {
   std::vector<const UnwrappedLine*> partially_formatted_lines;
   formatted_lines_.reserve(unwrapped_lines.size());
   ContinuationCommentAligner continuation_comment_aligner(
-      text_structure_.GetLineColumnMap(), text_structure_.Contents());
+      text_structure.GetLineColumnMap(), text_);
   for (const auto& uwline : unwrapped_lines) {
     // TODO(fangism): Use different formatting strategies depending on
     // uwline.PartitionPolicy().
@@ -925,6 +1075,12 @@ Status Formatter::Format(const ExecutionControl& control) {
       // For partitions that were successfully aligned, do not search
       // line-wrapping, but instead accept the adjusted padded spacing.
       formatted_lines_.emplace_back(uwline);
+    } else if (IsPreprocessorControlFlow(verilog_tokentype(
+                   uwline.TokensRange().begin()->TokenEnum()))) {
+      // Remove indentation before preprocessing control flow.
+      UnwrappedLine formatted_line = uwline;
+      formatted_line.SetIndentationSpaces(0);
+      formatted_lines_.emplace_back(formatted_line);
     } else {
       // In other case, default to searching for optimal line wrapping.
       const auto optimal_solutions =
@@ -961,13 +1117,12 @@ Status Formatter::Format(const ExecutionControl& control) {
 }
 
 void Formatter::Emit(bool include_disabled, std::ostream& stream) const {
-  const absl::string_view full_text(text_structure_.Contents());
   std::function<bool(const verible::TokenInfo&)> include_token_p;
   if (include_disabled) {
     include_token_p = [](const verible::TokenInfo&) { return true; };
   } else {
-    include_token_p = [this, &full_text](const verible::TokenInfo& tok) {
-      return !disabled_ranges_.Contains(tok.left(full_text));
+    include_token_p = [this](const verible::TokenInfo& tok) {
+      return !disabled_ranges_.Contains(tok.left(text_));
     };
   }
 
@@ -976,14 +1131,13 @@ void Formatter::Emit(bool include_disabled, std::ostream& stream) const {
     // TODO(fangism): The handling of preserved spaces before tokens is messy:
     // some of it is handled here, some of it is inside FormattedToken.
     // TODO(mglb): Test empty line handling when this method becomes testable.
-    const auto front_offset =
-        line.Tokens().empty() ? position
-                              : line.Tokens().front().token->left(full_text);
+    const auto front_offset = line.Tokens().empty()
+                                  ? position
+                                  : line.Tokens().front().token->left(text_);
     const absl::string_view leading_whitespace(
-        full_text.substr(position, front_offset - position));
-    FormatWhitespaceWithDisabledByteRanges(full_text, leading_whitespace,
-                                           disabled_ranges_, include_disabled,
-                                           stream);
+        text_.substr(position, front_offset - position));
+    FormatWhitespaceWithDisabledByteRanges(
+        text_, leading_whitespace, disabled_ranges_, include_disabled, stream);
 
     // When front of first token is format-disabled, the previous call will
     // already cover the space up to the front token, in which case,
@@ -992,15 +1146,14 @@ void Formatter::Emit(bool include_disabled, std::ostream& stream) const {
     if (!line.Tokens().empty()) {
       line.FormattedText(stream, !disabled_ranges_.Contains(front_offset),
                          include_token_p);
-      position = line.Tokens().back().token->right(full_text);
+      position = line.Tokens().back().token->right(text_);
     }
   }
 
   // Handle trailing spaces after last token.
-  const absl::string_view trailing_whitespace(full_text.substr(position));
-  FormatWhitespaceWithDisabledByteRanges(full_text, trailing_whitespace,
-                                         disabled_ranges_, include_disabled,
-                                         stream);
+  const absl::string_view trailing_whitespace(text_.substr(position));
+  FormatWhitespaceWithDisabledByteRanges(
+      text_, trailing_whitespace, disabled_ranges_, include_disabled, stream);
 }
 
 }  // namespace formatter
diff --git a/verilog/formatting/formatter.h b/verilog/formatting/formatter.h
index 69e21256c1..09847ea6e5 100644
--- a/verilog/formatting/formatter.h
+++ b/verilog/formatting/formatter.h
@@ -21,7 +21,9 @@
 #include "absl/status/status.h"
 #include "common/strings/position.h"
 #include "common/text/text_structure.h"
+#include "verilog/analysis/flow_tree.h"
 #include "verilog/formatting/format_style.h"
+#include "verilog/preprocessor/verilog_preprocess.h"
 
 namespace verilog {
 namespace formatter {
@@ -75,14 +77,16 @@ absl::Status FormatVerilog(absl::string_view text, absl::string_view filename,
                            const FormatStyle& style,
                            std::ostream& formatted_stream,
                            const verible::LineNumberSet& lines = {},
-                           const ExecutionControl& control = {});
+                           const ExecutionControl& control = {},
+                           bool preprocess = false);
 // Ditto, but with TextStructureView as input and std::string as output.
 // This does verification of the resulting format, but _no_ convergence test.
-absl::Status FormatVerilog(const verible::TextStructureView& text_structure,
-                           absl::string_view filename, const FormatStyle& style,
-                           std::string* formatted_text,
-                           const verible::LineNumberSet& lines = {},
-                           const ExecutionControl& control = {});
+absl::Status FormatVerilog(
+    absl::string_view text, absl::string_view filename,
+    const FormatStyle& style, std::string* formatted_text,
+    const verible::LineNumberSet& lines = {},
+    const std::vector<VerilogPreprocess::Config>& define_variants = {{}},
+    const ExecutionControl& control = {});
 
 // Format only lines in line_range interval [min, max) in "full_content"
 // using "style". Emits _only_ the formatted code in the range
diff --git a/verilog/formatting/formatter_test.cc b/verilog/formatting/formatter_test.cc
index b72202d405..fef7e2a0a7 100644
--- a/verilog/formatting/formatter_test.cc
+++ b/verilog/formatting/formatter_test.cc
@@ -47,14 +47,16 @@
 namespace verilog {
 namespace formatter {
 
+using PreprocessConfigVariants = std::vector<VerilogPreprocess::Config>;
 // private, extern function in formatter.cc, directly tested here.
-absl::Status VerifyFormatting(const verible::TextStructureView& text_structure,
-                              absl::string_view formatted_output,
-                              absl::string_view filename);
+absl::Status VerifyFormatting(
+    absl::string_view text, absl::string_view formatted_output,
+    absl::string_view filename,
+    const PreprocessConfigVariants& preproc_config_variants = {{}});
 
 namespace {
 
-static constexpr VerilogPreprocess::Config kDefaultPreprocess;
+static VerilogPreprocess::Config kDefaultPreprocess;
 
 using absl::StatusCode;
 using testing::HasSubstr;
@@ -68,7 +70,8 @@ TEST(VerifyFormattingTest, NoError) {
   const std::unique_ptr<VerilogAnalyzer> analyzer =
       VerilogAnalyzer::AnalyzeAutomaticMode(code, "<file>", kDefaultPreprocess);
   const auto& text_structure = ABSL_DIE_IF_NULL(analyzer)->Data();
-  const auto status = VerifyFormatting(text_structure, code, "<filename>");
+  const auto status =
+      VerifyFormatting(text_structure.Contents(), code, "<filename>");
   EXPECT_OK(status);
 }
 
@@ -79,7 +82,8 @@ TEST(VerifyFormattingTest, LexError) {
       VerilogAnalyzer::AnalyzeAutomaticMode(code, "<file>", kDefaultPreprocess);
   const auto& text_structure = ABSL_DIE_IF_NULL(analyzer)->Data();
   const absl::string_view bad_code("1class c;endclass\n");  // lexical error
-  const auto status = VerifyFormatting(text_structure, bad_code, "<filename>");
+  const auto status =
+      VerifyFormatting(text_structure.Contents(), bad_code, "<filename>");
   EXPECT_FALSE(status.ok());
   EXPECT_EQ(status.code(), StatusCode::kDataLoss);
 }
@@ -91,7 +95,8 @@ TEST(VerifyFormattingTest, ParseError) {
       VerilogAnalyzer::AnalyzeAutomaticMode(code, "<file>", kDefaultPreprocess);
   const auto& text_structure = ABSL_DIE_IF_NULL(analyzer)->Data();
   const absl::string_view bad_code("classc;endclass\n");  // syntax error
-  const auto status = VerifyFormatting(text_structure, bad_code, "<filename>");
+  const auto status =
+      VerifyFormatting(text_structure.Contents(), bad_code, "<filename>");
   EXPECT_FALSE(status.ok());
   EXPECT_EQ(status.code(), StatusCode::kDataLoss);
 }
@@ -103,7 +108,8 @@ TEST(VerifyFormattingTest, LexicalDifference) {
       VerilogAnalyzer::AnalyzeAutomaticMode(code, "<file>", kDefaultPreprocess);
   const auto& text_structure = ABSL_DIE_IF_NULL(analyzer)->Data();
   const absl::string_view bad_code("class c;;endclass\n");  // different tokens
-  const auto status = VerifyFormatting(text_structure, bad_code, "<filename>");
+  const auto status =
+      VerifyFormatting(text_structure.Contents(), bad_code, "<filename>");
   EXPECT_FALSE(status.ok());
   EXPECT_EQ(status.code(), StatusCode::kDataLoss);
 }
@@ -111,9 +117,11 @@ TEST(VerifyFormattingTest, LexicalDifference) {
 struct FormatterTestCase {
   absl::string_view input;
   absl::string_view expected;
+  bool preprocess = true;
 };
 
 static const LineNumberSet kEnableAllLines;
+static const ExecutionControl kDefaultControl;
 
 // Test that the expected output is produced with the formatter using a custom
 // FormatStyle.
@@ -138,6 +146,18 @@ TEST(FormatterTest, FormatCustomStyleTest) {
     EXPECT_OK(status);
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kFormatterTestCases[] = {
@@ -437,7 +457,8 @@ static constexpr FormatterTestCase kFormatterTestCases[] = {
     {// macro call nested with function call containing an ifdef
      "`J(D(`ifdef e))\n",
      "`J(D(\n"
-     "   `ifdef e))\n"},
+     "   `ifdef e))\n",
+     false},
 
     // `uvm macros indenting
     {
@@ -1538,7 +1559,8 @@ static constexpr FormatterTestCase kFormatterTestCases[] = {
      "    output reg  yy\n"
      "`endif\n"
      ");\n"
-     "endmodule : foo\n"},
+     "endmodule : foo\n",
+     false},
     {"module foo(\n"
      "input w , \n"
      "`define   FOO BAR\n"
@@ -15484,6 +15506,18 @@ TEST(FormatterEndToEndTest, VerilogFormatTest) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kFormatterTestCases) {
+    if (!test_case.preprocess) continue;
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, AutoInferAlignment) {
@@ -15555,7 +15589,8 @@ TEST(FormatterEndToEndTest, AutoInferAlignment) {
        "    output reg     bar\n"   // ... and triggers alignment.
        "`endif\n"
        ");\n"
-       "endmodule : pd\n"},
+       "endmodule : pd\n",
+       false},
       {"module pd(\n"
        "input logic [31:0] bus,\n"
        "input logic [7:0] bus2,\n"
@@ -16465,6 +16500,18 @@ TEST(FormatterEndToEndTest, AutoInferAlignment) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }  // NOLINT(readability/fn_size)
 
 static constexpr FormatterTestCase kFormatterWideTestCases[] = {
@@ -16567,6 +16614,18 @@ TEST(FormatterEndToEndTest, VerilogFormatWideTest) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kFormatterWideTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, DisableModulePortDeclarations) {
@@ -16616,6 +16675,16 @@ TEST(FormatterEndToEndTest, DisableModulePortDeclarations) {
     // Require these test cases to be valid.
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status =
+          FormatVerilog(test_case.input, "<filename>", style, stream,
+                        kEnableAllLines, kDefaultControl, true);
+      // Require these test cases to be valid.
+      EXPECT_OK(status) << status.message();
+      EXPECT_EQ(stream.str(), test_case.expected) << "code:\n"
+                                                  << test_case.input;
+    }
   }
 }
 
@@ -16710,6 +16779,16 @@ TEST(FormatterEndToEndTest, DisableModuleInstantiations) {
     // Require these test cases to be valid.
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status =
+          FormatVerilog(test_case.input, "<filename>", style, stream,
+                        kEnableAllLines, kDefaultControl, true);
+      // Require these test cases to be valid.
+      EXPECT_OK(status) << status.message();
+      EXPECT_EQ(stream.str(), test_case.expected) << "code:\n"
+                                                  << test_case.input;
+    }
   }
 }
 
@@ -16819,6 +16898,16 @@ TEST(FormatterEndToEndTest, DisableTryWrapLongLines) {
     // Require these test cases to be valid.
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status =
+          FormatVerilog(test_case.input, "<filename>", style, stream,
+                        kEnableAllLines, kDefaultControl, true);
+      // Require these test cases to be valid.
+      EXPECT_OK(status) << status.message();
+      EXPECT_EQ(stream.str(), test_case.expected) << "code:\n"
+                                                  << test_case.input;
+    }
   }
 }
 
@@ -16879,6 +16968,16 @@ TEST(FormatterEndToEndTest, ModulePortDeclarationsIndentNotWrap) {
     // Require these test cases to be valid.
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status =
+          FormatVerilog(test_case.input, "<filename>", style, stream,
+                        kEnableAllLines, kDefaultControl, true);
+      // Require these test cases to be valid.
+      EXPECT_OK(status) << status.message();
+      EXPECT_EQ(stream.str(), test_case.expected) << "code:\n"
+                                                  << test_case.input;
+    }
   }
 }
 
@@ -16927,6 +17026,16 @@ TEST(FormatterEndToEndTest, NamedPortConnectionsIndentNotWrap) {
     // Require these test cases to be valid.
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status =
+          FormatVerilog(test_case.input, "<filename>", style, stream,
+                        kEnableAllLines, kDefaultControl, true);
+      // Require these test cases to be valid.
+      EXPECT_OK(status) << status.message();
+      EXPECT_EQ(stream.str(), test_case.expected) << "code:\n"
+                                                  << test_case.input;
+    }
   }
 }
 
@@ -16971,6 +17080,18 @@ TEST(FormatterEndToEndTest, WrapEndElseStatements) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, FormalParametersIndentNotWrap) {
@@ -17042,6 +17163,18 @@ TEST(FormatterEndToEndTest, FormalParametersIndentNotWrap) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, NamedParametersIndentNotWrap) {
@@ -17117,12 +17250,25 @@ TEST(FormatterEndToEndTest, NamedParametersIndentNotWrap) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 struct SelectLinesTestCase {
   absl::string_view input;
   LineNumberSet lines;  // explicit set of lines to enable formatting
   absl::string_view expected;
+  bool preprocess = true;
 };
 
 // Tests that formatter honors selected line numbers.
@@ -17335,6 +17481,21 @@ TEST(FormatterEndToEndTest, SelectLines) {
         << "code:\n"
         << test_case.input << "\nlines: " << test_case.lines;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      test_case.lines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message() << '\n'
+                      << "Lines: " << test_case.lines;
+    EXPECT_EQ(stream.str(), test_case.expected)
+        << "code:\n"
+        << test_case.input << "\nlines: " << test_case.lines;
+  }
 }
 
 // These tests verify the mode where horizontal spacing is discarded while
@@ -17460,6 +17621,18 @@ TEST(FormatterEndToEndTest, PreserveVSpacesOnly) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kFormatterTestCasesElseStatements[] = {
@@ -17710,6 +17883,18 @@ TEST(FormatterEndToEndTest, FormatElseStatements) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kFormatterTestCasesElseStatements) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, ConstraintExpressions) {
@@ -17804,6 +17989,18 @@ TEST(FormatterEndToEndTest, ConstraintExpressions) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kFormatterTestCasesEnumDeclarations[] = {
@@ -17894,6 +18091,15 @@ TEST(FormatterEndToEndTest, FormatAlignEnumDeclarations) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+  for (const auto& test_case : kFormatterTestCasesEnumDeclarations) {
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 TEST(FormatterEndToEndTest, DiagnosticShowFullTree) {
@@ -17912,6 +18118,15 @@ TEST(FormatterEndToEndTest, DiagnosticShowFullTree) {
     EXPECT_EQ(status.code(), StatusCode::kCancelled);
     EXPECT_TRUE(
         absl::StartsWith(debug_stream.str(), "Full token partition tree"));
+
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status = FormatVerilog(test_case.input, "<filename>", style,
+                                        stream, kEnableAllLines, control, true);
+      EXPECT_EQ(status.code(), StatusCode::kCancelled);
+      EXPECT_TRUE(
+          absl::StartsWith(debug_stream.str(), "Full token partition tree"));
+    }
   }
 }
 
@@ -17931,6 +18146,15 @@ TEST(FormatterEndToEndTest, DiagnosticLargestPartitions) {
     EXPECT_EQ(status.code(), StatusCode::kCancelled);
     EXPECT_TRUE(absl::StartsWith(debug_stream.str(), "Showing the "))
         << "got: " << debug_stream.str();
+
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status = FormatVerilog(test_case.input, "<filename>", style,
+                                        stream, kEnableAllLines, control, true);
+      EXPECT_EQ(status.code(), StatusCode::kCancelled);
+      EXPECT_TRUE(absl::StartsWith(debug_stream.str(), "Showing the "))
+          << "got: " << debug_stream.str();
+    }
   }
 }
 
@@ -17953,6 +18177,18 @@ TEST(FormatterEndToEndTest, DiagnosticEquallyOptimalWrappings) {
           << "got: " << debug_stream.str();
       // Cannot guarantee among unit tests that there will be >1 solution.
     }
+
+    if (test_case.preprocess) {
+      std::ostringstream stream;
+      const auto status = FormatVerilog(test_case.input, "<filename>", style,
+                                        stream, kEnableAllLines, control, true);
+      EXPECT_OK(status) << status.message();
+      if (!debug_stream.str().empty()) {
+        EXPECT_TRUE(absl::StartsWith(debug_stream.str(), "Showing the "))
+            << "got: " << debug_stream.str();
+        // Cannot guarantee among unit tests that there will be >1 solution.
+      }
+    }
   }
 }
 
@@ -17969,8 +18205,13 @@ TEST(FormatterEndToEndTest, UnfinishedLineWrapSearching) {
   ExecutionControl control;
   control.max_search_states = 2;  // Cause search to abort early.
   control.stream = &debug_stream;
-  const auto status = FormatVerilog(code, "<filename>", style, stream,
-                                    kEnableAllLines, control);
+  auto status = FormatVerilog(code, "<filename>", style, stream,
+                              kEnableAllLines, control);
+  EXPECT_EQ(status.code(), StatusCode::kResourceExhausted);
+  EXPECT_TRUE(absl::StartsWith(status.message(), "***"));
+
+  status = FormatVerilog(code, "<filename>", style, stream, kEnableAllLines,
+                         control, true);
   EXPECT_EQ(status.code(), StatusCode::kResourceExhausted);
   EXPECT_TRUE(absl::StartsWith(status.message(), "***"));
 }
@@ -18110,6 +18351,18 @@ TEST(FormatterEndToEndTest, OnelineFormatReferenceTest) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kOnelineFormatReferenceTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 // Tests that constructs that could be formatted as one-liners are expanded
@@ -18131,6 +18384,18 @@ TEST(FormatterEndToEndTest, OnelineFormatExpandTest) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kOnelineFormatExpandTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 struct DimensionsAlignmentTestCase {
@@ -18330,6 +18595,18 @@ TEST(FormatterEndToEndTest, FormatNestedFunctionsTestCases40ColumnsLimit) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kNestedFunctionsTestCases40ColumnsLimit) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kNestedFunctionsTestCases60ColumnsLimit[] = {
@@ -18374,6 +18651,18 @@ TEST(FormatterEndToEndTest, FormatNestedFunctionsTestCases60ColumnsLimit) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kNestedFunctionsTestCases60ColumnsLimit) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kNestedFunctionsTestCases80ColumnsLimit[] = {
@@ -18428,6 +18717,18 @@ TEST(FormatterEndToEndTest, FormatNestedFunctionsTestCases80ColumnsLimit) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kNestedFunctionsTestCases80ColumnsLimit) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kNestedFunctionsTestCases100ColumnsLimit[] =
@@ -18499,6 +18800,18 @@ TEST(FormatterEndToEndTest, FormatNestedFunctionsTestCases100ColumnsLimit) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kNestedFunctionsTestCases100ColumnsLimit) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase noCompactIndexingAndSelectionsTestCases[]{
@@ -18528,6 +18841,18 @@ TEST(FormatterEndToEndTest, compactIndexingAndSelectionsTestCases) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : noCompactIndexingAndSelectionsTestCases) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 static constexpr FormatterTestCase kFunctionCallsWithComments[] = {
@@ -18589,6 +18914,18 @@ TEST(FormatterEndToEndTest, FunctionCallsWithComments) {
     EXPECT_OK(status) << status.message();
     EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
   }
+
+  for (const auto& test_case : kFunctionCallsWithComments) {
+    VLOG(1) << "code-to-format:\n" << test_case.input << "<EOF>";
+    if (!test_case.preprocess) continue;
+    std::ostringstream stream;
+    const auto status =
+        FormatVerilog(test_case.input, "<filename>", style, stream,
+                      kEnableAllLines, kDefaultControl, true);
+    // Require these test cases to be valid.
+    EXPECT_OK(status) << status.message();
+    EXPECT_EQ(stream.str(), test_case.expected) << "code:\n" << test_case.input;
+  }
 }
 
 // Extracts the first non-whitespace token and prepends it with number of
diff --git a/verilog/formatting/token_annotator.cc b/verilog/formatting/token_annotator.cc
index 8e418a8e83..f837b54b7c 100644
--- a/verilog/formatting/token_annotator.cc
+++ b/verilog/formatting/token_annotator.cc
@@ -811,6 +811,11 @@ static WithReason<SpacingOptions> BreakDecisionBetween(
       return {SpacingOptions::kMustWrap,
               "Force-preserve line break around block comment"};
     }
+    if (IsPreprocessorKeyword(
+            static_cast<verilog_tokentype>(left.TokenEnum()))) {
+      return {SpacingOptions::kMustAppend,
+              "Append comment blocks to preprocessor tokens"};
+    }
   }
 
   // TODO(fangism): check for all token types in verilog.lex that
@@ -968,6 +973,15 @@ void AnnotateFormattingInformation(
     // For unit testing, tokens' text snippets don't necessarily originate
     // from the same contiguous string buffer, so skip this step.
     ConnectPreFormatTokensPreservedSpaceStarts(buffer_start, format_tokens);
+
+    for (auto& ftoken : *format_tokens) {
+      // Remove spacing between an `ifdef, `ifndef, or `elseif and the following
+      // identifier.
+      // FIXME: should probably do this elsewhere
+      if (verilog_tokentype(ftoken.TokenEnum()) == PP_Identifier) {
+        ftoken.before.preserved_space_start = nullptr;
+      }
+    }
   }
 
   // Annotate inter-token information using the syntax tree for context.
diff --git a/verilog/formatting/tree_unwrapper.cc b/verilog/formatting/tree_unwrapper.cc
index 6ca906b4ae..330f035b9d 100644
--- a/verilog/formatting/tree_unwrapper.cc
+++ b/verilog/formatting/tree_unwrapper.cc
@@ -146,6 +146,9 @@ enum TokenScannerState {
   // While encountering any string of consecutive newlines
   kHaveNewline,
 
+  // While encountering tokens related to preprocessor control flow
+  kPreprocessorControlFlow,
+
   // Transition from newline to non-newline
   kNewPartition,
 
@@ -163,6 +166,8 @@ TokenScannerStateStrings() {
           {"kStart", TokenScannerState::kStart},
           {"kHaveNewline", TokenScannerState::kHaveNewline},
           {"kNewPartition", TokenScannerState::kNewPartition},
+          {"kPreprocessorControlFlow",
+           TokenScannerState::kPreprocessorControlFlow},
           {"kEndWithNewline", TokenScannerState::kEndWithNewline},
           {"kEndNoNewline", TokenScannerState::kEndNoNewline},
       });
@@ -196,14 +201,17 @@ class TreeUnwrapper::TokenScanner {
   }
 
   // Calls TransitionState using current_state_ and the tranition token_Type
-  void UpdateState(verilog_tokentype token_type) {
-    current_state_ = TransitionState(current_state_, token_type);
+  void UpdateState(verilog_tokentype token_type, int color) {
+    current_state_ = TransitionState(current_state_, token_type, color);
+    on_preproc_ident_ = token_type == PP_Identifier;
     seen_any_nonspace_ |= IsComment(token_type);
   }
 
   // Returns true if this is a state that should start a new token partition.
   bool ShouldStartNewPartition() const {
     return current_state_ == State::kNewPartition ||
+           (current_state_ == State::kPreprocessorControlFlow &&
+            !on_preproc_ident_) ||
            (current_state_ == State::kEndWithNewline && seen_any_nonspace_);
   }
 
@@ -212,12 +220,13 @@ class TreeUnwrapper::TokenScanner {
 
   // The current state of the TokenScanner.
   State current_state_ = kStart;
+  bool on_preproc_ident_ = false;
   bool seen_any_nonspace_ = false;
 
   // Transitions the TokenScanner given a TokenState and a verilog_tokentype
   // transition
   static State TransitionState(const State& old_state,
-                               verilog_tokentype token_type);
+                               verilog_tokentype token_type, int color);
 };
 
 static bool IsNewlineOrEOF(verilog_tokentype token_type) {
@@ -238,11 +247,24 @@ static bool IsNewlineOrEOF(verilog_tokentype token_type) {
  *   TreeUnwrapper::CatchUpToCurrentLeaf()
  */
 TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
-    const State& old_state, verilog_tokentype token_type) {
+    const State& old_state, verilog_tokentype token_type, int color) {
   VLOG(4) << "state transition on: " << old_state
           << ", token: " << verilog_symbol_name(token_type);
+  if (IsPreprocessorControlFlow(token_type)) {
+    const State new_state = kPreprocessorControlFlow;
+    VLOG(4) << "new state: " << new_state;
+    return new_state;
+  }
   State new_state = old_state;
   switch (old_state) {
+    case kPreprocessorControlFlow: {
+      if (IsComment(token_type)) {
+        new_state = kStart;
+      } else if (token_type != PP_Identifier) {
+        new_state = kNewPartition;
+      }
+      break;
+    }
     case kStart: {
       if (IsNewlineOrEOF(token_type)) {
         new_state = kHaveNewline;
@@ -274,6 +296,7 @@ TokenScannerState TreeUnwrapper::TokenScanner::TransitionState(
       break;
     }
     case kEndWithNewline:
+      if (!color) new_state = kEndNoNewline;
     case kEndNoNewline: {
       // Terminal states.  Must Reset() next.
       break;
@@ -314,6 +337,8 @@ TreeUnwrapper::TreeUnwrapper(const verible::TextStructureView& view,
   CHECK(back.text().empty());
 }
 
+TreeUnwrapper::TreeUnwrapper(TreeUnwrapper&& other) noexcept = default;
+
 TreeUnwrapper::~TreeUnwrapper() = default;
 
 static bool IsPreprocessorClause(NodeEnum e) {
@@ -368,9 +393,10 @@ static bool ShouldIndentRelativeToDirectParent(
   });
 }
 
-void TreeUnwrapper::UpdateInterLeafScanner(verilog_tokentype token_type) {
+void TreeUnwrapper::UpdateInterLeafScanner(verilog_tokentype token_type,
+                                           int color) {
   VLOG(4) << __FUNCTION__ << ", token: " << verilog_symbol_name(token_type);
-  inter_leaf_scanner_->UpdateState(token_type);
+  inter_leaf_scanner_->UpdateState(token_type, color);
   if (inter_leaf_scanner_->ShouldStartNewPartition()) {
     VLOG(4) << "new partition";
     // interleaf-tokens like comments do not have corresponding syntax tree
@@ -386,7 +412,7 @@ void TreeUnwrapper::AdvanceLastVisitedLeaf() {
   const auto& next_token = *NextUnfilteredToken();
   const verilog_tokentype token_enum =
       verilog_tokentype(next_token.token_enum());
-  UpdateInterLeafScanner(token_enum);
+  UpdateInterLeafScanner(token_enum, next_token.color);
   AdvanceNextUnfilteredToken();
   VLOG(4) << "end of " << __FUNCTION__;
 }
@@ -493,6 +519,7 @@ static verible::TokenSequence::const_iterator StopAtLastNewlineBeforeTreeLeaf(
   VLOG(4) << __FUNCTION__;
   auto token_iter = token_begin;
   auto last_newline = token_begin;
+  bool always_last_newline = false;
   bool have_last_newline = false;
 
   // Find next syntax tree token or EOF.
@@ -513,12 +540,23 @@ static verible::TokenSequence::const_iterator StopAtLastNewlineBeforeTreeLeaf(
         ++token_iter;
         break;
       default:
-        break_while = true;
+        if (token_iter->color) {
+          break_while = true;
+        } else {
+          if (token_iter->token_enum() == PP_Identifier ||
+              token_iter->token_enum() == PP_else ||
+              token_iter->token_enum() == PP_endif) {
+            always_last_newline = true;
+          }
+          ++token_iter;
+        }
         break;
     }
   }
 
-  const auto result = have_last_newline ? last_newline : token_iter;
+  const auto result = always_last_newline ? token_iter
+                      : have_last_newline ? last_newline
+                                          : token_iter;
   VLOG(4) << "end of " << __FUNCTION__ << ", advanced "
           << std::distance(token_begin, result) << " tokens";
   return result;
@@ -542,7 +580,7 @@ void TreeUnwrapper::LookAheadBeyondCurrentNode() {
     VLOG(4) << "token: " << VerboseToken(next_token);
     const verilog_tokentype token_enum =
         verilog_tokentype(next_token.token_enum());
-    UpdateInterLeafScanner(token_enum);
+    UpdateInterLeafScanner(token_enum, next_token.color);
     if (next_token_iter != token_end) {
       AdvanceNextUnfilteredToken();
     } else {
@@ -1525,6 +1563,32 @@ static bool PartitionIsForcedIntoNewLine(const TokenPartitionTree& partition) {
   return ftokens.front().before.break_decision == SpacingOptions::kMustWrap;
 }
 
+bool IsPreprocessorPartition(const TokenPartitionTree& partition) {
+  auto tok = partition.Value().TokensRange().front().TokenEnum();
+  return IsPreprocessorControlFlow(verilog_tokentype(tok)) ||
+         tok == PP_Identifier;
+}
+
+TokenPartitionTree* MergeLeafIntoPreviousLeafIfNotPreprocessor(
+    TokenPartitionTree* const leaf) {
+  if (IsPreprocessorPartition(*leaf)) {
+    VLOG(5) << "  preprocessor partition; not merging into previous partition.";
+    return nullptr;
+  }
+  VLOG(5) << "  merge into previous partition.";
+  return MergeLeafIntoPreviousLeaf(leaf);
+}
+
+TokenPartitionTree* MergeLeafIntoNextLeafIfNotPreprocessor(
+    TokenPartitionTree* const leaf) {
+  if (IsPreprocessorPartition(*leaf)) {
+    VLOG(5) << "  preprocessor partition; not merging into next partition.";
+    return nullptr;
+  }
+  VLOG(5) << "  merge into next partition.";
+  return MergeLeafIntoNextLeaf(leaf);
+}
+
 // Joins partition containing only a ',' (and optionally comments) with
 // a partition preceding or following it.
 static void AttachSeparatorToPreviousOrNextPartition(
@@ -1562,6 +1626,12 @@ static void AttachSeparatorToPreviousOrNextPartition(
     return;
   }
 
+  if (IsPreprocessorPartition(partition[0])) {
+    VLOG(5) << "  preprocessor partition at [0]; not attaching to other "
+               "partitions.";
+    return;
+  }
+
   // Merge with previous partition if both partitions are in the same line in
   // original text
   const auto* previous_partition = PreviousLeaf(*partition);
@@ -1572,8 +1642,7 @@ static void AttachSeparatorToPreviousOrNextPartition(
       absl::string_view original_text_between = verible::make_string_view_range(
           previous_token.Text().end(), separator->Text().begin());
       if (!absl::StrContains(original_text_between, '\n')) {
-        VLOG(5) << "  merge into previous partition.";
-        verible::MergeLeafIntoPreviousLeaf(partition);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(partition);
         return;
       }
     }
@@ -1588,8 +1657,7 @@ static void AttachSeparatorToPreviousOrNextPartition(
       absl::string_view original_text_between = verible::make_string_view_range(
           separator->Text().end(), next_token.Text().begin());
       if (!absl::StrContains(original_text_between, '\n')) {
-        VLOG(5) << "  merge into next partition.";
-        verible::MergeLeafIntoNextLeaf(partition);
+        MergeLeafIntoNextLeafIfNotPreprocessor(partition);
         return;
       }
     }
@@ -1599,16 +1667,14 @@ static void AttachSeparatorToPreviousOrNextPartition(
   if (partition->Value().TokensRange().size() == 1) {
     // Try merging with previous partition
     if (!PartitionIsForcedIntoNewLine(*partition)) {
-      VLOG(5) << "  merge into previous partition.";
-      verible::MergeLeafIntoPreviousLeaf(partition);
+      MergeLeafIntoPreviousLeafIfNotPreprocessor(partition);
       return;
     }
 
     // Try merging with next partition
     if (next_partition != nullptr &&
         !PartitionIsForcedIntoNewLine(*next_partition)) {
-      VLOG(5) << "  merge into next partition.";
-      verible::MergeLeafIntoNextLeaf(partition);
+      MergeLeafIntoNextLeafIfNotPreprocessor(partition);
       return;
     }
   }
@@ -1657,7 +1723,7 @@ static void AttachTrailingSemicolonToPreviousPartition(
       semicolon_partition->Value().SetIndentationSpaces(
           group->Value().IndentationSpaces());
     } else {
-      verible::MergeLeafIntoPreviousLeaf(semicolon_partition);
+      MergeLeafIntoPreviousLeafIfNotPreprocessor(semicolon_partition);
     }
     VLOG(4) << "after moving semicolon:\n" << *partition;
   }
@@ -1726,7 +1792,7 @@ static void AttachOpeningBraceToDeclarationsAssignmentOperator(
     }
 
     if (!PartitionIsForcedIntoNewLine(next)) {
-      verible::MergeLeafIntoNextLeaf(&current);
+      MergeLeafIntoNextLeafIfNotPreprocessor(&current);
       // There shouldn't be more matching partitions
       return;
     }
@@ -1747,7 +1813,7 @@ static void ReshapeIfClause(const SyntaxTreeNode& node,
   // Then fuse the 'begin' partition with the preceding 'if (...)'
   auto& if_body_partition = partition.Children().back();
   auto& begin_partition = if_body_partition.Children().front();
-  verible::MergeLeafIntoPreviousLeaf(&begin_partition);
+  MergeLeafIntoPreviousLeafIfNotPreprocessor(&begin_partition);
   partition.Value().SetPartitionPolicy(
       if_body_partition.Value().PartitionPolicy());
   // if seq_block body was empty, that leaves only 'end', so hoist.
@@ -1792,7 +1858,7 @@ static void ReshapeElseClause(const SyntaxTreeNode& node,
     return;
   }
 
-  verible::MergeLeafIntoNextLeaf(&else_partition);
+  MergeLeafIntoNextLeafIfNotPreprocessor(&else_partition);
 }
 
 static void HoistOnlyChildPartition(TokenPartitionTree* partition) {
@@ -1815,7 +1881,7 @@ static void PushEndIntoElsePartition(TokenPartitionTree* partition_ptr) {
   auto& partition = *partition_ptr;
   auto& if_clause_partition = partition.Children().front();
   auto* end_partition = &RightmostDescendant(if_clause_partition);
-  auto* end_parent = verible::MergeLeafIntoNextLeaf(end_partition);
+  auto* end_parent = MergeLeafIntoNextLeafIfNotPreprocessor(end_partition);
   // if moving leaf results in any singleton partitions, hoist.
   if (end_parent != nullptr) {
     HoistOnlyChildPartition(end_parent);
@@ -1972,7 +2038,7 @@ class MacroCallReshaper {
       if (semicolon_ == NextSibling(*paren_group_)) {
         if (!PartitionIsForcedIntoNewLine(*semicolon_)) {
           // Merge with ')'
-          verible::MergeLeafIntoPreviousLeaf(semicolon_);
+          MergeLeafIntoPreviousLeafIfNotPreprocessor(semicolon_);
           semicolon_ = nullptr;
         }
       }
@@ -2225,7 +2291,7 @@ class MacroCallReshaper {
                          return IsComment(verilog_tokentype(t.TokenEnum()));
                        })) {
         // Merge
-        verible::MergeLeafIntoPreviousLeaf(r_paren_);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(r_paren_);
         r_paren_ = &argument_list_->Children().back();
       } else {
         // Group
@@ -2305,7 +2371,7 @@ class MacroCallReshaper {
     CHECK(!PartitionIsForcedIntoNewLine(*l_paren_));
     verible::AdjustIndentationAbsolute(paren_group_,
                                        main_node_->Value().IndentationSpaces());
-    verible::MergeLeafIntoNextLeaf(identifier_);
+    MergeLeafIntoNextLeafIfNotPreprocessor(identifier_);
     HoistOnlyChildPartition(main_node_);
 
     paren_group_ = main_node_;
@@ -2638,7 +2704,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       // Push the "import..." down.
       {
         if (partition.Children().size() > 1) {
-          verible::MergeLeafIntoNextLeaf(&partition.Children().front());
+          MergeLeafIntoNextLeafIfNotPreprocessor(&partition.Children().front());
           AttachTrailingSemicolonToPreviousPartition(&partition);
         }
         break;
@@ -2651,7 +2717,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       auto& target_instance_partition = partition;
       auto& children = target_instance_partition.Children();
       // Attach ')' to the instance name
-      verible::MergeLeafIntoNextLeaf(PreviousSibling(children.back()));
+      MergeLeafIntoNextLeafIfNotPreprocessor(PreviousSibling(children.back()));
 
       verible::AdjustIndentationRelative(&children.back(), -style.wrap_spaces);
       break;
@@ -2668,7 +2734,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
         auto& last_prev = *ABSL_DIE_IF_NULL(PreviousSibling(last));
         if (PartitionStartsWithCloseParen(last) &&
             PartitionEndsWithOpenParen(last_prev)) {
-          verible::MergeLeafIntoPreviousLeaf(&last);
+          MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
         }
       }
       // If there were any parameters or ports at all, expand.
@@ -2688,7 +2754,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
         auto& last_prev = *ABSL_DIE_IF_NULL(PreviousSibling(last));
         if (PartitionStartsWithCloseParen(last) &&
             PartitionEndsWithOpenParen(last_prev)) {
-          verible::MergeLeafIntoPreviousLeaf(&last);
+          MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
         }
       }
       break;
@@ -2703,7 +2769,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
     case NodeEnum::kTaskPrototype: {
       auto& last = RightmostDescendant(partition);
       if (PartitionIsCloseParenSemi(last)) {
-        verible::MergeLeafIntoPreviousLeaf(&last);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
       }
       break;
     }
@@ -2746,7 +2812,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
             auto* group = verible::GroupLeafWithPreviousLeaf(&last);
             group->Value().SetPartitionPolicy(PartitionPolicyEnum::kStack);
           } else {
-            verible::MergeLeafIntoPreviousLeaf(&last);
+            MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
           }
         }
       }
@@ -2770,7 +2836,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
             auto* group = verible::GroupLeafWithPreviousLeaf(&last);
             group->Value().SetPartitionPolicy(PartitionPolicyEnum::kStack);
           } else {
-            verible::MergeLeafIntoPreviousLeaf(&last);
+            MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
           }
         }
       } else if (policy == PartitionPolicyEnum::kStack) {
@@ -2870,7 +2936,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       auto& last = RightmostDescendant(partition);
       // TODO(fangism): why does test fail without this clause?
       if (PartitionStartsWithCloseParen(last)) {
-        verible::MergeLeafIntoPreviousLeaf(&last);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
       }
       break;
     }
@@ -2880,7 +2946,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       if (partition.Children().size() == 2) {
         auto& last = RightmostDescendant(partition);
         if (PartitionIsCloseBrace(last)) {
-          verible::MergeLeafIntoPreviousLeaf(&last);
+          MergeLeafIntoPreviousLeafIfNotPreprocessor(&last);
         }
       }
       break;
@@ -2919,7 +2985,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       if (children.size() == 2 &&
           verible::is_leaf(children.front()) /* left side */ &&
           !PartitionIsForcedIntoNewLine(children.back())) {
-        verible::MergeLeafIntoNextLeaf(&children.front());
+        MergeLeafIntoNextLeafIfNotPreprocessor(&children.front());
         VLOG(4) << "after merge leaf (left-into-right):\n" << partition;
       }
       break;
@@ -2948,7 +3014,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       AttachSeparatorsToListElementPartitions(&partition);
       // Merge the 'assign' keyword with the (first) x=y assignment.
       // TODO(fangism): reshape for multiple assignments.
-      verible::MergeLeafIntoNextLeaf(&partition.Children().front());
+      MergeLeafIntoNextLeafIfNotPreprocessor(&partition.Children().front());
       VLOG(4) << "after merging 'assign':\n" << partition;
       AdjustSubsequentPartitionsIndentation(&partition, style.wrap_spaces);
       VLOG(4) << "after adjusting partitions indentation:\n" << partition;
@@ -2970,10 +3036,10 @@ void TreeUnwrapper::ReshapeTokenPartitions(
       VLOG(4) << "kForSpec got ';' at child " << dist1 << " and " << dist2;
       // Merge from back-to-front to keep indices valid.
       if (dist2 > 0 && (dist2 - dist1 > 1)) {
-        verible::MergeLeafIntoPreviousLeaf(&*iter2);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&*iter2);
       }
       if (dist1 > 0) {
-        verible::MergeLeafIntoPreviousLeaf(&*iter1);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&*iter1);
       }
       break;
     }
@@ -3017,7 +3083,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
         auto& begin_partition = LeftmostDescendant(seq_block_partition);
         VLOG(4) << "begin partition: " << begin_partition;
         CHECK(is_leaf(begin_partition));
-        verible::MergeLeafIntoPreviousLeaf(&begin_partition);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&begin_partition);
         VLOG(4) << "after merging 'begin' to predecessor:\n" << partition;
         // Flatten only the statement block so that the control partition
         // can retain its own partition policy.
@@ -3033,11 +3099,11 @@ void TreeUnwrapper::ReshapeTokenPartitions(
 
         // merge "do" <- "begin"
         auto& begin_partition = LeftmostDescendant(seq_block_partition);
-        verible::MergeLeafIntoPreviousLeaf(&begin_partition);
+        MergeLeafIntoPreviousLeafIfNotPreprocessor(&begin_partition);
 
         // merge "end" -> "while"
         auto& end_partition = RightmostDescendant(seq_block_partition);
-        verible::MergeLeafIntoNextLeaf(&end_partition);
+        MergeLeafIntoNextLeafIfNotPreprocessor(&end_partition);
 
         // Flatten only the statement block so that the control partition
         // can retain its own partition policy.
@@ -3051,7 +3117,7 @@ void TreeUnwrapper::ReshapeTokenPartitions(
               ->MatchesTagAnyOf({NodeEnum::kProceduralTimingControlStatement,
                                  NodeEnum::kSeqBlock})) {
         // Merge 'always' keyword with next sibling, and adjust subtree indent.
-        verible::MergeLeafIntoNextLeaf(&partition.Children().front());
+        MergeLeafIntoNextLeafIfNotPreprocessor(&partition.Children().front());
         verible::AdjustIndentationAbsolute(
             &partition.Children().front(),
             partition.Value().IndentationSpaces());
@@ -3184,7 +3250,7 @@ void TreeUnwrapper::Visit(const verible::SyntaxTreeLeaf& leaf) {
   VLOG(4) << "Visit leaf: after CatchUp";
 
   // Possibly start new partition (without advancing token iterator).
-  UpdateInterLeafScanner(tag);
+  UpdateInterLeafScanner(tag, leaf.get().color);
 
   // Sanity check that NextUnfilteredToken() is aligned to the current leaf.
   CHECK_EQ(NextUnfilteredToken()->text().begin(), leaf.get().text().begin());
diff --git a/verilog/formatting/tree_unwrapper.h b/verilog/formatting/tree_unwrapper.h
index 204b975436..24ec9ac9c1 100644
--- a/verilog/formatting/tree_unwrapper.h
+++ b/verilog/formatting/tree_unwrapper.h
@@ -65,7 +65,7 @@ class TreeUnwrapper final : public verible::TreeUnwrapper {
   // Deleted standard interfaces:
   TreeUnwrapper() = delete;
   TreeUnwrapper(const TreeUnwrapper&) = delete;
-  TreeUnwrapper(TreeUnwrapper&&) = delete;
+  TreeUnwrapper(TreeUnwrapper&&) noexcept;
   TreeUnwrapper& operator=(const TreeUnwrapper&) = delete;
   TreeUnwrapper& operator=(TreeUnwrapper&&) = delete;
 
@@ -120,7 +120,7 @@ class TreeUnwrapper final : public verible::TreeUnwrapper {
   void EatSpaces();
 
   // Update token tracking, and possibly start a new partition.
-  void UpdateInterLeafScanner(verilog_tokentype);
+  void UpdateInterLeafScanner(verilog_tokentype, int);
 
   // This should only be called directly from CatchUpToCurrentLeaf and
   // LookAheadBeyondCurrentLeaf.
diff --git a/verilog/formatting/tree_unwrapper_test.cc b/verilog/formatting/tree_unwrapper_test.cc
index 88dde9155a..cc04a71c7a 100644
--- a/verilog/formatting/tree_unwrapper_test.cc
+++ b/verilog/formatting/tree_unwrapper_test.cc
@@ -2886,7 +2886,8 @@ TEST_F(TreeUnwrapperTest, UnwrapModuleTests) {
   for (const auto& test_case : kUnwrapModuleTestCases) {
     VLOG(1) << "Test: " << test_case.test_name;
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -3145,7 +3146,8 @@ TEST_F(TreeUnwrapperTest, UnwrapCommentsTests) {
   for (const auto& test_case : kUnwrapCommentsTestCases) {
     VLOG(1) << "Test: " << test_case.test_name;
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case))
         << "code:\n"
@@ -3275,7 +3277,8 @@ TEST_F(TreeUnwrapperTest, UnwrapUvmTests) {
   for (const auto& test_case : kUnwrapUvmTestCases) {
     VLOG(1) << "Test: " << test_case.test_name;
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case))
         << "code:\n"
@@ -3958,7 +3961,8 @@ TEST_F(TreeUnwrapperTest, UnwrapClassTests) {
   for (const auto& test_case : kClassTestCases) {
     VLOG(1) << "Test: " << test_case.test_name;
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -4146,7 +4150,8 @@ const TreeUnwrapperTestData kUnwrapPackageTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapPackageTests) {
   for (const auto& test_case : kUnwrapPackageTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -4218,7 +4223,8 @@ const TreeUnwrapperTestData kDescriptionTestCases[] = {
 TEST_F(TreeUnwrapperTest, DescriptionTests) {
   for (const auto& test_case : kDescriptionTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -4887,7 +4893,8 @@ const TreeUnwrapperTestData kUnwrapPreprocessorTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapPreprocessorTests) {
   for (const auto& test_case : kUnwrapPreprocessorTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -5064,7 +5071,8 @@ const TreeUnwrapperTestData kUnwrapInterfaceTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapInterfaceTests) {
   for (const auto& test_case : kUnwrapInterfaceTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -6235,7 +6243,8 @@ const TreeUnwrapperTestData kUnwrapTaskTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapTaskTests) {
   for (const auto& test_case : kUnwrapTaskTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -7679,7 +7688,8 @@ TEST_F(TreeUnwrapperTest, UnwrapFunctionTests) {
   for (const auto& test_case : kUnwrapFunctionTestCases) {
     VLOG(4) << "==== kUnwrapFunctionTests ====\n" << test_case.source_code;
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -7711,7 +7721,8 @@ const TreeUnwrapperTestData kUnwrapStructTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapStructTests) {
   for (const auto& test_case : kUnwrapStructTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -7743,7 +7754,8 @@ const TreeUnwrapperTestData kUnwrapUnionTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapUnionTests) {
   for (const auto& test_case : kUnwrapUnionTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -7805,7 +7817,8 @@ const TreeUnwrapperTestData kUnwrapEnumTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapEnumTests) {
   for (const auto& test_case : kUnwrapEnumTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -7954,7 +7967,8 @@ const TreeUnwrapperTestData kUnwrapPropertyTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapPropertyTests) {
   for (const auto& test_case : kUnwrapPropertyTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -8092,7 +8106,8 @@ const TreeUnwrapperTestData kUnwrapCovergroupTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapCovergroupTests) {
   for (const auto& test_case : kUnwrapCovergroupTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -8163,7 +8178,8 @@ const TreeUnwrapperTestData kUnwrapSequenceTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapSequenceTests) {
   for (const auto& test_case : kUnwrapSequenceTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
@@ -8447,7 +8463,8 @@ const TreeUnwrapperTestData kUnwrapPrimitivesTestCases[] = {
 TEST_F(TreeUnwrapperTest, UnwrapPrimitivesTests) {
   for (const auto& test_case : kUnwrapPrimitivesTestCases) {
     auto tree_unwrapper = CreateTreeUnwrapper(test_case.source_code);
-    const auto* uwline_tree = tree_unwrapper->Unwrap();
+    tree_unwrapper->Unwrap();
+    const auto* uwline_tree = tree_unwrapper->UnwrappedLines();
     EXPECT_TRUE(VerifyUnwrappedLines(&std::cout, *ABSL_DIE_IF_NULL(uwline_tree),
                                      test_case));
   }
diff --git a/verilog/preprocessor/verilog_preprocess.cc b/verilog/preprocessor/verilog_preprocess.cc
index b8e2b21f40..6326a0836f 100644
--- a/verilog/preprocessor/verilog_preprocess.cc
+++ b/verilog/preprocessor/verilog_preprocess.cc
@@ -44,10 +44,13 @@ namespace verilog {
 using verible::TokenGenerator;
 using verible::TokenStreamView;
 using verible::container::FindOrNull;
+using verible::container::FindWithDefault;
 using verible::container::InsertOrUpdate;
 
 VerilogPreprocess::VerilogPreprocess(const Config& config)
-    : VerilogPreprocess(config, nullptr) {}
+    : VerilogPreprocess(config, nullptr) {
+  preprocess_data_.macro_definitions = config_.macro_definitions;
+}
 
 VerilogPreprocess::VerilogPreprocess(const Config& config, FileOpener opener)
     : config_(config), file_opener_(std::move(opener)) {
@@ -55,6 +58,7 @@ VerilogPreprocess::VerilogPreprocess(const Config& config, FileOpener opener)
   // place a toplevel 'conditional' that is always selected.
   // Thus we only need to test in `else and `endif to see if we underrun due
   // to unbalanced statements.
+  preprocess_data_.macro_definitions = config_.macro_definitions;
   conditional_block_.push(
       BranchBlock(true, true, verible::TokenInfo::EOFToken()));
 }
@@ -288,8 +292,8 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier(
 
   // Finding the macro definition.
   const absl::string_view sv = (*iter)->text();
-  const auto* found =
-      FindOrNull(preprocess_data_.macro_definitions, sv.substr(1));
+  const auto found = FindWithDefault(preprocess_data_.macro_definitions,
+                                     sv.substr(1), std::nullopt);
   if (!found) {
     preprocess_data_.errors.emplace_back(
         **iter,
@@ -302,7 +306,7 @@ absl::Status VerilogPreprocess::HandleMacroIdentifier(
     verible::MacroCall macro_call;
     RETURN_IF_ERROR(
         ConsumeAndParseMacroCall(iter, generator, &macro_call, *found));
-    RETURN_IF_ERROR(ExpandMacro(macro_call, found));
+    RETURN_IF_ERROR(ExpandMacro(macro_call, *found));
   }
   auto& lexed = preprocess_data_.lexed_macros_backup.back();
   if (!forward) return absl::OkStatus();
@@ -378,16 +382,16 @@ absl::Status VerilogPreprocess::ExpandText(
 // `MACRO([param1],[param2],...)
 absl::Status VerilogPreprocess::ExpandMacro(
     const verible::MacroCall& macro_call,
-    const verible::MacroDefinition* macro_definition) {
+    const verible::MacroDefinition& macro_definition) {
   const auto& actual_parameters = macro_call.positional_arguments;
 
   std::map<absl::string_view, verible::DefaultTokenInfo> subs_map;
-  if (macro_definition->IsCallable()) {
-    RETURN_IF_ERROR(macro_definition->PopulateSubstitutionMap(actual_parameters,
-                                                              &subs_map));
+  if (macro_definition.IsCallable()) {
+    RETURN_IF_ERROR(
+        macro_definition.PopulateSubstitutionMap(actual_parameters, &subs_map));
   }
 
-  VerilogLexer lexer(macro_definition->DefinitionText().text());
+  VerilogLexer lexer(macro_definition.DefinitionText().text());
   verible::TokenSequence lexed_sequence;
   verible::TokenSequence expanded_lexed_sequence;
   // Populating the lexed token sequence.
@@ -420,7 +424,7 @@ absl::Status VerilogPreprocess::ExpandMacro(
       for (auto& u : expanded_child) expanded_lexed_sequence.push_back(u);
       continue;
     }
-    if (macro_definition->IsCallable()) {
+    if (macro_definition.IsCallable()) {
       // Check if the last token is a formal parameter
       const auto* replacement = FindOrNull(subs_map, last_token.text());
       if (replacement) {
diff --git a/verilog/preprocessor/verilog_preprocess.h b/verilog/preprocessor/verilog_preprocess.h
index 69d9759835..e9c5ce13ca 100644
--- a/verilog/preprocessor/verilog_preprocess.h
+++ b/verilog/preprocessor/verilog_preprocess.h
@@ -69,7 +69,8 @@ struct VerilogPreprocessError {
 // Information that results from preprocessing.
 struct VerilogPreprocessData {
   using MacroDefinition = verible::MacroDefinition;
-  using MacroDefinitionRegistry = std::map<absl::string_view, MacroDefinition>;
+  using MacroDefinitionRegistry =
+      std::map<absl::string_view, std::optional<MacroDefinition>>;
   using TokenSequence = std::vector<verible::TokenInfo>;
 
   // Resulting token stream after preprocessing
@@ -94,6 +95,8 @@ struct VerilogPreprocessData {
 class VerilogPreprocess {
   using TokenStreamView = verible::TokenStreamView;
   using MacroDefinition = verible::MacroDefinition;
+  using MacroDefinitionRegistry =
+      std::map<absl::string_view, std::optional<MacroDefinition>>;
   using MacroParameterInfo = verible::MacroParameterInfo;
 
  public:
@@ -115,7 +118,9 @@ class VerilogPreprocess {
 
     // Expand macro definition bodies, this will relexes the macro body.
     bool expand_macros = false;
-    // TODO(hzeller): Provide a map of command-line provided +define+'s
+
+    MacroDefinitionRegistry macro_definitions;
+    // TODO(hzeller): Provide a way to +define+ from the command line.
   };
 
   explicit VerilogPreprocess(const Config& config);
@@ -182,7 +187,7 @@ class VerilogPreprocess {
   void RegisterMacroDefinition(const MacroDefinition&);
   absl::Status ExpandText(const absl::string_view&);
   absl::Status ExpandMacro(const verible::MacroCall&,
-                           const verible::MacroDefinition*);
+                           const verible::MacroDefinition&);
   absl::Status HandleInclude(TokenStreamView::const_iterator,
                              const StreamIteratorGenerator&);
 
diff --git a/verilog/preprocessor/verilog_preprocess_test.cc b/verilog/preprocessor/verilog_preprocess_test.cc
index cb72019ca2..af609c442c 100644
--- a/verilog/preprocessor/verilog_preprocess_test.cc
+++ b/verilog/preprocessor/verilog_preprocess_test.cc
@@ -37,7 +37,7 @@ namespace {
 using testing::ElementsAre;
 using testing::Pair;
 using testing::StartsWith;
-using verible::container::FindOrNull;
+using verible::container::FindWithDefault;
 using verible::file::CreateDir;
 using verible::file::JoinPath;
 using verible::file::testing::ScopedTestFile;
@@ -171,8 +171,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionNoParamsNoValue) {
 
     const auto& definitions = tester.PreprocessorData().macro_definitions;
     EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_)));
-    auto macro = FindOrNull(definitions, "FOOOO");
-    ASSERT_NE(macro, nullptr);
+    auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt);
+    ASSERT_TRUE(macro.has_value());
     EXPECT_EQ(macro->DefinitionText().text(), "");
     EXPECT_FALSE(macro->IsCallable());
     EXPECT_TRUE(macro->Parameters().empty());
@@ -187,8 +187,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionNoParamsSimpleValue) {
 
   const auto& definitions = tester.PreprocessorData().macro_definitions;
   EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_)));
-  auto macro = FindOrNull(definitions, "FOOOO");
-  ASSERT_NE(macro, nullptr);
+  auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt);
+  ASSERT_TRUE(macro.has_value());
   EXPECT_EQ(macro->DefinitionText().text(), "\"bar\"");
   EXPECT_FALSE(macro->IsCallable());
   EXPECT_TRUE(macro->Parameters().empty());
@@ -202,8 +202,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionOneParamWithValue) {
 
   const auto& definitions = tester.PreprocessorData().macro_definitions;
   EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_)));
-  auto macro = FindOrNull(definitions, "FOOOO");
-  ASSERT_NE(macro, nullptr);
+  auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt);
+  ASSERT_TRUE(macro.has_value());
   EXPECT_EQ(macro->DefinitionText().text(), "(x+1)");
   EXPECT_TRUE(macro->IsCallable());
   const auto& params = macro->Parameters();
@@ -221,8 +221,8 @@ TEST(VerilogPreprocessTest, OneMacroDefinitionOneParamDefaultWithValue) {
 
   const auto& definitions = tester.PreprocessorData().macro_definitions;
   EXPECT_THAT(definitions, ElementsAre(Pair("FOOOO", testing::_)));
-  auto macro = FindOrNull(definitions, "FOOOO");
-  ASSERT_NE(macro, nullptr);
+  auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt);
+  ASSERT_TRUE(macro.has_value());
   EXPECT_EQ(macro->DefinitionText().text(), "(x+3)");
   EXPECT_TRUE(macro->IsCallable());
   const auto& params = macro->Parameters();
@@ -243,15 +243,15 @@ TEST(VerilogPreprocessTest, TwoMacroDefinitions) {
   EXPECT_THAT(definitions, ElementsAre(Pair("BAAAAR", testing::_),
                                        Pair("FOOOO", testing::_)));
   {
-    auto macro = FindOrNull(definitions, "BAAAAR");
-    ASSERT_NE(macro, nullptr);
+    auto macro = FindWithDefault(definitions, "BAAAAR", std::nullopt);
+    ASSERT_TRUE(macro.has_value());
     EXPECT_TRUE(macro->IsCallable());
     const auto& params = macro->Parameters();
     EXPECT_EQ(params.size(), 2);
   }
   {
-    auto macro = FindOrNull(definitions, "FOOOO");
-    ASSERT_NE(macro, nullptr);
+    auto macro = FindWithDefault(definitions, "FOOOO", std::nullopt);
+    ASSERT_TRUE(macro.has_value());
     EXPECT_TRUE(macro->IsCallable());
     const auto& params = macro->Parameters();
     EXPECT_EQ(params.size(), 1);
diff --git a/verilog/tools/formatter/verilog_format.cc b/verilog/tools/formatter/verilog_format.cc
index fd6c514708..3a99f83af3 100644
--- a/verilog/tools/formatter/verilog_format.cc
+++ b/verilog/tools/formatter/verilog_format.cc
@@ -101,6 +101,11 @@ ABSL_FLAG(bool, verify_convergence, true,
           "If true, and not incrementally formatting with --lines, "
           "verify that re-formatting the formatted output yields "
           "no further changes, i.e. formatting is convergent.");
+ABSL_FLAG(bool, preprocess_variants, false,
+          "Experimental feature. If true, formatting is done on multiple "
+          "preprocessed variants of "
+          "the input file and merged together. This can help with some "
+          "preprocessor-related parsing issues.");
 
 ABSL_FLAG(bool, verbose, false, "Be more verbose.");
 
@@ -174,9 +179,9 @@ static bool formatOneFile(absl::string_view filename,
   }
 
   std::ostringstream stream;
-  const auto format_status =
-      FormatVerilog(*content_or, diagnostic_filename, format_style, stream,
-                    lines_to_format, formatter_control);
+  const auto format_status = FormatVerilog(
+      *content_or, diagnostic_filename, format_style, stream, lines_to_format,
+      formatter_control, absl::GetFlag(FLAGS_preprocess_variants));
 
   const std::string& formatted_output(stream.str());
   if (!format_status.ok()) {
diff --git a/verilog/tools/preprocessor/verilog_preprocessor.cc b/verilog/tools/preprocessor/verilog_preprocessor.cc
index bc33adf7f0..09710e5105 100644
--- a/verilog/tools/preprocessor/verilog_preprocessor.cc
+++ b/verilog/tools/preprocessor/verilog_preprocessor.cc
@@ -193,12 +193,7 @@ static absl::Status GenerateVariants(const SubcommandArgsRange& args,
   verible::TokenSequence lexed_sequence;
   for (lexer.DoNextToken(); !lexer.GetLastToken().isEOF();
        lexer.DoNextToken()) {
-    // For now we will store the syntax tree tokens only, ignoring all the
-    // white-space characters. however that should be stored to output the
-    // source code just like it was.
-    if (verilog::VerilogLexer::KeepSyntaxTreeTokens(lexer.GetLastToken())) {
-      lexed_sequence.push_back(lexer.GetLastToken());
-    }
+    lexed_sequence.push_back(lexer.GetLastToken());
   }
 
   // Control flow tree constructing.