From 5552b7eb8df176894d7866f0be71646337cc8f8f Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman@google.com>
Date: Sun, 17 Nov 2024 20:43:42 -0800
Subject: [PATCH] Add option to skip blocks with no loop register during
 annotation (#259)

This patch adds an option to the compile_modules pipeline to skip
emitting blocks that do not have any loop register attached to them
because they use all the GPRs. This is necessary when we want to
benchmark in loop mode.

This is intended to fix an issue where, when running the benchmarking
pipeline (which is currently set up to only run in loop mode), we just
pass MCRegister::NoRegister, which causes a machine code verification
error as that is not a valid register. I will open another patch to fix
that at some point.
---
 gematria/datasets/pipelines/compile_modules.py  |  8 ++++++++
 .../datasets/pipelines/compile_modules_lib.py   | 17 +++++++++++++----
 .../pipelines/compile_modules_lib_test.py       | 15 ++++++++++++++-
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/gematria/datasets/pipelines/compile_modules.py b/gematria/datasets/pipelines/compile_modules.py
index ac89aa34..4cd77487 100644
--- a/gematria/datasets/pipelines/compile_modules.py
+++ b/gematria/datasets/pipelines/compile_modules.py
@@ -64,6 +64,13 @@
     'The maximum number of times to try annotating a block before giving up',
 )
 
+_SKIP_NO_LOOP_REGISTER = flags.DEFINE_bool(
+    'skip_no_loop_register',
+    False,
+    'Whether or not to skip emitting basic blocks for which a loop register'
+    ' cannot be found.',
+)
+
 
 def main(argv) -> None:
   del argv  # Unused.
@@ -77,6 +84,7 @@ def main(argv) -> None:
       ANNOTATOR_MAPPING[_ANNOTATOR_TYPE.value],
       _MAX_ANNOTATION_ATTEMPTS.value,
       _OUTPUT_VOCAB_FILE.value,
+      _SKIP_NO_LOOP_REGISTER.value,
   )
 
   with beam.Pipeline(options=beam_options) as pipeline:
diff --git a/gematria/datasets/pipelines/compile_modules_lib.py b/gematria/datasets/pipelines/compile_modules_lib.py
index 0bc0a6ef..e3a601d3 100644
--- a/gematria/datasets/pipelines/compile_modules_lib.py
+++ b/gematria/datasets/pipelines/compile_modules_lib.py
@@ -167,9 +167,11 @@ def __init__(
       self,
       annotator_type: bhive_to_exegesis.AnnotatorType,
       max_annotation_attempts: int,
+      skip_no_loop_register: bool,
   ):
     self._annotator_type = annotator_type
     self._max_annotation_attempts = max_annotation_attempts
+    self._skip_no_loop_register = skip_no_loop_register
     self._blocks_annotated_successfully = metrics.Metrics.counter(
         _BEAM_METRIC_NAMESPACE_NAME, 'annotate_blocks_success'
     )
@@ -193,10 +195,14 @@ def process(
       print('', file=dummy_file)
 
     try:
+      execution_annotations = self._bhive_to_exegesis.annotate_basic_block(
+          bb_hex, self._annotator_type, self._max_annotation_attempts
+      )
+      has_loop_register = execution_annotations.HasField('loop_register')
+      if self._skip_no_loop_register and not has_loop_register:
+        return  # Opt-in filter: drop blocks that have no loop register.
       yield execution_annotation_pb2.BlockWithExecutionAnnotations(
-          execution_annotations=self._bhive_to_exegesis.annotate_basic_block(
-              bb_hex, self._annotator_type, self._max_annotation_attempts
-          ),
+          execution_annotations=execution_annotations,
           block_hex=bb_hex,
       )
       self._blocks_annotated_successfully.inc()
@@ -231,6 +237,7 @@ def get_bbs(
     annotator_type: bhive_to_exegesis.AnnotatorType,
     max_annotation_attempts: int,
     vocab_output_file: str,
+    skip_no_loop_register: bool,
 ) -> Callable[[beam.Pipeline], None]:
   """Creates a pipeline to process BBs from IR modules.
 
@@ -281,7 +288,9 @@ def pipeline(root: beam.Pipeline) -> None:
         | 'Deduplicate Processed BBs' >> DeduplicateValues()
     )
     annotated_bbs = processed_bbs_deduplicated | 'Annotate BBs' >> beam.ParDo(
-        AnnotateBBs(annotator_type, max_annotation_attempts)
+        AnnotateBBs(
+            annotator_type, max_annotation_attempts, skip_no_loop_register
+        )
     )
     bb_vocab = processed_bbs_deduplicated | 'Get Vocab' >> beam.ParDo(
         GetVocab()
diff --git a/gematria/datasets/pipelines/compile_modules_lib_test.py b/gematria/datasets/pipelines/compile_modules_lib_test.py
index 4db4c3ea..bca93be9 100644
--- a/gematria/datasets/pipelines/compile_modules_lib_test.py
+++ b/gematria/datasets/pipelines/compile_modules_lib_test.py
@@ -94,7 +94,7 @@ def test_deduplicate_values(self):
       bhive_to_exegesis.AnnotatorType.exegesis,
   ])
   def test_annotate_bbs(self, annotator_type):
-    annotator = compile_modules_lib.AnnotateBBs(annotator_type, 50)
+    annotator = compile_modules_lib.AnnotateBBs(annotator_type, 50, False)
     annotator.setup()
 
     annotated_blocks = list(
@@ -103,6 +103,18 @@ def test_annotate_bbs(self, annotator_type):
 
     self.assertLen(annotated_blocks, 1)
 
+  def test_annotate_bbs_no_loop_register(self):
+    annotator = compile_modules_lib.AnnotateBBs(
+        bhive_to_exegesis.AnnotatorType.fast, 50, True
+    )
+    annotator.setup()
+    # The MOVs below use all 16 GPRs, so no loop register can be assigned.
+    annotated_blocks = list(
+        annotator.process('4889C84889DA4889FE4889EC4D89C84D89DA4D89EC4D89FE')
+    )
+
+    self.assertLen(annotated_blocks, 0)
+
   def test_get_vocab(self):
     get_vocab_function = compile_modules_lib.GetVocab()
     get_vocab_function.setup()
@@ -185,6 +197,7 @@ def test_get_bbs(self, annotator_type):
         annotator_type,
         50,
         vocab_output_file_pattern,
+        False,
     )
 
     with test_pipeline.TestPipeline() as pipeline_under_test: