From 44254c43ee5c55b8442d0b32f0fc1bdf5e78bcb8 Mon Sep 17 00:00:00 2001
From: Nik <git@mail.vaessen.tech>
Date: Fri, 16 Jun 2023 17:37:07 +0200
Subject: [PATCH] add option to skip correct pairs in visualization (#79)

---
 jiwer/alignment.py      |  8 +++++++-
 pyproject.toml          |  2 +-
 tests/test_alignment.py | 20 ++++++++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/jiwer/alignment.py b/jiwer/alignment.py
index bb71637..3944b7c 100644
--- a/jiwer/alignment.py
+++ b/jiwer/alignment.py
@@ -30,7 +30,9 @@
 
 
 def visualize_alignment(
-    output: Union[WordOutput, CharacterOutput], show_measures: bool = True
+    output: Union[WordOutput, CharacterOutput],
+    show_measures: bool = True,
+    skip_correct: bool = True,
 ) -> str:
     """
     Visualize the output of [jiwer.process_words][process.process_words] and
@@ -43,6 +45,7 @@ def visualize_alignment(
         output: The processed output of reference and hypothesis pair(s).
         show_measures: If enabled, the visualization will include measures like the WER
                        or CER
+        skip_correct: If enabled, pairs whose hypothesis fully matches the reference are omitted (sentence numbers are kept)
 
     Returns:
         (str): The visualization as a string
@@ -101,6 +104,9 @@ def visualize_alignment(
 
     final_str = ""
     for idx, (gt, hp, chunks) in enumerate(zip(references, hypothesis, alignment)):
+        if skip_correct and len(chunks) == 1 and chunks[0].type == "equal":
+            continue
+
         final_str += f"sentence {idx+1}\n"
         final_str += _construct_comparison_string(
             gt, hp, chunks, include_space_seperator=not is_cer
diff --git a/pyproject.toml b/pyproject.toml
index 41ca541..7eccb34 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jiwer"
-version = "3.0.1"
+version = "3.0.2"
 description = "Evaluate your speech-to-text system with similarity measures such as word error rate (WER)"
 authors = ["Nik Vaessen <nikvaes@gmail.com>"]
 readme = "README.md"
diff --git a/tests/test_alignment.py b/tests/test_alignment.py
index 6b30507..e1f52cd 100644
--- a/tests/test_alignment.py
+++ b/tests/test_alignment.py
@@ -103,6 +103,26 @@ def test_multiple_sentences(self):
         )
         self.assertEqual(alignment, correct_alignment)
 
+    def test_skip_correct(self):
+        correct_alignment = (
+            "sentence 2\n"
+            "REF: one\n"
+            "HYP:   1\n"
+            "       S\n"
+            "\n"
+            "sentence 3\n"
+            "REF: two\n"
+            "HYP:   2\n"
+            "       S\n"
+        )
+        alignment = jiwer.visualize_alignment(
+            jiwer.process_words(
+                ["perfect", "one", "two", "three"], ["perfect", "1", "2", "three"]
+            ),
+            show_measures=False,
+        )
+        self.assertEqual(alignment, correct_alignment)
+
 
 class TestAlignmentVisualizationCharacters(unittest.TestCase):
     def test_insertion(self):