From 44254c43ee5c55b8442d0b32f0fc1bdf5e78bcb8 Mon Sep 17 00:00:00 2001 From: Nik Date: Fri, 16 Jun 2023 17:37:07 +0200 Subject: [PATCH] add option to skip correct pairs in visualization (#79) --- jiwer/alignment.py | 8 +++++++- pyproject.toml | 2 +- tests/test_alignment.py | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/jiwer/alignment.py b/jiwer/alignment.py index bb71637..3944b7c 100644 --- a/jiwer/alignment.py +++ b/jiwer/alignment.py @@ -30,7 +30,9 @@ def visualize_alignment( - output: Union[WordOutput, CharacterOutput], show_measures: bool = True + output: Union[WordOutput, CharacterOutput], + show_measures: bool = True, + skip_correct: bool = True, ) -> str: """ Visualize the output of [jiwer.process_words][process.process_words] and @@ -43,6 +45,7 @@ def visualize_alignment( output: The processed output of reference and hypothesis pair(s). show_measures: If enabled, the visualization will include measures like the WER or CER + skip_correct: If enabled, the visualization will exclude correct reference and hypothesis pairs Returns: (str): The visualization as a string @@ -101,6 +104,9 @@ def visualize_alignment( final_str = "" for idx, (gt, hp, chunks) in enumerate(zip(references, hypothesis, alignment)): + if skip_correct and len(chunks) == 1 and chunks[0].type == "equal": + continue + final_str += f"sentence {idx+1}\n" final_str += _construct_comparison_string( gt, hp, chunks, include_space_seperator=not is_cer diff --git a/pyproject.toml b/pyproject.toml index 41ca541..7eccb34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "jiwer" -version = "3.0.1" +version = "3.0.2" description = "Evaluate your speech-to-text system with similarity measures such as word error rate (WER)" authors = ["Nik Vaessen "] readme = "README.md" diff --git a/tests/test_alignment.py b/tests/test_alignment.py index 6b30507..e1f52cd 100644 --- a/tests/test_alignment.py +++ b/tests/test_alignment.py @@ -103,6 +103,26 @@ def test_multiple_sentences(self): ) self.assertEqual(alignment, correct_alignment) + def test_skip_correct(self): + correct_alignment = ( + "sentence 2\n" + "REF: one\n" + "HYP: 1\n" + " S\n" + "\n" + "sentence 3\n" + "REF: two\n" + "HYP: 2\n" + " S\n" + ) + alignment = jiwer.visualize_alignment( + jiwer.process_words( + ["perfect", "one", "two", "three"], ["perfect", "1", "2", "three"] + ), + show_measures=False, + ) + self.assertEqual(alignment, correct_alignment) + class TestAlignmentVisualizationCharacters(unittest.TestCase): def test_insertion(self):