fix comments

AaltoRSE · Apr 5, 2024 · 5eb50ac
1 parent f37f804
commit 5eb50ac
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 3 deletions.
diff --git a/src/speech2text.py b/src/speech2text.py
@@ -121,7 +121,7 @@ def combine_transcription_and_diarization(transcription_segments,
         }
     """
 
-    # Convert transcription segments to word-level using wav2vec2 alignment
+    # Convert transcription segments so that each segment corresponds to a word
     wav2vec_model_name = settings.wav2vec_models[language] if language in settings.wav2vec_models else None
 
     align_model, align_metadata = whisperx.load_align_model(language,
@@ -135,7 +135,7 @@ def combine_transcription_and_diarization(transcription_segments,
                               settings.compute_device
                               )
 
-    # Combine diarization and word-level transcription segments
+    # Assign speaker to transcribed word segments
     segments = assign_word_speakers(diarization_segments, transcription_segments['segments'])
 
     # Reformat the result (return a dictionary of lists)

diff --git a/src/utils.py b/src/utils.py
@@ -161,7 +161,7 @@ def __call__(
 
 def assign_word_speakers(diarize_df, transcript_segments):
     """
-    This function assigns speakers to words and segments in a transcript based on diarization results.
+    Assign speakers to words and segments in a transcript based on diarization results.
 
     Args:
         diarize_df (pd.DataFrame): The diarization dataframe.