shashikg · Pranjalya · Dec 28, 2023 · Jan 31, 2024 · Mar 5, 2024 · Mar 19, 2024
diff --git a/whisper_s2t/backends/ctranslate2/model.py b/whisper_s2t/backends/ctranslate2/model.py
@@ -215,7 +215,7 @@ def align_words(self, features, texts, text_tokens, sot_seqs, seq_lens, seg_meta
 
  return word_timings
 
- def generate_segment_batched(self, features, prompts, seq_lens, seg_metadata):
+ def generate_segment_batched(self, features, prompts, seq_lens=None, seg_metadata=None):
 
  if self.device == 'cpu':
  features = np.ascontiguousarray(features.detach().numpy())

diff --git a/whisper_s2t/data.py b/whisper_s2t/data.py
@@ -1,5 +1,6 @@
 import torch
 
+import math
 import numpy as np
 from tqdm import tqdm
 
@@ -63,7 +64,7 @@ def __call__(self, input_file=None, audio_signal=None):
  audio_duration = len(audio_signal)/self.sampling_rate
 
  start_ends = []
- for i in range(0, int(audio_duration), int(self.max_seg_len)):
+ for i in range(0, math.ceil(audio_duration), int(self.max_seg_len)):
  start_ends.append([i, i + self.max_seg_len])
 
  start_ends[-1][1] = min(audio_duration, start_ends[-1][1]) # fix edge