lint: black + isort

AaltoRSE · Mar 13, 2024 · f5c7a22 · f5c7a22
1 parent 4d0c41a
commit f5c7a22
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 32 deletions.
diff --git a/src/speech2text.py b/src/speech2text.py
@@ -266,16 +266,20 @@ def write_alignment_to_txt_file(alignment: dict, output_file_stem: Path):
 def load_whisperx_model(
     name: str,
     language: Optional[str] = None,
-    device: Optional[Union[str, torch.device]] = 'cuda'):
+    device: Optional[Union[str, torch.device]] = "cuda",
+):
     """
     Load a Whisper model in GPU.
 
     Will raise an error if CUDA is not available. This is due to batch_size optimization method in utils.py.
-    The submitted script will run on a GPU node, so this should not be a problem. The only issue is with a 
+    The submitted script will run on a GPU node, so this should not be a problem. The only issue is with a
     hardware failure.
     """
     if not torch.cuda.is_available():
-        raise ValueError("CUDA is not available. Check the hardware failures for " + subprocess.check_output(['hostname']).decode())
+        raise ValueError(
+            "CUDA is not available. Check the hardware failures for "
+            + subprocess.check_output(["hostname"]).decode()
+        )
 
     if name not in settings.available_whisper_models:
         logger.warning(

diff --git a/src/submit.py b/src/submit.py
@@ -7,12 +7,12 @@
 warnings.filterwarnings("ignore")
 
 import argparse
-from argparse import Namespace
 import json
 import os
 import re
 import shlex
 import subprocess
+from argparse import Namespace
 from pathlib import Path, PosixPath
 
 import settings
@@ -73,7 +73,7 @@ def get_argument_parser():
     return parser
 
 
-def get_existing_result_files(input_file: str, output_dir: str) -> 'tuple[list, list]':
+def get_existing_result_files(input_file: str, output_dir: str) -> "tuple[list, list]":
     """
     For the input file or folder, check if the expected result files exist already in the output directory.
 
@@ -106,12 +106,12 @@ def get_existing_result_files(input_file: str, output_dir: str) -> 'tuple[list,
 def parse_job_name(input_path: str) -> Path:
     """
     Convert input file/folder to path object.
-    
+
     Parameters
     ----------
     input_path: str
         The input path for the audio files.
-        
+
     Returns
     -------
     Path
@@ -120,16 +120,15 @@ def parse_job_name(input_path: str) -> Path:
     return Path(input_path).name
 
 
-def parse_output_dir(input_path: str, 
-                     create_if_not_exists: bool = True) -> str:
+def parse_output_dir(input_path: str, create_if_not_exists: bool = True) -> str:
     """
     Create the output directory for the results.
-    
+
     Parameters
     ----------
     input_path: str
         The input path for the audio files.
-        
+
     Returns
     -------
     output_dir: str
@@ -148,10 +147,9 @@ def parse_output_dir(input_path: str,
     return output_dir
 
 
-def create_array_input_file(input_dir: str,
-                            output_dir: str,
-                            job_name: Path, 
-                            tmp_dir) -> str:
+def create_array_input_file(
+    input_dir: str, output_dir: str, job_name: Path, tmp_dir
+) -> str:
     """
     Process the input directory and create a json file with the list of audio files to process.
 
@@ -175,14 +173,16 @@ def create_array_input_file(input_dir: str,
     input_files = []
     for input_file in Path(input_dir).glob("*.*"):
         try:
-            result = subprocess.run(["ffmpeg", "-i", str(input_file)], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            result = subprocess.run(
+                ["ffmpeg", "-i", str(input_file)],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
         except subprocess.CalledProcessError as e:
             print(f"Error processing {input_file}: {e}")
             continue
         if "Audio:" not in str(result.stderr):
-            print(
-            f".. {input_file}: Skip since it's not an audio file."
-            )
+            print(f".. {input_file}: Skip since it's not an audio file.")
             continue
         existing, missing = get_existing_result_files(input_file, output_dir)
         if existing and not missing:
@@ -259,13 +259,15 @@ def estimate_job_time(input_path: PosixPath) -> str:
     return add_durations(PIPELINE_LOADING_TIME, audio_processing_time)
 
 
-def create_sbatch_script_for_array_job(input_file: str, 
-                                       job_name: Path, 
-                                       mem: int, 
-                                       cpus_per_task: int, 
-                                       time: str, 
-                                       email: str,
-                                       tmp_dir: str) -> str:
+def create_sbatch_script_for_array_job(
+    input_file: str,
+    job_name: Path,
+    mem: int,
+    cpus_per_task: int,
+    time: str,
+    email: str,
+    tmp_dir: str,
+) -> str:
     """
     Create the sbatch script for the array job.
 
@@ -316,8 +318,7 @@ def create_sbatch_script_for_array_job(input_file: str,
     return tmp_file_sh
 
 
-def submit_dir(args: Namespace, 
-               job_name: Path):
+def submit_dir(args: Namespace, job_name: Path):
     """
     Run sbatch command to submit the job to the cluster.
 
@@ -387,8 +388,7 @@ def create_sbatch_script_for_single_file(
     return tmp_file_sh
 
 
-def submit_file(args: Namespace, 
-                job_name: Path):
+def submit_file(args: Namespace, job_name: Path):
     """
     Run sbatch command to submit the job to the cluster.
 
@@ -500,7 +500,7 @@ def check_whisper_model(name: str) -> bool:
     ----------
     name: str
         The Whisper model to check.
-    
+
     Returns
     -------
     Boolean:

diff --git a/src/utils.py b/src/utils.py
@@ -193,7 +193,7 @@ def calculate_max_batch_size() -> int:
     Parameters
     ----------
     None
-    
+
     Returns
     -------
     batch_size: