From ea68a0ff14d7275241c5232ea46a4cb09eea4153 Mon Sep 17 00:00:00 2001
From: Teemu Ruokolainen <teemu.p.ruokolainen@gmail.com>
Date: Wed, 21 Feb 2024 13:46:29 +0200
Subject: [PATCH 1/3] Print results folder after submitting job

---
 src/submit.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/submit.py b/src/submit.py
index e120681..8c226a3 100644
--- a/src/submit.py
+++ b/src/submit.py
@@ -240,6 +240,9 @@ def submit_dir(args, job_name):
     cmd = shlex.split(cmd)
     subprocess.run(cmd)
 
+    # Log
+    print(f"Results will be written to folder: {output_dir}\n")
+
 
 def create_sbatch_script_for_single_file(
     input_file, job_name, mem, cpus_per_task, time, email, tmp_dir
@@ -297,6 +300,9 @@ def submit_file(args, job_name):
     cmd = shlex.split(cmd)
     subprocess.run(cmd)
 
+    # Log
+    print(f"Results will be written to folder: {output_dir}\n")
+
 
 def check_language(language):
     supported_languages = list(settings.supported_languages.keys())

From 3c8b74a7e363f55e0158b3ca68dcb959ab56cc57 Mon Sep 17 00:00:00 2001
From: Teemu Ruokolainen <teemu.p.ruokolainen@gmail.com>
Date: Wed, 21 Feb 2024 14:25:57 +0200
Subject: [PATCH 2/3] Print results folder before submitting job; document .zshrc prompt

---
 docs/source/user_guide.md |  2 ++
 src/submit.py             | 12 ++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/docs/source/user_guide.md b/docs/source/user_guide.md
index 95299cb..8f250a9 100644
--- a/docs/source/user_guide.md
+++ b/docs/source/user_guide.md
@@ -54,6 +54,8 @@ Go to [Open On Demand](http://ood.triton.aalto.fi) and log in with your Aalto us
 > 
 > Subsequently, the shell will ask you for a password. This is your Aalto password. Note that your key presses do not show - just write your password and press enter.
 >
+> You can skip the following question about `.zshrc` file creation by pressing "q".
+>
 > Afterwards, you can close this tab. Your Triton account is now fully operational.
 
 
diff --git a/src/submit.py b/src/submit.py
index 8c226a3..d2a6505 100644
--- a/src/submit.py
+++ b/src/submit.py
@@ -235,14 +235,14 @@ def submit_dir(args, job_name):
         args.SPEECH2TEXT_TMP,
     )
 
+    # Log
+    print(f"Results will be written to folder: {output_dir}\n")
+
     # Submit
     cmd = f"sbatch {tmp_file_sh.absolute()}"
     cmd = shlex.split(cmd)
     subprocess.run(cmd)
 
-    # Log
-    print(f"Results will be written to folder: {output_dir}\n")
-
 
 def create_sbatch_script_for_single_file(
     input_file, job_name, mem, cpus_per_task, time, email, tmp_dir
@@ -295,14 +295,14 @@ def submit_file(args, job_name):
         args.SPEECH2TEXT_TMP,
     )
 
+    # Log
+    print(f"Results will be written to folder: {output_dir}\n")
+
     # Submit
     cmd = f"sbatch {tmp_file_sh.absolute()}"
     cmd = shlex.split(cmd)
     subprocess.run(cmd)
 
-    # Log
-    print(f"Results will be written to folder: {output_dir}\n")
-
 
 def check_language(language):
     supported_languages = list(settings.supported_languages.keys())

From a9cbfc494b693acc4fabcfd08dd0483f73c5f314 Mon Sep 17 00:00:00 2001
From: Teemu Ruokolainen <teemu.p.ruokolainen@gmail.com>
Date: Wed, 21 Feb 2024 15:07:36 +0200
Subject: [PATCH 3/3] fix binary help text

---
 bin/speech2text | 74 ++++++++++++++++++-------------------------------
 1 file changed, 27 insertions(+), 47 deletions(-)

diff --git a/bin/speech2text b/bin/speech2text
index cd49e33..4d8c7be 100755
--- a/bin/speech2text
+++ b/bin/speech2text
@@ -2,63 +2,43 @@
 
 usage() {                                    
      cat << EOF
-Aalto speech2text app.
+This app does speech2text with diarization.
 
-Usage:                             
+Example run on a single file: 
 
-0) Load the speech2text app
+    export SPEECH2TEXT_EMAIL=john.smith@aalto.fi
+    export SPEECH2TEXT_LANGUAGE=finnish
+    speech2text audiofile.mp3
 
-Load the speech2text app with
+Example run on a folder containing one or more audio files:
 
-module load speech2text
+    export SPEECH2TEXT_EMAIL=jane.smith@aalto.fi
+    export SPEECH2TEXT_LANGUAGE=finnish
+    speech2text audiofiles/
 
-This needs to be done once every login.
+The audio files can be in any common audio (.wav, .mp3, .aiff, etc.) or video (.mp4, .mov, etc.) format.
 
+The speech2text app writes result files to a subfolder results/ next to each audio file.
+Result filenames are the audio filename with .txt and .csv extensions. For example, result files
+corresponding to audiofile.mp3 are written to results/audiofile.txt and results/audiofile.csv.
+Result files for audio files in a folder audiofiles/ are written to audiofiles/results/.
 
-1) Set environment variables
+Notification emails will be sent to SPEECH2TEXT_EMAIL. If SPEECH2TEXT_EMAIL is left 
+unspecified, no notifications are sent.
 
-Set email (for Slurm job notifications) and audio language environment variables:
+Supported languages are:
 
-export SPEECH2TEXT_EMAIL=my.name@aalto.fi
-export SPEECH2TEXT_LANGUAGE=my-language
+afrikaans, arabic, armenian, azerbaijani, belarusian, bosnian, bulgarian, catalan, 
+chinese, croatian, czech, danish, dutch, english, estonian, finnish, french, galician, 
+german, greek, hebrew, hindi, hungarian, icelandic, indonesian, italian, japanese, 
+kannada, kazakh, korean, latvian, lithuanian, macedonian, malay, marathi, maori, nepali,
+norwegian, persian, polish, portuguese, romanian, russian, serbian, slovak, slovenian, 
+spanish, swahili, swedish, tagalog, tamil, thai, turkish, ukrainian, urdu, vietnamese, 
+welsh
 
-For example:
-
-export SPEECH2TEXT_EMAIL=john.smith@aalto.fi
-export SPEECH2TEXT_LANGUAGE=finnish
-
-The following variables are already set by the lmod .lua script. They can be ignored by user.
-
-HF_HOME
-TORCH_HOME
-WHISPER_CACHE
-PYANNOTE_CONFIG
-NUMBA_CACHE
-MPLCONFIGDIR
-SPEECH2TEXT_TMP
-SPEECH2TEXT_MEM
-SPEECH2TEXT_CPUS_PER_TASK
-SPEECH2TEXT_TIME
-
-
-2a) Process a single audio file
-
-speech2text audio-file
-
-The audio file can be in any common audio (.wav, .mp3, .aff, etc.) or video (.mp4, .mov, etc.) format.
-The transcription and diarization results (.txt and .csv files) corresponding to each audio file 
-will be written to results/ next to the file.
-
-
-2b) Process multiple audio files in a folder
-
-speech2text audio-files/
-
-The audio file can be in any common audio (.wav, .mp3, .aff, etc.) or video (.mp4, .mov, etc.) format.
-The transcription and diarization results (.txt and .csv files) corresponding to each audio file 
-will be written to audio-files/results.
-
-See also: https://github.com/AaltoRSE/speech2text
+You can leave the language variable SPEECH2TEXT_LANGUAGE unspecified, in which case 
+speech2text tries to detect the language automatically. Specifying the language 
+explicitly is, however, recommended.
 EOF
 }