Skip to content

Commit

Permalink
Updated the .txt file output to be more readable
Browse files Browse the repository at this point in the history
  • Loading branch information
danielmaclaren committed Sep 29, 2023
1 parent 5a5a338 commit 7aa6fb0
Showing 1 changed file with 17 additions and 3 deletions.
20 changes: 17 additions & 3 deletions interviewkit/transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
print("Please install Whisper: pip install git+https://github.com/openai/whisper.git")
exit(1)

from whisper.utils import get_writer
from pydantic import BaseModel

class Transcript(BaseModel):
Expand All @@ -21,9 +22,22 @@ def main() -> None:

result = model.transcribe((AUDIO_LOCATION + AUDIO_FILENAME), fp16=False)

# Save as a TXT file without any line breaks, name of the file could perhaps be changed with the slicer.py name of audio file
with open((AUDIO_FILENAME + ".txt"), "w", encoding="utf-8") as txt:
txt.write(result["text"])
transcript_output_location = "./"

# Setting some initial options values for the .txt output file
txt_file_options = {
'max_line_width': 50, # the maximum number of characters in a line before breaking the line
'max_line_count': 1, # the maximum number of lines in a segment
'highlight_words': False # underline each word as it is spoken in srt and vtt
}

# Save as a .txt file without line breaks. The name of the file could be changed to the audio file name from slicer.py
# with open((AUDIO_FILENAME + ".txt"), "w", encoding="utf-8") as txt:
# txt.write(result["text"])

# Save as a .txt file with hard line breaks, added to make the output more readable for the user
txt_writer = get_writer("txt", transcript_output_location)
txt_writer(result, AUDIO_FILENAME, txt_file_options)

if __name__ == "__main__":
main()

0 comments on commit 7aa6fb0

Please sign in to comment.