Skip to content

Commit

Permalink
add support for configuration file
Browse files Browse the repository at this point in the history
Support a `config.ini` file to set default
values for various options and flags, offering
greater control over the application's behavior
  • Loading branch information
kouloumos committed Jun 28, 2024
1 parent f905534 commit eafe28e
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 25 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ ENV/
env.bak/
venv.bak/

# Configurations
config.ini

# Spyder project settings
.spyderproject
.spyproject
Expand Down
20 changes: 18 additions & 2 deletions Readme.md → README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,23 @@ This transcription tool operates through a structured four-stage process:
- for other users, follow the instructions on
their [site](https://ffmpeg.org/) to install it

- To use a specific [configuration profile](#configuration), set the `PROFILE` variable in your `.env` file.


## Configuration

This application supports configuration via a `config.ini` file.
This file allows you to set default values for various options and flags, reducing the need to specify them on the command line every time.
Additionally, the configuration file can include options not available through the command line, offering greater flexibility and control over the application's behavior.

### Creating a Configuration File

An example configuration file named `config.ini.example` is included in the repository.
To use it, copy it to `config.ini` and modify it according to your needs:
```sh
cp config.ini.example config.ini
```

## Install/Uninstall

Navigate to the application directory and run the commands below:
Expand All @@ -68,9 +85,8 @@ Navigate to the application directory and run the below commands:

`source venv/bin/activate` activates the virtual environment

`pip3 install . --use-pep517` to install the application
`pip3 install .` to install the application

To check the version:
`tstbtc --version` view the application version

`tstbtc --help` view the application help
Expand Down
12 changes: 12 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import configparser
from dotenv import dotenv_values

def read_config(profile='DEFAULT'):
    """Load ``config.ini`` and return the section for the given profile.

    The file is looked up relative to the current working directory. A
    missing ``config.ini`` is not an error: ``ConfigParser.read`` skips
    files it cannot open, so the ``DEFAULT`` section is then simply empty
    and callers rely on the fallbacks they pass to ``get``/``getboolean``.

    Args:
        profile: Name of the section (profile) to return.

    Returns:
        The ``configparser.SectionProxy`` for ``profile``.

    Raises:
        KeyError: If ``profile`` is not a section of the configuration.
    """
    config = configparser.ConfigParser()
    config.read('config.ini')

    try:
        return config[profile]
    except KeyError:
        # Keep the exception type, but say which profiles can be selected
        # instead of surfacing only the bare section name.
        available = ', '.join([config.default_section, *config.sections()])
        raise KeyError(
            f"Profile '{profile}' not found in config.ini "
            f"(available profiles: {available})"
        ) from None

# Select the active profile from the `PROFILE` variable of the `.env` file.
# `PROFILE` is optional: when it is missing or empty, fall back to the
# 'DEFAULT' section instead of raising KeyError at import time.
env = dotenv_values(".env")
config = read_config(env.get("PROFILE") or "DEFAULT")
7 changes: 5 additions & 2 deletions app/services/deepgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
application,
utils
)
from app.config import config as config_profile
from app.data_writer import DataWriter
from app.logging import get_logger
from app.media_processor import MediaProcessor
Expand All @@ -33,14 +34,15 @@ def __init__(self, summarize, diarize, upload, data_writer: DataWriter):
self.diarize = diarize
self.upload = upload
self.data_writer = data_writer
self.one_sentence_per_line = True
self.one_sentence_per_line = config_profile.getboolean('one_sentence_per_line', True)
self.dev_mode = False # Extra capabilities during development mode
self.max_audio_length = 3600.0 # 60 minutes in seconds
self.processor = MediaProcessor(chunk_length=1200.0)

def audio_to_text(self, audio_file, chunk=None):
language = config_profile.get('language','en')
logger.info(
f"Transcribing audio {f'(chunk {chunk}) ' if chunk else ''}to text using deepgram...")
f"Transcribing audio {f'(chunk {chunk}) ' if chunk else ''}to text using deepgram[{language}]...")
try:
config = dotenv_values(".env")
dg_client = deepgram.Deepgram(config["DEEPGRAM_API_KEY"])
Expand All @@ -57,6 +59,7 @@ def audio_to_text(self, audio_file, chunk=None):
"smart_formatting": True,
"summarize": self.summarize,
"model": "whisper-large",
"language": language,
},
)
audio.close()
Expand Down
12 changes: 12 additions & 0 deletions config.ini.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[DEFAULT]
deepgram = True
diarize = True
summarize = False
github = none
save_to_markdown = True
noqueue = True
needs_review = False
one_sentence_per_line = True

[development]
verbose_logging = True
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import find_packages, setup


with open("Readme.md", "r", encoding="utf-8") as fh:
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
with open("requirements.txt", "r", encoding="utf-8") as fh:
requirements = fh.read()
Expand All @@ -15,7 +15,7 @@
description="transcribes youtube videos/media to bitcointranscript",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/tvpeter/yt2btc",
url="https://github.com/bitcointranscripts/tstbtc",
py_modules=["transcriber"],
packages=find_packages(),
install_requires=[requirements],
Expand Down
35 changes: 16 additions & 19 deletions transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
from app import (
__app_name__,
__version__,
commands,
utils
)
from app.commands import queue
from app.config import config
from app.logging import configure_logger, get_logger
from app.transcription import Transcription
from app.types import GitHubMode
Expand All @@ -24,7 +25,6 @@ def print_version(ctx, param, value):
click.echo(f"{__app_name__} v{__version__}")
ctx.exit()


@click.option(
"-v",
"--version",
Expand All @@ -38,14 +38,12 @@ def print_version(ctx, param, value):
def cli():
pass


def print_help(ctx, param, value):
    """Print the help text of the current click context and exit.

    Does nothing when ``value`` is falsy or when the context is in
    resilient parsing mode.

    Args:
        ctx: The current click context.
        param: The parameter that triggered the callback (unused).
        value: The value supplied for that parameter.
    """
    if not value or ctx.resilient_parsing:
        return
    # Write to stdout through click, as print_version does; the root logger
    # drops INFO records unless logging was configured to show them.
    click.echo(ctx.get_help())
    ctx.exit()


whisper = click.option(
"-m",
"--model",
Expand All @@ -62,35 +60,36 @@ def print_help(ctx, param, value):
"large-v2",
]
),
default="tiny.en",
default=config.get('model', 'tiny.en'),
show_default=True,
help="Select which whisper model to use for the transcription",
)
deepgram = click.option(
"-D",
"--deepgram",
is_flag=True,
default=False,
default=config.getboolean('deepgram', False),
help="Use deepgram for transcription",
)
diarize = click.option(
"-M",
"--diarize",
is_flag=True,
default=False,
default=config.getboolean('diarize', False),
help="Supply this flag if you have multiple speakers AKA "
"want to diarize the content",
)
summarize = click.option(
"-S",
"--summarize",
is_flag=True,
default=False,
default=config.getboolean('summarize', False),
help="Summarize the transcript [only available with deepgram]",
)
cutoff_date = click.option(
"--cutoff-date",
type=str,
default=config.get('cutoff_date', None),
help=("Specify a cutoff date (in YYYY-MM-DD format) to process only sources "
"published after this date. Sources with a publication date on or before "
"the cutoff will be excluded from processing. This option is useful for "
Expand All @@ -100,7 +99,7 @@ def print_help(ctx, param, value):
github = click.option(
"--github",
type=click.Choice(["remote", "local", "none"]),
default="none",
default=config.get('github', 'none'),
help=("Specify the GitHub operation mode."
"'remote': Create a new branch, push changes to it, and push it to the origin bitcointranscripts repo. "
"'local': Commit changes to the current local branch without pushing to the remote repo."
Expand All @@ -111,46 +110,46 @@ def print_help(ctx, param, value):
"-u",
"--upload",
is_flag=True,
default=False,
default=config.getboolean('upload_to_s3', False),
help="Upload processed model files to AWS S3",
)
save_to_markdown = click.option(
"--markdown",
is_flag=True,
default=False,
default=config.getboolean('save_to_markdown', False),
help="Save the resulting transcript to a markdown format supported by bitcointranscripts",
)
noqueue = click.option(
"--noqueue",
is_flag=True,
default=False,
default=config.getboolean('noqueue', False),
help="Do not push the resulting transcript to the Queuer backend",
)
needs_review = click.option(
"--needs-review",
is_flag=True,
default=False,
default=config.getboolean('needs_review', False),
help="Add 'needs review' flag to the resulting transcript",
)
model_output_dir = click.option(
"-o",
"--model_output_dir",
type=str,
default="local_models/",
default=config.get('model_output_dir', 'local_models/'),
show_default=True,
help="Set the directory for saving model outputs",
)
nocleanup = click.option(
"--nocleanup",
is_flag=True,
default=False,
default=config.getboolean('nocleanup', False),
help="Do not remove temp files on exit",
)
verbose_logging = click.option(
"-V",
"--verbose",
is_flag=True,
default=False,
default=config.getboolean('verbose_logging', False),
help="Supply this flag to enable verbose logging",
)

Expand Down Expand Up @@ -190,7 +189,6 @@ def print_help(ctx, param, value):
help="Add a category to the transcript's metadata (can be used multiple times)",
)


@cli.command()
@click.argument("source", nargs=1)
# Available transcription models and services
Expand Down Expand Up @@ -462,8 +460,7 @@ def postprocess(
logger.error(e)
traceback.print_exc()


cli.add_command(queue.commands)
cli.add_command(commands.queue)

if __name__ == '__main__':
cli()

0 comments on commit eafe28e

Please sign in to comment.