Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interactive functionality #268

Merged
merged 3 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ tabulate>=0.8.9
torch>=2.2.1
tqdm==4.66.4
transformers>=4.12
rich>=10.0.0
questionary>=1.10.0
268 changes: 145 additions & 123 deletions src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,169 +23,191 @@
-->
"""

from scribe_data.cli.cli_utils import (
data_type_metadata,
language_metadata,
)
from pathlib import Path
from typing import List

import questionary
from questionary import Choice
from rich import print as rprint
from rich.console import Console
from rich.table import Table

from scribe_data.cli.cli_utils import data_type_metadata, language_metadata
from scribe_data.cli.get import get_data
from scribe_data.utils import (
DEFAULT_CSV_EXPORT_DIR,
DEFAULT_JSON_EXPORT_DIR,
DEFAULT_TSV_EXPORT_DIR,
)
from scribe_data.cli.version import get_version_message
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR

console = Console()


class ScribeDataConfig:
    """
    Container for the state of an interactive mode request.

    Attributes
    ----------
    languages : List[str]
        Capitalized "language" value of each entry in language_metadata["languages"].

    data_types : List[str]
        The keys of data_type_metadata["data-types"].

    selected_languages : List[str]
        Languages chosen for the request; starts empty.

    selected_data_types : List[str]
        Data types chosen for the request; starts empty.

    output_type : str
        Export file type; starts as "json".

    output_dir : Path
        Export directory; starts as Path(DEFAULT_JSON_EXPORT_DIR).

    overwrite : bool
        Overwrite flag; starts as False.
    """

    def __init__(self):
        # Options that can be offered to the user, read from the CLI metadata.
        available_languages = []
        for entry in language_metadata["languages"]:
            available_languages.append(entry["language"].capitalize())

        self.languages = available_languages
        self.data_types = [*data_type_metadata["data-types"].keys()]

        # Nothing is selected until the request is configured.
        self.selected_languages: List[str] = []
        self.selected_data_types: List[str] = []

        # Output defaults.
        self.output_type: str = "json"
        self.output_dir: Path = Path(DEFAULT_JSON_EXPORT_DIR)
        self.overwrite: bool = False

def get_selection(user_input: str, options: list[str]) -> list[str]:
"""
Parse user input to get selected options.

Parameters
----------
user_input : str
The user's input string.
config = ScribeDataConfig()

options : List[str]
The list of available options given the interactive mode stage.

Returns
-------
List[str]
The options available in interactive mode and CLI directions.
# MARK: Summary


def display_summary():
"""
if user_input.lower() == "a":
return options
Displays a summary of the interactive mode request to run.
"""
table = Table(title="Scribe-Data Configuration Summary")

try:
indices = [int(i.strip()) - 1 for i in user_input.split(",")]
table.add_column("Setting", style="cyan")
table.add_column("Value(s)", style="magenta")

return [options[i] for i in indices]
table.add_row("Languages", ", ".join(config.selected_languages) or "None")
table.add_row("Data Types", ", ".join(config.selected_data_types) or "None")
table.add_row("Output Type", config.output_type)
table.add_row("Output Directory", str(config.output_dir))
table.add_row("Overwrite", "Yes" if config.overwrite else "No")

except (ValueError, IndexError):
return [opt for opt in user_input.split(",") if opt in options]
console.print(table)


def select_languages() -> list[str]:
"""
Display language options and get user selection.
def configure_settings():
# MARK: Languages

Returns
-------
List[str]
The languages available in Scribe-Data and CLI directions.
"""
print("\nLanguage options:")
languages = [
lang["language"].capitalize() for lang in language_metadata["languages"]
]
if not config.selected_languages:
language_selected = False
language_choices = ["All"] + config.languages
selected_languages = questionary.checkbox(
message="Select languages and press enter:",
choices=language_choices,
).ask()

for i, lang in enumerate(languages, 1):
print(f"{i}. {lang}")
if "All" in selected_languages:
config.selected_languages = config.languages
language_selected = True

lang_input = input(
"\nPlease enter the languages to get data for, their numbers or (a) for all languages:"
)
elif selected_languages:
config.selected_languages = selected_languages
language_selected = True

return get_selection(lang_input, languages)
else:
rprint(
"[yellow]No language selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?").ask():
return configure_settings()

else:
language_selected = True

def select_data_types() -> list[str]:
"""
Display data type options and get user selection.
if language_selected:
# MARK: Data Types

Returns
-------
List[str]
The data types available in Scribe-Data and CLI directions.
"""
print("\nData type options:")
data_types = data_type_metadata["data-types"]
data_type_selected = False
data_type_choices = ["All"] + config.data_types
selected_data_types = questionary.checkbox(
"Select data types and press enter:",
choices=data_type_choices,
).ask()

for i, dt in enumerate(data_types, 1):
print(f"{i}. {dt}")
if "All" in selected_data_types:
config.selected_data_types = config.data_types
data_type_selected = True

dt_input = input(
"\nPlease enter the data types to get, their numbers or (a) for all data types:"
)
elif selected_data_types:
config.selected_data_types = selected_data_types
data_type_selected = True

return get_selection(dt_input, list(data_types.keys()))
else:
rprint(
"[yellow]No data type selected. Please select at least one option with space followed by enter.[/yellow]"
)
if questionary.confirm("Continue?").ask():
return configure_settings()

if data_type_selected:
# MARK: Output Type

def get_output_options() -> dict:
"""
Get output options from user.
config.output_type = questionary.select(
"Select output type:", choices=["json", "csv", "tsv"]
).ask()

Returns
-------
dict
Output options including type, directory, and overwrite flag
"""
valid_types = ["json", "csv", "tsv"]
output_type = (
input("File type to export (json, csv, tsv) [json]: ").strip().lower() or "json"
)
config.output_dir = Path(
questionary.text(
"Enter output directory:", default=str(config.output_dir)
).ask()
)

while output_type not in valid_types:
print(
f"Invalid output type '{output_type}'. Please choose from 'json', 'csv', or 'tsv'."
)
output_type = (
input("File type to export (json, csv, tsv) [json]: ").strip().lower()
or "json"
)
config.overwrite = questionary.confirm(
"Overwrite existing files?", default=config.overwrite
).ask()

if output_type == "csv":
default_export_dir = DEFAULT_CSV_EXPORT_DIR
display_summary()

elif output_type == "tsv":
default_export_dir = DEFAULT_TSV_EXPORT_DIR

else:
default_export_dir = DEFAULT_JSON_EXPORT_DIR
def run_export():
if not config.selected_languages or not config.selected_data_types:
rprint("[bold red]Error: Please configure languages and data types.[/bold red]")
return

output_dir = (
input(f"Export directory path [./{default_export_dir}]: ").strip()
or f"./{default_export_dir}"
)
overwrite = (
input("Overwrite existing data without asking (y/n) [n]: ").strip().lower()
== "y"
)
# MARK: Export Data

return {"type": output_type, "dir": output_dir, "overwrite": overwrite}
with console.status("[bold green]Exporting data...[/bold green]") as status:
for language in config.selected_languages:
for data_type in config.selected_data_types:
status.update(
f"[bold green]Exporting {language} {data_type} data...[/bold green]"
)

get_data(
language=language,
data_type=data_type,
output_type=config.output_type,
output_dir=str(config.output_dir),
overwrite=config.overwrite,
all=config.output_type,
)

def run_interactive_mode():
"""
Run the interactive mode for Scribe-Data CLI.
rprint(f"\n[green]✔[/green] Exported {language} {data_type} data.")

This function guides the user through selecting languages, data types and output options.
The process is then executed based on these selections.
"""
selected_languages = select_languages()
selected_data_types = select_data_types()
output_options = get_output_options()
rprint("[bold green]Data export completed successfully![/bold green]")

print(
f"Data will be exported as {output_options['type'].upper()} files to '{output_options['dir']}'."
)

for language in selected_languages:
for data_type in selected_data_types:
get_data(
language,
data_type,
output_options["dir"],
output_options["overwrite"],
output_options["type"],
)
# MARK: Start


# This function can be called from main.py when the -i or --interactive flag is used.
def start_interactive_mode():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a docstring here, as well as for the other functions.

print("Welcome to Scribe-Data interactive mode!")
run_interactive_mode()
rprint(
f"[bold green]Welcome to {get_version_message()} interactive mode![/bold green]"
)

while True:
choice = questionary.select(
"What would you like to do?",
choices=[
Choice("Configure request", "configure"),
Choice("Run configured data request", "run"),
Choice("Exit", "exit"),
],
).ask()

if choice == "configure":
configure_settings()

elif choice == "run":
run_export()
rprint("[bold cyan]Thank you for using Scribe-Data![/bold cyan]")
break

else:
break


if __name__ == "__main__":
# This allows for testing the interactive mode directly.
start_interactive_mode()
Loading