Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interactive functionality #268

Merged
merged 3 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ tabulate>=0.8.9
torch>=2.2.1
tqdm==4.66.4
transformers>=4.12
rich>=10.0.0
questionary>=1.10.0
262 changes: 125 additions & 137 deletions src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,168 +23,156 @@
-->
"""

from scribe_data.cli.cli_utils import (
data_type_metadata,
language_metadata,
)
from typing import List
from pathlib import Path
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from rich import print as rprint
import questionary
from questionary import Choice
from scribe_data.cli.cli_utils import data_type_metadata, language_metadata
from scribe_data.cli.get import get_data
from scribe_data.utils import (
DEFAULT_CSV_EXPORT_DIR,
DEFAULT_JSON_EXPORT_DIR,
DEFAULT_TSV_EXPORT_DIR,
)


def get_selection(user_input: str, options: list[str]) -> list[str]:
    """
    Parse user input to get selected options.

    The input may be the single letter ``a`` (all options), a comma-separated
    list of 1-based option numbers, or a comma-separated list of option names.
    Surrounding whitespace around the input and around each entry is ignored.

    Parameters
    ----------
    user_input : str
        The user's input string.

    options : List[str]
        The list of available options given the interactive mode stage.

    Returns
    -------
    List[str]
        The selected options. If every entry is a valid 1-based number the
        matching options are returned in the order given; otherwise only the
        entries that exactly match an option name are returned (possibly an
        empty list).
    """
    cleaned_input = user_input.strip()
    if cleaned_input.lower() == "a":
        return options

    entries = [entry.strip() for entry in cleaned_input.split(",")]

    try:
        positions = [int(entry) for entry in entries]

    except ValueError:
        # At least one entry is not a number, so treat the input as names.
        positions = None

    # Only accept numbers inside 1..len(options). Without this check "0" or a
    # negative number would wrap around and silently select from the end.
    if positions is not None and all(1 <= pos <= len(options) for pos in positions):
        return [options[pos - 1] for pos in positions]

    return [entry for entry in entries if entry in options]


def select_languages() -> list[str]:
"""
Display language options and get user selection.

Returns
-------
List[str]
The languages available in Scribe-Data and CLI directions.
"""
print("\nLanguage options:")
languages = [
lang["language"].capitalize() for lang in language_metadata["languages"]
]
for i, lang in enumerate(languages, 1):
print(f"{i}. {lang}")

lang_input = input(
"\nPlease enter the languages to get data for, their numbers or (a) for all languages: "
)
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
from scribe_data.cli.version import get_version_message

return get_selection(lang_input, languages)
console = Console()


def select_data_types() -> list[str]:
"""
Display data type options and get user selection.
class ScribeDataConfig:
    """
    Container for the choices made during an interactive session.

    Attributes
    ----------
    languages : List[str]
        Capitalized name of every entry in ``language_metadata["languages"]``.

    data_types : List[str]
        The keys of ``data_type_metadata["data-types"]``.

    selected_languages : List[str]
        Languages chosen by the user; starts empty.

    selected_data_types : List[str]
        Data types chosen by the user; starts empty.

    output_type : str
        Export file type; starts as ``"json"``.

    output_dir : Path
        Export directory; starts as ``DEFAULT_JSON_EXPORT_DIR``.

    overwrite : bool
        Overwrite setting chosen by the user; starts as ``False``.
    """

    def __init__(self):
        # Options offered to the user, read from the package metadata.
        available_languages = []
        for entry in language_metadata["languages"]:
            available_languages.append(entry["language"].capitalize())

        self.languages = available_languages
        self.data_types = [*data_type_metadata["data-types"].keys()]

        # Nothing is selected until the user configures the session.
        self.selected_languages: List[str] = []
        self.selected_data_types: List[str] = []

        # Export defaults.
        self.output_type: str = "json"
        self.output_dir: Path = Path(DEFAULT_JSON_EXPORT_DIR)
        self.overwrite: bool = False

Returns
-------
List[str]
The data types available in Scribe-Data and CLI directions.
"""
print("\nData type options:")
data_types = data_type_metadata["data-types"]

for i, dt in enumerate(data_types, 1):
print(f"{i}. {dt}")
config = ScribeDataConfig()

dt_input = input(
"\nPlease enter the data types to get, their numbers or (a) for all data types: "
)

return get_selection(dt_input, list(data_types.keys()))
# MARK: Display Summary
def display_summary():
table = Table(title="Scribe-Data Configuration Summary")
table.add_column("Setting", style="cyan")
table.add_column("Value", style="magenta")

table.add_row("Languages", ", ".join(config.selected_languages) or "None")
table.add_row("Data Types", ", ".join(config.selected_data_types) or "None")
table.add_row("Output Type", config.output_type)
table.add_row("Output Directory", str(config.output_dir))
table.add_row("Overwrite", "Yes" if config.overwrite else "No")

def get_output_options() -> dict:
"""
Get output options from user.
console.print(table)

Returns
-------
dict
Output options including type, directory, and overwrite flag
"""
valid_types = ["json", "csv", "tsv"]
output_type = (
input("File type to export (json, csv, tsv) [json]: ").strip().lower() or "json"
)

while output_type not in valid_types:
print(
f"Invalid output type '{output_type}'. Please choose from 'json', 'csv', or 'tsv'."
def configure_settings():
# MARK: Language selection
language_choices = ["All"] + config.languages
selected_languages = questionary.checkbox(
"Select languages (use spacebar to select, enter to confirm):",
choices=language_choices,
).ask()

if "All" in selected_languages:
config.selected_languages = config.languages
elif selected_languages:
config.selected_languages = selected_languages
else:
rprint(
"[yellow]Warning: No languages selected. Please select at least one language.[/yellow]"
)
output_type = (
input("File type to export (json, csv, tsv) [json]: ").strip().lower()
or "json"
return configure_settings()

# MARK: Data type selection
data_type_choices = ["All"] + config.data_types
selected_data_types = questionary.checkbox(
"Select data types (use spacebar to select, enter to confirm):",
choices=data_type_choices,
).ask()

if "All" in selected_data_types:
config.selected_data_types = config.data_types
elif selected_data_types:
config.selected_data_types = selected_data_types
else:
rprint(
"[yellow]Warning: No data types selected. Please select at least one data type.[/yellow]"
)
return configure_settings()

if output_type == "csv":
default_export_dir = DEFAULT_CSV_EXPORT_DIR
# MARK: Output type selection
config.output_type = questionary.select(
"Select output type:", choices=["json", "csv", "tsv"]
).ask()

elif output_type == "tsv":
default_export_dir = DEFAULT_TSV_EXPORT_DIR

else:
default_export_dir = DEFAULT_JSON_EXPORT_DIR

output_dir = (
input(f"Export directory path [./{default_export_dir}]: ").strip()
or f"./{default_export_dir}"
)
overwrite = (
input("Overwrite existing data without asking (y/n) [n]: ").strip().lower()
== "y"
config.output_dir = Path(
questionary.text(
"Enter output directory:", default=str(config.output_dir)
).ask()
)

return {"type": output_type, "dir": output_dir, "overwrite": overwrite}
config.overwrite = questionary.confirm(
"Overwrite existing files?", default=config.overwrite
).ask()

display_summary()

def run_interactive_mode():
"""
Run the interactive mode for Scribe-Data CLI.

This function guides the user through selecting languages, data types and output options.
The process is then executed based on these selections.
"""
selected_languages = select_languages()
selected_data_types = select_data_types()
output_options = get_output_options()

print(
f"Data will be exported as {output_options['type'].upper()} files to '{output_options['dir']}'."
def run_export():
    """
    Export data for every selected language and data type pair.

    Reads the module-level ``config`` object. If no languages or no data
    types have been selected, an error message is printed and nothing is
    exported. Otherwise ``get_data`` is called once per language/data type
    combination while a status spinner reports progress, and a completion
    panel is printed at the end.
    """
    is_configured = config.selected_languages and config.selected_data_types
    if not is_configured:
        rprint(
            "[bold red]Error: Please configure languages and data types before running export.[/bold red]"
        )
        return

    # MARK: Exporting data
    with console.status("[bold green]Exporting data...[/bold green]") as status:
        for language in config.selected_languages:
            for data_type in config.selected_data_types:
                progress_message = f"[bold green]Exporting {language} {data_type} data...[/bold green]"
                status.update(progress_message)

                # NOTE(review): config.output_type is passed twice (3rd and 6th
                # positional arguments) - confirm against get_data's signature.
                export_args = (
                    language,
                    data_type,
                    config.output_type,
                    str(config.output_dir),
                    config.overwrite,
                    config.output_type,
                )
                get_data(*export_args)

                rprint(f"[green]✓[/green] Exported {language} {data_type} data.")

    completion_panel = Panel.fit(
        "[bold green]Data export completed successfully![/bold green]"
    )
    rprint(completion_panel)


# MARK: Start interactive mode functionality
def start_interactive_mode():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a docstring here, as well as for the other functions.

rprint(
f"[bold green]Welcome to {get_version_message()} interactive mode![/bold green]"
)

for language in selected_languages:
for data_type in selected_data_types:
get_data(
language,
data_type,
output_options["dir"],
output_options["overwrite"],
output_options["type"],
)

while True:
choice = questionary.select(
"What would you like to do?",
choices=[
Choice("Configure settings", "configure"),
Choice("Run data export", "run"),
Choice("Exit", "exit"),
],
).ask()

# This function can be called from main.py when the -i or --interactive flag is used.
def start_interactive_mode():
    """
    Print the welcome message and start the interactive workflow.
    """
    welcome_message = "Welcome to Scribe-Data interactive mode!"
    print(welcome_message)

    run_interactive_mode()
if choice == "configure":
configure_settings()
elif choice == "run":
run_export()
rprint("[bold yellow]Thank you for using Scribe-Data![/bold yellow]")
break


if __name__ == "__main__":
# This allows for testing the interactive mode directly.
start_interactive_mode()
Loading