Skip to content

Commit

Permalink
replace glossary_input() with st.file_uploader
Browse files: browse the repository at this point in the history
better enum for translation models
don't return empty string from file_uploader
  • Loading branch information
devxpy committed Jun 27, 2024
1 parent a513fb3 commit 274f8f7
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 158 deletions.
24 changes: 15 additions & 9 deletions daras_ai_v2/asr.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,21 @@ class AsrOutputFormat(Enum):
vtt = "VTT"


class TranslationModels(Enum):
google = "Google Translate"
ghana_nlp = "Ghana NLP"
class TranslationModel(typing.NamedTuple):
label: str
supports_glossary: bool = False
supports_auto_detect: bool = False

def supports_glossary(self) -> bool:
return self in {self.google}

def supports_auto_detect(self) -> bool:
return self in {self.google}
class TranslationModels(TranslationModel, Enum):
google = TranslationModel(
label="Google Translate",
supports_glossary=True,
supports_auto_detect=True,
)
ghana_nlp = TranslationModel(
label="Ghana NLP Translate",
)


def translation_language_selector(
Expand All @@ -305,7 +311,7 @@ def translation_language_selector(
return st.selectbox(
label=label,
key=key,
format_func=lambda k: languages[k] if k else BLANK_OPTION,
format_func=lang_format_func,
options=options,
**kwargs,
)
Expand Down Expand Up @@ -1071,4 +1077,4 @@ def format_timestamp(seconds: float, always_include_hours: bool, decimal_marker:


def should_translate_lang(code: str) -> bool:
return code and not code.split("-")[0] != "en"
return code and code.split("-")[0] != "en"
35 changes: 13 additions & 22 deletions daras_ai_v2/doc_search_settings_widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,11 @@ def is_user_uploaded_url(url: str) -> bool:
return _user_media_url_prefix in url


def document_uploader(
def bulk_documents_uploader(
label: str,
key: str = "documents",
accept: typing.Iterable[str] = None,
accept_multiple_files=True,
) -> list[str] | str:
) -> list[str]:
st.write(label, className="gui-input")
documents = st.session_state.get(key) or []
if isinstance(documents, str):
Expand All @@ -47,12 +46,8 @@ def document_uploader(
if st.session_state.get(f"__custom_checkbox_{key}"):
if not custom_key in st.session_state:
st.session_state[custom_key] = "\n".join(documents)
if accept_multiple_files:
widget = st.text_area
kwargs = dict(height=150)
else:
widget = st.text_input
kwargs = {}
widget = st.text_area
kwargs = dict(height=150)
text_value = widget(
label,
key=custom_key,
Expand All @@ -66,28 +61,24 @@ def document_uploader(
},
**kwargs,
)
if accept_multiple_files:
st.session_state[key] = filter(None, text_value.strip().splitlines())
else:
st.session_state[key] = text_value
st.session_state[key] = filter(None, text_value.strip().splitlines())
else:
st.session_state.pop(custom_key, None)
st.file_uploader(
label,
label_visibility="collapsed",
key=key,
accept=accept,
accept_multiple_files=accept_multiple_files,
accept_multiple_files=True,
)
st.checkbox("Submit Links in Bulk", key=f"__custom_checkbox_{key}")
documents = st.session_state.get(key, [])
if accept_multiple_files:
try:
documents = list(_expand_gdrive_folders(documents))
except Exception as e:
capture_exception(e)
st.error(f"Error expanding gdrive folders: {e}")
st.session_state[key] = documents
documents = st.session_state.setdefault(key, [])
try:
documents = list(_expand_gdrive_folders(documents))
st.session_state[key] = documents
except Exception as e:
capture_exception(e)
st.error(f"Error expanding gdrive folders: {e}")
st.session_state[custom_key] = "\n".join(documents)
return documents

Expand Down
17 changes: 0 additions & 17 deletions daras_ai_v2/glossary.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
from daras_ai_v2.asr import google_translate_target_languages

from daras_ai_v2.doc_search_settings_widgets import (
document_uploader,
SUPPORTED_SPREADSHEET_TYPES,
)


def validate_glossary_document(document: str):
"""
Expand Down Expand Up @@ -40,18 +35,6 @@ def validate_glossary_document(document: str):
)


def glossary_input(
label: str = "##### Glossary",
key: str = "glossary_document",
) -> str:
return document_uploader(
label=label,
key=key,
accept=SUPPORTED_SPREADSHEET_TYPES,
accept_multiple_files=False,
) # type: ignore


def create_glossary(
*,
language_codes: list[str],
Expand Down
2 changes: 1 addition & 1 deletion daras_ai_v2/text_output_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def text_outputs(
label: str,
*,
key: str = None,
value: str = None,
value: str | list = None,
height: int = 200,
):
value = value or st.session_state.get(key)
Expand Down
8 changes: 4 additions & 4 deletions gooey_ui/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ def file_uploader(
label_visibility: LabelVisibility = "visible",
upload_meta: dict = None,
optional: bool = False,
) -> str | list[str]:
) -> str | list[str] | None:
if label_visibility != "visible":
label = None
key = upload_key or key
Expand All @@ -595,14 +595,14 @@ def file_uploader(
label, value=bool(state.session_state.get(key, value)), disabled=disabled
):
state.session_state.pop(key, None)
return ""
return None
label = None
value = state.session_state.setdefault(key, value)
if not value:
if accept_multiple_files:
value = []
else:
value = ""
value = None
state.session_state[key] = value
state.RenderTreeNode(
name="input",
Expand All @@ -618,7 +618,7 @@ def file_uploader(
uploadMeta=upload_meta,
),
).mount()
return value or ""
return value


def json(value: typing.Any, expanded: bool = False, depth: int = 1):
Expand Down
4 changes: 2 additions & 2 deletions recipes/BulkEval.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from daras_ai.image_input import upload_file_from_bytes
from daras_ai_v2.base import BasePage
from daras_ai_v2.doc_search_settings_widgets import (
document_uploader,
bulk_documents_uploader,
SUPPORTED_SPREADSHEET_TYPES,
)
from daras_ai_v2.field_render import field_title_desc
Expand Down Expand Up @@ -199,7 +199,7 @@ class ResponseModel(BaseModel):
aggregations: list[list[AggFunctionResult]] | None

def render_form_v2(self):
files = document_uploader(
files = bulk_documents_uploader(
f"##### {field_title_desc(self.RequestModel, 'documents')}",
accept=SUPPORTED_SPREADSHEET_TYPES,
)
Expand Down
4 changes: 2 additions & 2 deletions recipes/BulkRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from daras_ai_v2.base import BasePage
from daras_ai_v2.breadcrumbs import get_title_breadcrumbs
from daras_ai_v2.doc_search_settings_widgets import (
document_uploader,
bulk_documents_uploader,
SUPPORTED_SPREADSHEET_TYPES,
)
from daras_ai_v2.field_render import field_title_desc
Expand Down Expand Up @@ -104,7 +104,7 @@ def render_form_v2(self):
flatten_dict_key="url",
)

files = document_uploader(
files = bulk_documents_uploader(
f"---\n##### {field_title_desc(self.RequestModel, 'documents')}",
accept=SUPPORTED_SPREADSHEET_TYPES,
)
Expand Down
29 changes: 19 additions & 10 deletions recipes/DocExtract.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import threading
import typing

from daras_ai_v2.field_render import field_title_desc
from daras_ai_v2.pydantic_validation import FieldHttpUrl
import requests
from aifail import retry_if
from django.db.models import IntegerChoices
from furl import furl
from pydantic import BaseModel
from pydantic import BaseModel, Field

import gooey_ui as st
from bots.models import Workflow
Expand All @@ -25,7 +26,10 @@
azure_doc_extract_page_num,
)
from daras_ai_v2.base import BasePage
from daras_ai_v2.doc_search_settings_widgets import document_uploader
from daras_ai_v2.doc_search_settings_widgets import (
bulk_documents_uploader,
SUPPORTED_SPREADSHEET_TYPES,
)
from daras_ai_v2.enum_selector_widget import enum_selector
from daras_ai_v2.exceptions import raise_for_status
from daras_ai_v2.fake_user_agents import FAKE_USER_AGENTS
Expand All @@ -34,7 +38,6 @@
flatapply_parallel,
)
from daras_ai_v2.gdrive_downloader import is_gdrive_url, gdrive_download
from daras_ai_v2.glossary import glossary_input
from daras_ai_v2.language_model import run_language_model, LargeLanguageModels
from daras_ai_v2.language_model_settings_widgets import language_model_settings
from daras_ai_v2.loom_video_widget import youtube_video
Expand Down Expand Up @@ -81,7 +84,11 @@ class RequestModel(BaseModel):
selected_asr_model: typing.Literal[tuple(e.name for e in AsrModels)] | None
# language: str | None
google_translate_target: str | None
glossary_document: FieldHttpUrl | None
glossary_document: FieldHttpUrl | None = Field(
title="Translation Glossary",
description="""Provide a glossary to customize translation and improve accuracy of domain-specific terms.
If not specified or invalid, no glossary will be used. Read about the expected format [here](https://docs.google.com/document/d/1TwzAvFmFYekloRKql2PXNPIyqCbsHRL8ZtnWkzAYrh8/edit?usp=sharing).""",
)

task_instructions: str | None

Expand All @@ -101,7 +108,7 @@ def preview_image(self, state: dict) -> str | None:
return DEFAULT_YOUTUBE_BOT_META_IMG

def render_form_v2(self):
document_uploader(
bulk_documents_uploader(
"#### 🤖 Youtube/PDF/Drive URLs",
accept=("audio/*", "application/pdf", "video/*"),
)
Expand Down Expand Up @@ -132,15 +139,17 @@ def render_settings(self):
height=300,
)
language_model_settings()
"##### Document AI Model"

enum_selector(AsrModels, label="##### ASR Model", key="selected_asr_model")
st.write("---")

google_translate_language_selector()
glossary_input()
st.file_uploader(
label=f"###### {field_title_desc(self.RequestModel, 'glossary_document')}",
key="glossary_document",
accept=SUPPORTED_SPREADSHEET_TYPES,
)
st.write("---")
# enum_selector(
# AsrOutputFormat, label="###### Output Format", key="output_format"
# )

def related_workflows(self) -> list:
from recipes.asr_page import AsrPage
Expand Down
4 changes: 2 additions & 2 deletions recipes/DocSearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from bots.models import Workflow
from daras_ai_v2.base import BasePage
from daras_ai_v2.doc_search_settings_widgets import (
document_uploader,
bulk_documents_uploader,
is_user_uploaded_url,
citation_style_selector,
doc_search_advanced_settings,
Expand Down Expand Up @@ -93,7 +93,7 @@ def get_example_preferred_fields(self, state: dict) -> list[str]:

def render_form_v2(self):
st.text_area("#### Search Query", key="search_query")
document_uploader("#### Documents")
bulk_documents_uploader("#### Documents")
prompt_vars_widget("task_instructions", "query_instructions")

def validate_form_v2(self):
Expand Down
4 changes: 2 additions & 2 deletions recipes/DocSummary.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from daras_ai_v2.asr import AsrModels
from daras_ai_v2.base import BasePage
from daras_ai_v2.doc_search_settings_widgets import (
document_uploader,
bulk_documents_uploader,
)
from daras_ai_v2.functional import map_parallel
from daras_ai_v2.language_model import (
Expand Down Expand Up @@ -91,7 +91,7 @@ def preview_image(self, state: dict) -> str | None:
return DEFAULT_DOC_SUMMARY_META_IMG

def render_form_v2(self):
document_uploader("#### 📎 Documents")
bulk_documents_uploader("#### 📎 Documents")
st.text_area("#### 👩‍💻 Instructions", key="task_instructions")

def render_settings(self):
Expand Down
Loading

0 comments on commit 274f8f7

Please sign in to comment.