From ee7a86a8b70e0492bec393edf840f91edc06ed5e Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 11:28:27 +0100 Subject: [PATCH 01/15] Started working on backend restructuring #187 --- README.md | 4 +- {fluentai => backend/fluentai}/__init__.py | 0 .../card_gen => backend/fluentai/api}/api.py | 0 .../fluentai/constants}/__init__.py | 0 .../fluentai}/constants/config.py | 0 .../fluentai}/constants/languages.py | 0 .../utils => backend/fluentai}/logger.py | 0 .../fluentai}/services/__init__.py | 0 .../fluentai/services/imagine}/__init__.py | 0 .../fluentai/services}/imagine/image_gen.py | 0 .../fluentai/services}/imagine/verbal_cue.py | 0 .../fluentai/services}/main.py | 0 .../fluentai/services}/mnemonic/__init__.py | 0 .../mnemonic/imageability}/__init__.py | 0 .../mnemonic/imageability/embeddings.py | 0 .../mnemonic/imageability/imag_models/data.py | 0 .../imageability/imag_models/ensemble.py | 0 .../imageability/imag_models/models.py | 0 .../imageability/imag_models/optimization.py | 0 .../mnemonic/imageability/imageability.py | 0 .../imageability/make_data/complete.py | 0 .../make_data/imageabilitycorpus.py | 0 .../mnemonic/orthographic}/__init__.py | 0 .../services}/mnemonic/orthographic/eval.py | 0 .../mnemonic/orthographic/orthographic.py | 0 .../services/mnemonic/phonetic}/__init__.py | 0 .../services}/mnemonic/phonetic/clts_utils.py | 0 .../services}/mnemonic/phonetic/eval.py | 0 .../services}/mnemonic/phonetic/g2p.py | 0 .../services}/mnemonic/phonetic/ipa2vec.py | 0 .../phonetic/make_data/filter_words.py | 0 .../services}/mnemonic/phonetic/phonetic.py | 0 .../services}/mnemonic/phonetic/utils.py | 0 .../services}/mnemonic/phonetic/vectorizer.py | 0 .../services/mnemonic/semantic}/__init__.py | 0 .../services}/mnemonic/semantic/eval.py | 0 .../mnemonic/semantic/make_data}/__init__.py | 0 .../mnemonic/semantic/make_data/data.py | 0 .../services}/mnemonic/semantic/semantic.py | 0 .../services}/mnemonic/semantic/translator.py | 0 .../services}/mnemonic/word2mnemonic.py | 0 .../fluentai/services}/tts/fallback.py | 0 .../fluentai/services}/tts/tts.py | 0 .../fluentai/utils}/__init__.py | 0 .../fluentai}/utils/fasttext.py | 0 .../fluentai}/utils/lang_codes.py | 0 .../fluentai}/utils/load_models.py | 0 .../gpu.txt => backend/gpu-requirements.txt | 0 pyproject.toml => backend/pyproject.toml | 0 {requirements => backend}/requirements.txt | 0 setup.py => backend/setup.py | 0 .../make_data => backend/tests}/__init__.py | 0 .../tests/test_services}/__init__.py | 0 .../tests/test_services}/test_imageability.py | 0 .../tests/test_services}/test_orthographic.py | 0 .../tests/test_services}/test_phonetic.py | 0 .../tests/test_services}/test_semantic.py | 0 fluentai/anki/anki.py | 308 ------------------ {fluentai/frontend => frontend}/.gitignore | 0 {fluentai/frontend => frontend}/README.md | 0 .../frontend => frontend}/eslint.config.mjs | 0 .../frontend => frontend}/next.config.ts | 0 .../frontend => frontend}/package-lock.json | 0 {fluentai/frontend => frontend}/package.json | 0 .../frontend => frontend}/postcss.config.mjs | 0 .../frontend => frontend}/public/logo.png | Bin .../src/app/api/anki/route.ts | 0 .../src/app/api/createCard.ts | 0 .../src/app/api/index.ts | 0 .../src/app/api/languageService.ts | 0 .../frontend => frontend}/src/app/globals.css | 0 .../frontend => frontend}/src/app/layout.tsx | 0 .../src/app/library/page.tsx | 0 .../frontend => frontend}/src/app/logo.png | Bin .../frontend => frontend}/src/app/page.tsx | 0 .../src/components/CardGenerator.tsx | 0 .../src/components/Flashcard.tsx | 0 .../src/components/FlashcardLibrary.tsx | 0 .../src/components/Header.tsx | 0 .../src/components/SaveToAnki.tsx | 0 .../src/components/StatusChecker.tsx | 0 .../src/components/ui/AutoCompleteInput.tsx | 0 .../src/components/ui/Button.tsx | 0 .../src/components/ui/FormField.tsx | 0 .../src/config/constants.ts | 0 .../src/interfaces/AnkiInterface.ts | 0 .../src/interfaces/CardInterfaces.ts | 0 .../src/interfaces/LanguageInterface.ts | 0 .../src/interfaces/ModelInterface.ts | 0 .../src/services/anki/AnkiConnect.ts | 0 .../src/services/anki/ankiService.ts | 0 .../src/services/modelService.ts | 0 .../frontend => frontend}/tailwind.config.ts | 0 {fluentai/frontend => frontend}/tsconfig.json | 0 package-lock.json | 6 - package.json | 1 - tests/__init__.py | 0 tests/card_gen/__init__.py | 0 tests/vocab/__init__.py | 0 99 files changed, 2 insertions(+), 317 deletions(-) rename {fluentai => backend/fluentai}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/api}/api.py (100%) rename {fluentai/anki => backend/fluentai/constants}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai}/constants/config.py (100%) rename {fluentai/services/card_gen => backend/fluentai}/constants/languages.py (100%) rename {fluentai/services/card_gen/utils => backend/fluentai}/logger.py (100%) rename {fluentai => backend/fluentai}/services/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services/imagine}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/imagine/image_gen.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/imagine/verbal_cue.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/main.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/__init__.py (100%) rename {fluentai/services/card_gen/constants => backend/fluentai/services/mnemonic/imageability}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/embeddings.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/imag_models/data.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/imag_models/ensemble.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/imag_models/models.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/imag_models/optimization.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/imageability.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/make_data/complete.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/imageability/make_data/imageabilitycorpus.py (100%) rename {fluentai/services/card_gen/imagine => backend/fluentai/services/mnemonic/orthographic}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/orthographic/eval.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/orthographic/orthographic.py (100%) rename {fluentai/services/card_gen/mnemonic/imageability => backend/fluentai/services/mnemonic/phonetic}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/clts_utils.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/eval.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/g2p.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/ipa2vec.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/make_data/filter_words.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/phonetic.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/utils.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/phonetic/vectorizer.py (100%) rename {fluentai/services/card_gen/mnemonic/orthographic => backend/fluentai/services/mnemonic/semantic}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/semantic/eval.py (100%) rename {fluentai/services/card_gen/mnemonic/phonetic => backend/fluentai/services/mnemonic/semantic/make_data}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/semantic/make_data/data.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/semantic/semantic.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/semantic/translator.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/mnemonic/word2mnemonic.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/tts/fallback.py (100%) rename {fluentai/services/card_gen => backend/fluentai/services}/tts/tts.py (100%) rename {fluentai/services/card_gen/mnemonic/semantic => backend/fluentai/utils}/__init__.py (100%) rename {fluentai/services/card_gen => backend/fluentai}/utils/fasttext.py (100%) rename {fluentai/services/card_gen => backend/fluentai}/utils/lang_codes.py (100%) rename {fluentai/services/card_gen => backend/fluentai}/utils/load_models.py (100%) rename requirements/gpu.txt => backend/gpu-requirements.txt (100%) rename pyproject.toml => backend/pyproject.toml (100%) rename {requirements => backend}/requirements.txt (100%) rename setup.py => backend/setup.py (100%) rename {fluentai/services/card_gen/mnemonic/semantic/make_data => backend/tests}/__init__.py (100%) rename {fluentai/services/card_gen/utils => backend/tests/test_services}/__init__.py (100%) rename {tests/card_gen => backend/tests/test_services}/test_imageability.py (100%) rename {tests/card_gen => backend/tests/test_services}/test_orthographic.py (100%) rename {tests/card_gen => backend/tests/test_services}/test_phonetic.py (100%) rename {tests/card_gen => backend/tests/test_services}/test_semantic.py (100%) delete mode 100644 fluentai/anki/anki.py rename {fluentai/frontend => frontend}/.gitignore (100%) rename {fluentai/frontend => frontend}/README.md (100%) rename {fluentai/frontend => frontend}/eslint.config.mjs (100%) rename {fluentai/frontend => frontend}/next.config.ts (100%) rename {fluentai/frontend => frontend}/package-lock.json (100%) rename {fluentai/frontend => frontend}/package.json (100%) rename {fluentai/frontend => frontend}/postcss.config.mjs (100%) rename {fluentai/frontend => frontend}/public/logo.png (100%) rename {fluentai/frontend => frontend}/src/app/api/anki/route.ts (100%) rename {fluentai/frontend => frontend}/src/app/api/createCard.ts (100%) rename {fluentai/frontend => frontend}/src/app/api/index.ts (100%) rename {fluentai/frontend => frontend}/src/app/api/languageService.ts (100%) rename {fluentai/frontend => frontend}/src/app/globals.css (100%) rename {fluentai/frontend => frontend}/src/app/layout.tsx (100%) rename {fluentai/frontend => frontend}/src/app/library/page.tsx (100%) rename {fluentai/frontend => frontend}/src/app/logo.png (100%) rename {fluentai/frontend => frontend}/src/app/page.tsx (100%) rename {fluentai/frontend => frontend}/src/components/CardGenerator.tsx (100%) rename {fluentai/frontend => frontend}/src/components/Flashcard.tsx (100%) rename {fluentai/frontend => frontend}/src/components/FlashcardLibrary.tsx (100%) rename {fluentai/frontend => frontend}/src/components/Header.tsx (100%) rename {fluentai/frontend => frontend}/src/components/SaveToAnki.tsx (100%) rename {fluentai/frontend => frontend}/src/components/StatusChecker.tsx (100%) rename {fluentai/frontend => frontend}/src/components/ui/AutoCompleteInput.tsx (100%) rename {fluentai/frontend => frontend}/src/components/ui/Button.tsx (100%) rename {fluentai/frontend => frontend}/src/components/ui/FormField.tsx (100%) rename {fluentai/frontend => frontend}/src/config/constants.ts (100%) rename {fluentai/frontend => frontend}/src/interfaces/AnkiInterface.ts (100%) rename {fluentai/frontend => frontend}/src/interfaces/CardInterfaces.ts (100%) rename {fluentai/frontend => frontend}/src/interfaces/LanguageInterface.ts (100%) rename {fluentai/frontend => frontend}/src/interfaces/ModelInterface.ts (100%) rename {fluentai/frontend => frontend}/src/services/anki/AnkiConnect.ts (100%) rename {fluentai/frontend => frontend}/src/services/anki/ankiService.ts (100%) rename {fluentai/frontend => frontend}/src/services/modelService.ts (100%) rename {fluentai/frontend => frontend}/tailwind.config.ts (100%) rename {fluentai/frontend => frontend}/tsconfig.json (100%) delete mode 100644 package-lock.json delete mode 100644 package.json delete mode 100644 tests/__init__.py delete mode 100644 tests/card_gen/__init__.py delete mode 100644 tests/vocab/__init__.py diff --git a/README.md b/README.md index 0483adc..591040b 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ If you prefer to build from source, follow these steps: 3. Install the dependencies: ```bash - pip install -r requirements.txt + pip install -r backend/requirements.txt ``` ### Install with GPU Support (Recommended) @@ -90,7 +90,7 @@ If you would like to run the code on a GPU, you can install the `torch` package After installing the required dependencies, run the following command: ```bash -pip install -r requirements/gpu.txt +pip install -r backend/gpu-requirements.txt ``` ## Usage ⌨️ diff --git a/fluentai/__init__.py b/backend/fluentai/__init__.py similarity index 100% rename from fluentai/__init__.py rename to backend/fluentai/__init__.py diff --git a/fluentai/services/card_gen/api.py b/backend/fluentai/api/api.py similarity index 100% rename from fluentai/services/card_gen/api.py rename to backend/fluentai/api/api.py diff --git a/fluentai/anki/__init__.py b/backend/fluentai/constants/__init__.py similarity index 100% rename from fluentai/anki/__init__.py rename to backend/fluentai/constants/__init__.py diff --git a/fluentai/services/card_gen/constants/config.py b/backend/fluentai/constants/config.py similarity index 100% rename from fluentai/services/card_gen/constants/config.py rename to backend/fluentai/constants/config.py diff --git a/fluentai/services/card_gen/constants/languages.py b/backend/fluentai/constants/languages.py similarity index 100% rename from fluentai/services/card_gen/constants/languages.py rename to backend/fluentai/constants/languages.py diff --git a/fluentai/services/card_gen/utils/logger.py b/backend/fluentai/logger.py similarity index 100% rename from fluentai/services/card_gen/utils/logger.py rename to backend/fluentai/logger.py diff --git a/fluentai/services/__init__.py b/backend/fluentai/services/__init__.py similarity index 100% rename from fluentai/services/__init__.py rename to backend/fluentai/services/__init__.py diff --git a/fluentai/services/card_gen/__init__.py b/backend/fluentai/services/imagine/__init__.py similarity index 100% rename from fluentai/services/card_gen/__init__.py rename to backend/fluentai/services/imagine/__init__.py diff --git a/fluentai/services/card_gen/imagine/image_gen.py b/backend/fluentai/services/imagine/image_gen.py similarity index 100% rename from fluentai/services/card_gen/imagine/image_gen.py rename to backend/fluentai/services/imagine/image_gen.py diff --git a/fluentai/services/card_gen/imagine/verbal_cue.py b/backend/fluentai/services/imagine/verbal_cue.py similarity index 100% rename from fluentai/services/card_gen/imagine/verbal_cue.py rename to backend/fluentai/services/imagine/verbal_cue.py diff --git a/fluentai/services/card_gen/main.py b/backend/fluentai/services/main.py similarity index 100% rename from fluentai/services/card_gen/main.py rename to backend/fluentai/services/main.py diff --git a/fluentai/services/card_gen/mnemonic/__init__.py b/backend/fluentai/services/mnemonic/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/__init__.py rename to backend/fluentai/services/mnemonic/__init__.py diff --git a/fluentai/services/card_gen/constants/__init__.py b/backend/fluentai/services/mnemonic/imageability/__init__.py similarity index 100% rename from fluentai/services/card_gen/constants/__init__.py rename to backend/fluentai/services/mnemonic/imageability/__init__.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/embeddings.py b/backend/fluentai/services/mnemonic/imageability/embeddings.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/embeddings.py rename to backend/fluentai/services/mnemonic/imageability/embeddings.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/data.py b/backend/fluentai/services/mnemonic/imageability/imag_models/data.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/data.py rename to backend/fluentai/services/mnemonic/imageability/imag_models/data.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/ensemble.py b/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/ensemble.py rename to backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/models.py b/backend/fluentai/services/mnemonic/imageability/imag_models/models.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/models.py rename to backend/fluentai/services/mnemonic/imageability/imag_models/models.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/optimization.py b/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/optimization.py rename to backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/imageability.py b/backend/fluentai/services/mnemonic/imageability/imageability.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/imageability.py rename to backend/fluentai/services/mnemonic/imageability/imageability.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/make_data/complete.py b/backend/fluentai/services/mnemonic/imageability/make_data/complete.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/make_data/complete.py rename to backend/fluentai/services/mnemonic/imageability/make_data/complete.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/make_data/imageabilitycorpus.py b/backend/fluentai/services/mnemonic/imageability/make_data/imageabilitycorpus.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/make_data/imageabilitycorpus.py rename to backend/fluentai/services/mnemonic/imageability/make_data/imageabilitycorpus.py diff --git a/fluentai/services/card_gen/imagine/__init__.py b/backend/fluentai/services/mnemonic/orthographic/__init__.py similarity index 100% rename from fluentai/services/card_gen/imagine/__init__.py rename to backend/fluentai/services/mnemonic/orthographic/__init__.py diff --git a/fluentai/services/card_gen/mnemonic/orthographic/eval.py b/backend/fluentai/services/mnemonic/orthographic/eval.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/orthographic/eval.py rename to backend/fluentai/services/mnemonic/orthographic/eval.py diff --git a/fluentai/services/card_gen/mnemonic/orthographic/orthographic.py b/backend/fluentai/services/mnemonic/orthographic/orthographic.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/orthographic/orthographic.py rename to backend/fluentai/services/mnemonic/orthographic/orthographic.py diff --git a/fluentai/services/card_gen/mnemonic/imageability/__init__.py b/backend/fluentai/services/mnemonic/phonetic/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/imageability/__init__.py rename to backend/fluentai/services/mnemonic/phonetic/__init__.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/clts_utils.py b/backend/fluentai/services/mnemonic/phonetic/clts_utils.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/clts_utils.py rename to backend/fluentai/services/mnemonic/phonetic/clts_utils.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/eval.py b/backend/fluentai/services/mnemonic/phonetic/eval.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/eval.py rename to backend/fluentai/services/mnemonic/phonetic/eval.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/g2p.py b/backend/fluentai/services/mnemonic/phonetic/g2p.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/g2p.py rename to backend/fluentai/services/mnemonic/phonetic/g2p.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/ipa2vec.py b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/ipa2vec.py rename to backend/fluentai/services/mnemonic/phonetic/ipa2vec.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/make_data/filter_words.py b/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/make_data/filter_words.py rename to backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/phonetic.py b/backend/fluentai/services/mnemonic/phonetic/phonetic.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/phonetic.py rename to backend/fluentai/services/mnemonic/phonetic/phonetic.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/utils.py b/backend/fluentai/services/mnemonic/phonetic/utils.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/utils.py rename to backend/fluentai/services/mnemonic/phonetic/utils.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/vectorizer.py b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/vectorizer.py rename to backend/fluentai/services/mnemonic/phonetic/vectorizer.py diff --git a/fluentai/services/card_gen/mnemonic/orthographic/__init__.py b/backend/fluentai/services/mnemonic/semantic/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/orthographic/__init__.py rename to backend/fluentai/services/mnemonic/semantic/__init__.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/eval.py b/backend/fluentai/services/mnemonic/semantic/eval.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/eval.py rename to backend/fluentai/services/mnemonic/semantic/eval.py diff --git a/fluentai/services/card_gen/mnemonic/phonetic/__init__.py b/backend/fluentai/services/mnemonic/semantic/make_data/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/phonetic/__init__.py rename to backend/fluentai/services/mnemonic/semantic/make_data/__init__.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/make_data/data.py b/backend/fluentai/services/mnemonic/semantic/make_data/data.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/make_data/data.py rename to backend/fluentai/services/mnemonic/semantic/make_data/data.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/semantic.py b/backend/fluentai/services/mnemonic/semantic/semantic.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/semantic.py rename to backend/fluentai/services/mnemonic/semantic/semantic.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/translator.py b/backend/fluentai/services/mnemonic/semantic/translator.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/translator.py rename to backend/fluentai/services/mnemonic/semantic/translator.py diff --git a/fluentai/services/card_gen/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/word2mnemonic.py rename to backend/fluentai/services/mnemonic/word2mnemonic.py diff --git a/fluentai/services/card_gen/tts/fallback.py b/backend/fluentai/services/tts/fallback.py similarity index 100% rename from fluentai/services/card_gen/tts/fallback.py rename to backend/fluentai/services/tts/fallback.py diff --git a/fluentai/services/card_gen/tts/tts.py b/backend/fluentai/services/tts/tts.py similarity index 100% rename from fluentai/services/card_gen/tts/tts.py rename to backend/fluentai/services/tts/tts.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/__init__.py b/backend/fluentai/utils/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/__init__.py rename to backend/fluentai/utils/__init__.py diff --git a/fluentai/services/card_gen/utils/fasttext.py b/backend/fluentai/utils/fasttext.py similarity index 100% rename from fluentai/services/card_gen/utils/fasttext.py rename to backend/fluentai/utils/fasttext.py diff --git a/fluentai/services/card_gen/utils/lang_codes.py b/backend/fluentai/utils/lang_codes.py similarity index 100% rename from fluentai/services/card_gen/utils/lang_codes.py rename to backend/fluentai/utils/lang_codes.py diff --git a/fluentai/services/card_gen/utils/load_models.py b/backend/fluentai/utils/load_models.py similarity index 100% rename from fluentai/services/card_gen/utils/load_models.py rename to backend/fluentai/utils/load_models.py diff --git a/requirements/gpu.txt b/backend/gpu-requirements.txt similarity index 100% rename from requirements/gpu.txt rename to backend/gpu-requirements.txt diff --git a/pyproject.toml b/backend/pyproject.toml similarity index 100% rename from pyproject.toml rename to backend/pyproject.toml diff --git a/requirements/requirements.txt b/backend/requirements.txt similarity index 100% rename from requirements/requirements.txt rename to backend/requirements.txt diff --git a/setup.py b/backend/setup.py similarity index 100% rename from setup.py rename to backend/setup.py diff --git a/fluentai/services/card_gen/mnemonic/semantic/make_data/__init__.py b/backend/tests/__init__.py similarity index 100% rename from fluentai/services/card_gen/mnemonic/semantic/make_data/__init__.py rename to backend/tests/__init__.py diff --git a/fluentai/services/card_gen/utils/__init__.py b/backend/tests/test_services/__init__.py similarity index 100% rename from fluentai/services/card_gen/utils/__init__.py rename to backend/tests/test_services/__init__.py diff --git a/tests/card_gen/test_imageability.py b/backend/tests/test_services/test_imageability.py similarity index 100% rename from tests/card_gen/test_imageability.py rename to backend/tests/test_services/test_imageability.py diff --git a/tests/card_gen/test_orthographic.py b/backend/tests/test_services/test_orthographic.py similarity index 100% rename from tests/card_gen/test_orthographic.py rename to backend/tests/test_services/test_orthographic.py diff --git a/tests/card_gen/test_phonetic.py b/backend/tests/test_services/test_phonetic.py similarity index 100% rename from tests/card_gen/test_phonetic.py rename to backend/tests/test_services/test_phonetic.py diff --git a/tests/card_gen/test_semantic.py b/backend/tests/test_services/test_semantic.py similarity index 100% rename from tests/card_gen/test_semantic.py rename to backend/tests/test_services/test_semantic.py diff --git a/fluentai/anki/anki.py b/fluentai/anki/anki.py deleted file mode 100644 index 480fa3a..0000000 --- a/fluentai/anki/anki.py +++ /dev/null @@ -1,308 +0,0 @@ -import base64 -import html -import os - -import requests - -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger - - -class AnkiConnect: - # URL and version for AnkiConnect - URL = "http://localhost:8765/" - VERSION = 6 - - def invoke(self, action: str, params: dict = None): - """Invoke an AnkiConnect action with optional parameters. - - Parameters - ---------- - action : str - The action to invoke. See the AnkiConnect API documentation for a list of actions. - params : dict, optional - The parameters associated with this action, by default None - - Returns - ------- - dict - The result of the action. - - Raises - ------ - Exception - If the response does not contain the expected fields. - Exception - If the response contains an error message. - Exception - If the response contains an unexpected number of fields. - """ - payload = {"action": action, "version": self.VERSION} - - # Add parameters if they exist - if params: - payload["params"] = params - - try: - response = requests.post(self.URL, json=payload).json() - except requests.exceptions.ConnectionError: - logger.error( - """Could not establish connection with Anki. -This can be caused by two things: -1. Anki is not running -2. Anki does not have the Anki-Connect plugin: https://foosoft.net/projects/anki-connect/.""" - ) - return - - if len(response) != 2: - logger.error("Unexpected number of fields in response") - logger.error(response) - return - - if "error" not in response or "result" not in response: - logger.error("Response is missing required fields") - logger.error(response) - return - - if response["error"] is not None: - if "model was not found" in response["error"]: - logger.warning( - f"The {response['error']}. We will create the model now and try again." - ) - # Create the model if it does not exist - create_model(params["note"]["modelName"]) - - # Retry the action - return self.invoke(action, params) - - if "deck was not found" in response["error"]: - logger.error( - f"The following {response['error']}. Please ensure a deck with that name exists in Anki." - ) - else: - logger.error(response["error"]) - return - - return response["result"] - - def get_deck_names(self) -> list[str]: - """Retrieves a list of deck names from Anki. - - Returns - ------- - list[str] - List of deck names. - """ - try: - return self.invoke("deckNames") - except Exception: - logger.error("Could not establish connection with Anki") - logger.error( - "Please make sure Anki is running and AnkiConnect is installed" - ) - - def store_media_file(self, src_file_path: str, word: str) -> str: - """Stores a media file in Anki's collection. - - Parameters - ---------- - src_file_path : str - The path to the file to store. - word : str - The word to use as the filename in Anki. - - Returns - ------- - str - Returns the filename used in Anki. - """ - # Sanitize the word to remove special characters - sanitized_word = "".join( - [c for c in word if c.isalnum() or c in (" ", "-")] - ).rstrip() - - # Get the file extension - ext = os.path.splitext(src_file_path)[1] - dst = f"{sanitized_word}{ext}" - - # Encode the file as base64 - with open(src_file_path, "rb") as f: - b64_output = base64.b64encode(f.read()).decode("utf-8") - - self.invoke("storeMediaFile", {"filename": dst, "data": b64_output}) - - return dst - - @staticmethod - def format_notes(notes: str) -> str: - """Formats notes by escaping HTML and converting line breaks. - - Parameters - ---------- - notes : str - The notes to format. - - Returns - ------- - str - The formatted notes. - """ - html_notes = "
".join(html.escape(notes.strip()).split("\n")) - return f"
{html_notes}
" - - def add_note( - self, - word: str, - answer: str, - image_paths: list[str], - word_usage: str, - notes: str, - recording_file_path: str, - ipa_text: str, - test_spelling: bool, - deck_name: str = config["DECK_NAME"], - tags: list[str] = [], - ) -> int: - """Adds a new note to the specified Anki deck with provided fields. - - Parameters - ---------- - deck_name : str - The name of the deck to add the note to. - word : str - The word to add to the note (front side). - answer : str - The answer to the word (back side). - image_paths : list[str] - The paths to the images to add to the note (front side). - word_usage : str - The usage of the word in a sentence (back side). - notes : str - Additional notes for the word (back side). - recording_file_path : str - The path to the recording file. - ipa_text : str - The IPA pronunciation of the word (back side). - test_spelling : bool - Whether to test spelling for the word. - tags : list[str], optional - The tags to add to the note, by default [] - - Returns - ------- - int - Returns the note ID - """ - # Store the images in Anki - stored_images = [ - self.store_media_file(image_path, f"{word}-{i}") - for i, image_path in enumerate(image_paths) - ] - - # Create the picture field with the stored images - picture_field = "".join(f'' for img in stored_images) - - # Format - escaped_usage = html.escape(word_usage.replace("&", "&")) - formatted_notes = self.format_notes(notes) - gender_notes_field = escaped_usage + formatted_notes + answer - - pronunciation_field = ipa_text - - # Store the recording in Anki - if recording_file_path: - stored_audio_filename = self.store_media_file(recording_file_path, word) - pronunciation_field += f"[sound:{stored_audio_filename}]" - - test_spelling = "y" if test_spelling else "" - - params = { - "note": { - "deckName": deck_name, - "modelName": "FluentAI Model", - "fields": { - "Word": word, - "Answer": answer, - "Picture": picture_field, - "Gender, Personal Connection, Extra Info (Back side)": gender_notes_field, - "Pronunciation (Recording and/or IPA)": pronunciation_field, - "Test Spelling? (y = yes, blank = no)": test_spelling, - }, - "tags": tags, - } - } - - return self.invoke("addNote", params) - - -def main(): - """ - Main function to test adding a note via AnkiConnect. - """ - # Create an instance of AnkiConnect - anki = AnkiConnect() - - # Add the note - note_id = anki.add_note( - word="Example", - answer="Answer: Example", - image_paths=["img/logo.jpg"], # Update with actual paths if you have images - word_usage="This is an example of how the word 'example' is used in a sentence.", - notes="These are sample notes for testing purposes.", - recording_file_path="local_data/tts/tts.wav", # Update with actual path if you have an audio file - ipa_text="", # Update with IPA pronunciation if available - test_spelling=False, - ) - if note_id: - logger.info(f"Note added successfully with ID: {note_id}") - - -def create_model(model_name: str = "FluentAI Model"): - """Create the FluentAI model deck in Anki.""" - anki = AnkiConnect() - action = "createModel" - params = { - "modelName": model_name, - "inOrderFields": [ - "Word", - "Picture", - "Gender, Personal Connection, Extra Info (Back side)", - "Pronunciation (Recording and/or IPA)", - "Test Spelling? (y = yes, blank = no)", - ], - "css": """ - .card { - font-family: arial; - font-size: 30px; - text-align: center; - color: black; - background-color: white; - } - - .card1 { background-color: #FFFFFF; } - .card2 { background-color: #FFFFFF; } - """, - "isCloze": False, - "cardTemplates": [ - { - "Name": "Word - Mnemonic", - "Front": "{{Word}}\n\n", - "Back": '{{FrontSide}}\n\n
\n{{Picture}}\n\n{{#Pronunciation (Recording and/or IPA)}}\n
\n{{Pronunciation (Recording and/or IPA)}}{{/Pronunciation (Recording and/or IPA)}}
\n\n\n\n{{Gender, Personal Connection, Extra Info (Back side)}}\n

\n', - }, - { - "Name": "Mnemonic - Word", - "Front": "{{Picture}}

\n\n

\n

\n", - "Back": '{{FrontSide}}\n\n
\n
\n{{Word}}
\n\n\n{{#Pronunciation (Recording and/or IPA)}}
{{Pronunciation (Recording and/or IPA)}}{{/Pronunciation (Recording and/or IPA)}}\n\n{{#Gender, Personal Connection, Extra Info (Back side)}}
{{Gender, Personal Connection, Extra Info (Back side)}}{{/Gender, Personal Connection, Extra Info (Back side)}}\n\n\n', - }, - { - "Name": "Mnemonic - Spelling", - "Front": "{{#Test Spelling? (y = yes, blank = no)}}\nSpell this word:

\n{{Picture}}
\n\n{{#Pronunciation (Recording and/or IPA)}}
{{Pronunciation (Recording and/or IPA)}}{{/Pronunciation (Recording and/or IPA)}}\n
\n\n{{/Test Spelling? (y = yes, blank = no)}}\n\n\n", - "Back": '{{Word}}

\n\n\n{{Picture}}
\n\n{{Gender, Personal Connection, Extra Info (Back side)}}\n', - }, - ], - } - anki.invoke(action, params) - - -if __name__ == "__main__": - main() diff --git a/fluentai/frontend/.gitignore b/frontend/.gitignore similarity index 100% rename from fluentai/frontend/.gitignore rename to frontend/.gitignore diff --git a/fluentai/frontend/README.md b/frontend/README.md similarity index 100% rename from fluentai/frontend/README.md rename to frontend/README.md diff --git a/fluentai/frontend/eslint.config.mjs b/frontend/eslint.config.mjs similarity index 100% rename from fluentai/frontend/eslint.config.mjs rename to frontend/eslint.config.mjs diff --git a/fluentai/frontend/next.config.ts b/frontend/next.config.ts similarity index 100% rename from fluentai/frontend/next.config.ts rename to frontend/next.config.ts diff --git a/fluentai/frontend/package-lock.json b/frontend/package-lock.json similarity index 100% rename from fluentai/frontend/package-lock.json rename to frontend/package-lock.json diff --git a/fluentai/frontend/package.json b/frontend/package.json similarity index 100% rename from fluentai/frontend/package.json rename to frontend/package.json diff --git a/fluentai/frontend/postcss.config.mjs b/frontend/postcss.config.mjs similarity index 100% rename from fluentai/frontend/postcss.config.mjs rename to frontend/postcss.config.mjs diff --git a/fluentai/frontend/public/logo.png b/frontend/public/logo.png similarity index 100% rename from fluentai/frontend/public/logo.png rename to frontend/public/logo.png diff --git a/fluentai/frontend/src/app/api/anki/route.ts b/frontend/src/app/api/anki/route.ts similarity index 100% rename from fluentai/frontend/src/app/api/anki/route.ts rename to frontend/src/app/api/anki/route.ts diff --git a/fluentai/frontend/src/app/api/createCard.ts b/frontend/src/app/api/createCard.ts similarity index 100% rename from fluentai/frontend/src/app/api/createCard.ts rename to frontend/src/app/api/createCard.ts diff --git a/fluentai/frontend/src/app/api/index.ts b/frontend/src/app/api/index.ts similarity index 100% rename from fluentai/frontend/src/app/api/index.ts rename to frontend/src/app/api/index.ts diff --git a/fluentai/frontend/src/app/api/languageService.ts b/frontend/src/app/api/languageService.ts similarity index 100% rename from fluentai/frontend/src/app/api/languageService.ts rename to frontend/src/app/api/languageService.ts diff --git a/fluentai/frontend/src/app/globals.css b/frontend/src/app/globals.css similarity index 100% rename from fluentai/frontend/src/app/globals.css rename to frontend/src/app/globals.css diff --git a/fluentai/frontend/src/app/layout.tsx b/frontend/src/app/layout.tsx similarity index 100% rename from fluentai/frontend/src/app/layout.tsx rename to frontend/src/app/layout.tsx diff --git a/fluentai/frontend/src/app/library/page.tsx b/frontend/src/app/library/page.tsx similarity index 100% rename from fluentai/frontend/src/app/library/page.tsx rename to frontend/src/app/library/page.tsx diff --git a/fluentai/frontend/src/app/logo.png b/frontend/src/app/logo.png similarity index 100% rename from fluentai/frontend/src/app/logo.png rename to frontend/src/app/logo.png diff --git a/fluentai/frontend/src/app/page.tsx b/frontend/src/app/page.tsx similarity index 100% rename from fluentai/frontend/src/app/page.tsx rename to frontend/src/app/page.tsx diff --git a/fluentai/frontend/src/components/CardGenerator.tsx b/frontend/src/components/CardGenerator.tsx similarity index 100% rename from fluentai/frontend/src/components/CardGenerator.tsx rename to frontend/src/components/CardGenerator.tsx diff --git a/fluentai/frontend/src/components/Flashcard.tsx b/frontend/src/components/Flashcard.tsx similarity index 100% rename from fluentai/frontend/src/components/Flashcard.tsx rename to frontend/src/components/Flashcard.tsx diff --git a/fluentai/frontend/src/components/FlashcardLibrary.tsx b/frontend/src/components/FlashcardLibrary.tsx similarity index 100% rename from fluentai/frontend/src/components/FlashcardLibrary.tsx rename to frontend/src/components/FlashcardLibrary.tsx diff --git a/fluentai/frontend/src/components/Header.tsx b/frontend/src/components/Header.tsx similarity index 100% rename from fluentai/frontend/src/components/Header.tsx rename to frontend/src/components/Header.tsx diff --git a/fluentai/frontend/src/components/SaveToAnki.tsx b/frontend/src/components/SaveToAnki.tsx similarity index 100% rename from fluentai/frontend/src/components/SaveToAnki.tsx rename to frontend/src/components/SaveToAnki.tsx diff --git a/fluentai/frontend/src/components/StatusChecker.tsx b/frontend/src/components/StatusChecker.tsx similarity index 100% rename from fluentai/frontend/src/components/StatusChecker.tsx rename to frontend/src/components/StatusChecker.tsx diff --git a/fluentai/frontend/src/components/ui/AutoCompleteInput.tsx b/frontend/src/components/ui/AutoCompleteInput.tsx similarity index 100% rename from fluentai/frontend/src/components/ui/AutoCompleteInput.tsx rename to frontend/src/components/ui/AutoCompleteInput.tsx diff --git a/fluentai/frontend/src/components/ui/Button.tsx b/frontend/src/components/ui/Button.tsx similarity index 100% rename from fluentai/frontend/src/components/ui/Button.tsx rename to frontend/src/components/ui/Button.tsx diff --git a/fluentai/frontend/src/components/ui/FormField.tsx b/frontend/src/components/ui/FormField.tsx similarity index 100% rename from fluentai/frontend/src/components/ui/FormField.tsx rename to frontend/src/components/ui/FormField.tsx diff --git a/fluentai/frontend/src/config/constants.ts b/frontend/src/config/constants.ts similarity index 100% rename from fluentai/frontend/src/config/constants.ts rename to frontend/src/config/constants.ts diff --git a/fluentai/frontend/src/interfaces/AnkiInterface.ts b/frontend/src/interfaces/AnkiInterface.ts similarity index 100% rename from fluentai/frontend/src/interfaces/AnkiInterface.ts rename to frontend/src/interfaces/AnkiInterface.ts diff --git a/fluentai/frontend/src/interfaces/CardInterfaces.ts b/frontend/src/interfaces/CardInterfaces.ts similarity index 100% rename from fluentai/frontend/src/interfaces/CardInterfaces.ts rename to frontend/src/interfaces/CardInterfaces.ts diff --git a/fluentai/frontend/src/interfaces/LanguageInterface.ts b/frontend/src/interfaces/LanguageInterface.ts similarity index 100% rename from fluentai/frontend/src/interfaces/LanguageInterface.ts rename to frontend/src/interfaces/LanguageInterface.ts diff --git a/fluentai/frontend/src/interfaces/ModelInterface.ts b/frontend/src/interfaces/ModelInterface.ts similarity index 100% rename from fluentai/frontend/src/interfaces/ModelInterface.ts rename to frontend/src/interfaces/ModelInterface.ts diff --git a/fluentai/frontend/src/services/anki/AnkiConnect.ts b/frontend/src/services/anki/AnkiConnect.ts similarity index 100% rename from fluentai/frontend/src/services/anki/AnkiConnect.ts rename to frontend/src/services/anki/AnkiConnect.ts diff --git a/fluentai/frontend/src/services/anki/ankiService.ts b/frontend/src/services/anki/ankiService.ts similarity index 100% rename from fluentai/frontend/src/services/anki/ankiService.ts rename to frontend/src/services/anki/ankiService.ts diff --git a/fluentai/frontend/src/services/modelService.ts b/frontend/src/services/modelService.ts similarity index 100% rename from fluentai/frontend/src/services/modelService.ts rename to frontend/src/services/modelService.ts diff --git a/fluentai/frontend/tailwind.config.ts b/frontend/tailwind.config.ts similarity index 100% rename from fluentai/frontend/tailwind.config.ts rename to frontend/tailwind.config.ts diff --git a/fluentai/frontend/tsconfig.json b/frontend/tsconfig.json similarity index 100% rename from fluentai/frontend/tsconfig.json rename to frontend/tsconfig.json diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index ba22778..0000000 --- a/package-lock.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "FluentAI", - "lockfileVersion": 3, - "requires": true, - "packages": {} -} diff --git a/package.json b/package.json deleted file mode 100644 index 0967ef4..0000000 --- a/package.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/card_gen/__init__.py b/tests/card_gen/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/vocab/__init__.py b/tests/vocab/__init__.py deleted file mode 100644 index e69de29..0000000 From f12082673ecec6f768d41035322d0a9ce6896cbf Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 11:39:54 +0100 Subject: [PATCH 02/15] Fix imports, setup and readme --- backend/fluentai/api/api.py | 8 ++++---- backend/fluentai/logger.py | 2 +- backend/fluentai/services/imagine/image_gen.py | 4 ++-- .../fluentai/services/imagine/verbal_cue.py | 4 ++-- backend/fluentai/services/main.py | 10 +++++----- .../mnemonic/imageability/embeddings.py | 6 +++--- .../mnemonic/imageability/imag_models/data.py | 4 ++-- .../imageability/imag_models/ensemble.py | 6 +++--- .../imageability/imag_models/models.py | 10 +++++----- .../imageability/imag_models/optimization.py | 4 ++-- .../mnemonic/imageability/imageability.py | 6 +++--- .../services/mnemonic/orthographic/eval.py | 4 ++-- .../services/mnemonic/phonetic/clts_utils.py | 2 +- .../services/mnemonic/phonetic/eval.py | 6 +++--- .../fluentai/services/mnemonic/phonetic/g2p.py | 4 ++-- .../services/mnemonic/phonetic/ipa2vec.py | 4 ++-- .../phonetic/make_data/filter_words.py | 2 +- .../services/mnemonic/phonetic/phonetic.py | 10 +++++----- .../services/mnemonic/phonetic/utils.py | 4 ++-- .../services/mnemonic/phonetic/vectorizer.py | 8 ++++---- .../services/mnemonic/semantic/eval.py | 4 ++-- .../mnemonic/semantic/make_data/data.py | 2 +- .../services/mnemonic/semantic/semantic.py | 4 ++-- .../services/mnemonic/semantic/translator.py | 2 +- .../services/mnemonic/word2mnemonic.py | 18 +++++++++--------- backend/fluentai/services/tts/fallback.py | 2 +- backend/fluentai/services/tts/tts.py | 2 +- backend/fluentai/utils/fasttext.py | 2 +- backend/fluentai/utils/load_models.py | 4 ++-- backend/setup.py | 10 ++-------- 30 files changed, 76 insertions(+), 82 deletions(-) diff --git a/backend/fluentai/api/api.py b/backend/fluentai/api/api.py index 8e6eee0..3a59ca1 100644 --- a/backend/fluentai/api/api.py +++ b/backend/fluentai/api/api.py @@ -10,10 +10,10 @@ from fastapi.responses import JSONResponse from pydantic import BaseModel -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.main import generate_mnemonic_img -from fluentai.services.card_gen.utils.load_models import download_all_models -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.main import generate_mnemonic_img +from fluentai.utils.load_models import download_all_models app = FastAPI() diff --git a/backend/fluentai/logger.py b/backend/fluentai/logger.py index 501b4b7..70a5ae4 100644 --- a/backend/fluentai/logger.py +++ b/backend/fluentai/logger.py @@ -2,7 +2,7 @@ import os import sys -from fluentai.services.card_gen.constants.config import config +from fluentai.constants.config import config class UTF8StreamHandler(logging.StreamHandler): diff --git a/backend/fluentai/services/imagine/image_gen.py b/backend/fluentai/services/imagine/image_gen.py index 2d36c32..3a4f857 100644 --- a/backend/fluentai/services/imagine/image_gen.py +++ b/backend/fluentai/services/imagine/image_gen.py @@ -6,8 +6,8 @@ import torch from diffusers import AutoPipelineForText2Image, SanaPipeline -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger def manage_model_memory(method): diff --git a/backend/fluentai/services/imagine/verbal_cue.py b/backend/fluentai/services/imagine/verbal_cue.py index 9ed3810..19c96dd 100644 --- a/backend/fluentai/services/imagine/verbal_cue.py +++ b/backend/fluentai/services/imagine/verbal_cue.py @@ -4,8 +4,8 @@ import torch from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger def manage_model_memory(method): diff --git a/backend/fluentai/services/main.py b/backend/fluentai/services/main.py index 7eab9c8..0584452 100644 --- a/backend/fluentai/services/main.py +++ b/backend/fluentai/services/main.py @@ -1,10 +1,10 @@ import torch -from fluentai.services.card_gen.imagine.image_gen import ImageGen -from fluentai.services.card_gen.imagine.verbal_cue import VerbalCue -from fluentai.services.card_gen.mnemonic.word2mnemonic import Word2Mnemonic -from fluentai.services.card_gen.tts.tts import TTS -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger +from fluentai.services.imagine.image_gen import ImageGen +from fluentai.services.imagine.verbal_cue import VerbalCue +from fluentai.services.mnemonic.word2mnemonic import Word2Mnemonic +from fluentai.services.tts.tts import TTS def generate_mnemonic_img( diff --git a/backend/fluentai/services/mnemonic/imageability/embeddings.py b/backend/fluentai/services/mnemonic/imageability/embeddings.py index 6706556..1d719eb 100644 --- a/backend/fluentai/services/mnemonic/imageability/embeddings.py +++ b/backend/fluentai/services/mnemonic/imageability/embeddings.py @@ -8,8 +8,8 @@ from sentence_transformers import SentenceTransformer from tqdm import tqdm -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger EMBEDDING_MODEL = None @@ -26,7 +26,7 @@ def load_embedding_model(self): Load the specified embedding model. """ if self.model_name == "fasttext": - from fluentai.services.card_gen.utils.fasttext import fasttext_model + from fluentai.utils.fasttext import fasttext_model return fasttext_model diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/data.py b/backend/fluentai/services/mnemonic/imageability/imag_models/data.py index 6f009b3..0a263e4 100644 --- a/backend/fluentai/services/mnemonic/imageability/imag_models/data.py +++ b/backend/fluentai/services/mnemonic/imageability/imag_models/data.py @@ -8,8 +8,8 @@ from huggingface_hub import HfApi, hf_hub_download from sklearn.model_selection import train_test_split -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger def upload_model(model_path: str): diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py b/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py index 26ebc39..26f2d41 100644 --- a/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py +++ b/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py @@ -9,11 +9,11 @@ from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error, r2_score -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import ( +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.mnemonic.imageability.imag_models.data import ( append_to_log, ) -from fluentai.services.card_gen.utils.logger import logger def implement_ensemble_methods( diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/models.py b/backend/fluentai/services/mnemonic/imageability/imag_models/models.py index b0b291e..701e3e3 100644 --- a/backend/fluentai/services/mnemonic/imageability/imag_models/models.py +++ b/backend/fluentai/services/mnemonic/imageability/imag_models/models.py @@ -18,8 +18,9 @@ from tqdm import tqdm from xgboost import XGBRegressor -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import ( +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.mnemonic.imageability.imag_models.data import ( append_to_log, ensure_logs_directory, load_data, @@ -28,13 +29,12 @@ split_dataset, upload_model, ) -from fluentai.services.card_gen.mnemonic.imageability.imag_models.ensemble import ( +from fluentai.services.mnemonic.imageability.imag_models.ensemble import ( implement_ensemble_methods, ) -from fluentai.services.card_gen.mnemonic.imageability.imag_models.optimization import ( +from fluentai.services.mnemonic.imageability.imag_models.optimization import ( objective, ) -from fluentai.services.card_gen.utils.logger import logger def train_and_evaluate_models(X_train, X_test, y_train, y_test, dataset_hash): diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py b/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py index 477acee..7c07037 100644 --- a/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py +++ b/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py @@ -15,10 +15,10 @@ from sklearn.svm import SVR from xgboost import XGBRegressor -from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import ( +from fluentai.logger import logger +from fluentai.services.mnemonic.imageability.imag_models.data import ( append_hyperparameters_log, ) -from fluentai.services.card_gen.utils.logger import logger def get_optuna_search_space(trial, model_name): diff --git a/backend/fluentai/services/mnemonic/imageability/imageability.py b/backend/fluentai/services/mnemonic/imageability/imageability.py index 7b09868..745ce5e 100644 --- a/backend/fluentai/services/mnemonic/imageability/imageability.py +++ b/backend/fluentai/services/mnemonic/imageability/imageability.py @@ -2,9 +2,9 @@ import numpy as np from huggingface_hub import hf_hub_download -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.fasttext import fasttext_model -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.utils.fasttext import fasttext_model class ImageabilityPredictor: diff --git a/backend/fluentai/services/mnemonic/orthographic/eval.py b/backend/fluentai/services/mnemonic/orthographic/eval.py index 11c74dc..f3e4ddf 100644 --- a/backend/fluentai/services/mnemonic/orthographic/eval.py +++ b/backend/fluentai/services/mnemonic/orthographic/eval.py @@ -4,8 +4,8 @@ from scipy.stats import pearsonr, spearmanr from sklearn.preprocessing import MinMaxScaler -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger def scale_ratings(ratings: pd.Series) -> pd.Series: diff --git a/backend/fluentai/services/mnemonic/phonetic/clts_utils.py b/backend/fluentai/services/mnemonic/phonetic/clts_utils.py index 6b5aedb..e8ae427 100644 --- a/backend/fluentai/services/mnemonic/phonetic/clts_utils.py +++ b/backend/fluentai/services/mnemonic/phonetic/clts_utils.py @@ -4,7 +4,7 @@ from git import GitCommandError, RemoteProgress, Repo from tqdm import tqdm -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger def check_directory_exists(directory_path): diff --git a/backend/fluentai/services/mnemonic/phonetic/eval.py b/backend/fluentai/services/mnemonic/phonetic/eval.py index d0b6c41..98b7c85 100644 --- a/backend/fluentai/services/mnemonic/phonetic/eval.py +++ b/backend/fluentai/services/mnemonic/phonetic/eval.py @@ -9,9 +9,9 @@ from sklearn.preprocessing import MinMaxScaler from tqdm import tqdm -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec def word2ipa( diff --git a/backend/fluentai/services/mnemonic/phonetic/g2p.py b/backend/fluentai/services/mnemonic/phonetic/g2p.py index 5560bc8..967ad5d 100644 --- a/backend/fluentai/services/mnemonic/phonetic/g2p.py +++ b/backend/fluentai/services/mnemonic/phonetic/g2p.py @@ -1,7 +1,7 @@ from transformers import AutoTokenizer, T5ForConditionalGeneration -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger class G2P: diff --git a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py index 29cdb35..538fab7 100644 --- a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py +++ b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py @@ -5,8 +5,8 @@ from pyclts import CLTS from soundvectors import SoundVectors -from fluentai.services.card_gen.mnemonic.phonetic.clts_utils import get_clts -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger +from fluentai.services.mnemonic.phonetic.clts_utils import get_clts # Test if data/clts exists if not os.path.exists("local_data/clts"): diff --git a/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py b/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py index a0210d5..1ffe645 100644 --- a/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py +++ b/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py @@ -6,7 +6,7 @@ from nltk.stem import WordNetLemmatizer from tqdm import tqdm -from fluentai.services.card_gen.constants.config import config +from fluentai.constants.config import config # Download required NLTK data nltk.download("words") diff --git a/backend/fluentai/services/mnemonic/phonetic/phonetic.py b/backend/fluentai/services/mnemonic/phonetic/phonetic.py index d12eeb8..18c581d 100644 --- a/backend/fluentai/services/mnemonic/phonetic/phonetic.py +++ b/backend/fluentai/services/mnemonic/phonetic/phonetic.py @@ -3,14 +3,14 @@ import pandas as pd from huggingface_hub import hf_hub_download -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec -from fluentai.services.card_gen.mnemonic.phonetic.utils import ( +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec +from fluentai.services.mnemonic.phonetic.utils import ( convert_to_matrix, load_cache, pad_vectors, ) -from fluentai.services.card_gen.utils.logger import logger def word2ipa( @@ -175,7 +175,7 @@ def top_phonetic( os.environ["KMP_DUPLICATE_LIB_OK"] = "True" # Load the G2P model - from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P + from fluentai.services.mnemonic.phonetic.g2p import G2P result = top_phonetic(word_input, language_code, top_n, G2P()) print(result) diff --git a/backend/fluentai/services/mnemonic/phonetic/utils.py b/backend/fluentai/services/mnemonic/phonetic/utils.py index 4f0eb70..b019fdb 100644 --- a/backend/fluentai/services/mnemonic/phonetic/utils.py +++ b/backend/fluentai/services/mnemonic/phonetic/utils.py @@ -4,8 +4,8 @@ import pandas as pd from huggingface_hub import hf_hub_download -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger def pad_vectors(vectors): diff --git a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py index 3a48998..b86fc11 100644 --- a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py +++ b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py @@ -5,10 +5,10 @@ from huggingface_hub import hf_hub_download from tqdm import tqdm -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import ft, sv -from fluentai.services.card_gen.mnemonic.phonetic.utils import flatten_vectors -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger +from fluentai.services.mnemonic.phonetic.ipa2vec import ft, sv +from fluentai.services.mnemonic.phonetic.utils import flatten_vectors def vectorize_word_clts(word, sv): diff --git a/backend/fluentai/services/mnemonic/semantic/eval.py b/backend/fluentai/services/mnemonic/semantic/eval.py index 378a9d4..ed6ae79 100644 --- a/backend/fluentai/services/mnemonic/semantic/eval.py +++ b/backend/fluentai/services/mnemonic/semantic/eval.py @@ -9,9 +9,9 @@ from scipy.stats import pearsonr, spearmanr from tqdm import tqdm -from fluentai.services.card_gen.constants.config import config +from fluentai.constants.config import config +from fluentai.logger import logger from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity -from fluentai.services.card_gen.utils.logger import logger def compute_dataset_hash(df: pd.DataFrame) -> str: diff --git a/backend/fluentai/services/mnemonic/semantic/make_data/data.py b/backend/fluentai/services/mnemonic/semantic/make_data/data.py index f34e403..4c736f5 100644 --- a/backend/fluentai/services/mnemonic/semantic/make_data/data.py +++ b/backend/fluentai/services/mnemonic/semantic/make_data/data.py @@ -1,7 +1,7 @@ import pandas as pd from sklearn.preprocessing import MinMaxScaler -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger # Function to scale similarity scores to 0-1 range diff --git a/backend/fluentai/services/mnemonic/semantic/semantic.py b/backend/fluentai/services/mnemonic/semantic/semantic.py index 8c4d38b..6189fab 100644 --- a/backend/fluentai/services/mnemonic/semantic/semantic.py +++ b/backend/fluentai/services/mnemonic/semantic/semantic.py @@ -1,8 +1,8 @@ from gensim.models.fasttext import FastTextKeyedVectors from sentence_transformers import SentenceTransformer -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.utils.logger import logger +from fluentai.constants.config import config +from fluentai.logger import logger class SemanticSimilarity: diff --git a/backend/fluentai/services/mnemonic/semantic/translator.py b/backend/fluentai/services/mnemonic/semantic/translator.py index 6e7d2b1..9f01597 100644 --- a/backend/fluentai/services/mnemonic/semantic/translator.py +++ b/backend/fluentai/services/mnemonic/semantic/translator.py @@ -3,8 +3,8 @@ from googletrans import Translator +from fluentai.logger import logger from fluentai.services.card_gen.utils.lang_codes import map_language_code -from fluentai.services.card_gen.utils.logger import logger translator = Translator() diff --git a/backend/fluentai/services/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py index 9787747..9b0b19a 100644 --- a/backend/fluentai/services/mnemonic/word2mnemonic.py +++ b/backend/fluentai/services/mnemonic/word2mnemonic.py @@ -1,16 +1,16 @@ -from fluentai.services.card_gen.constants.config import config, weights_percentages -from fluentai.services.card_gen.constants.languages import G2P_LANGUAGES -from fluentai.services.card_gen.mnemonic.imageability.predictions import ( +from fluentai.constants.config import config, weights_percentages +from fluentai.constants.languages import G2P_LANGUAGES +from fluentai.logger import logger +from fluentai.services.mnemonic.imageability.imageability import ( ImageabilityPredictor, ) -from fluentai.services.card_gen.mnemonic.orthographic.orthographic import ( +from fluentai.services.mnemonic.orthographic.orthographic import ( compute_damerau_levenshtein_similarity, ) -from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P -from fluentai.services.card_gen.mnemonic.phonetic.phonetic import top_phonetic, word2ipa -from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity -from fluentai.services.card_gen.mnemonic.semantic.translator import translate_word -from fluentai.services.card_gen.utils.logger import logger +from fluentai.services.mnemonic.phonetic.g2p import G2P +from fluentai.services.mnemonic.phonetic.phonetic import top_phonetic, word2ipa +from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.mnemonic.semantic.translator import translate_word class Word2Mnemonic: diff --git a/backend/fluentai/services/tts/fallback.py b/backend/fluentai/services/tts/fallback.py index ebc106a..6315943 100644 --- a/backend/fluentai/services/tts/fallback.py +++ b/backend/fluentai/services/tts/fallback.py @@ -4,7 +4,7 @@ import scipy from transformers import VitsModel, VitsTokenizer, pipeline -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger # Check if the language code is supported supported_languages = pd.read_parquet("data/tts-languages.parquet") diff --git a/backend/fluentai/services/tts/tts.py b/backend/fluentai/services/tts/tts.py index 37e886c..23c1369 100644 --- a/backend/fluentai/services/tts/tts.py +++ b/backend/fluentai/services/tts/tts.py @@ -3,8 +3,8 @@ import gtts from gtts import gTTS +from fluentai.logger import logger from fluentai.services.card_gen.utils.lang_codes import map_language_code -from fluentai.services.card_gen.utils.logger import logger class TTS: diff --git a/backend/fluentai/utils/fasttext.py b/backend/fluentai/utils/fasttext.py index 7172f2a..0ce9284 100644 --- a/backend/fluentai/utils/fasttext.py +++ b/backend/fluentai/utils/fasttext.py @@ -9,7 +9,7 @@ from gensim.models.fasttext import FastTextKeyedVectors, load_facebook_vectors from tqdm import tqdm -from fluentai.services.card_gen.utils.logger import logger +from fluentai.logger import logger def download_file(url, dest_path, chunk_size=1024): diff --git a/backend/fluentai/utils/load_models.py b/backend/fluentai/utils/load_models.py index 6c9a2d0..4ec5c4e 100644 --- a/backend/fluentai/utils/load_models.py +++ b/backend/fluentai/utils/load_models.py @@ -3,12 +3,12 @@ import torch -from fluentai.services.card_gen.constants.config import config +from fluentai.constants.config import config +from fluentai.logger import logger from fluentai.services.card_gen.imagine.image_gen import ImageGen from fluentai.services.card_gen.imagine.verbal_cue import VerbalCue from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity -from fluentai.services.card_gen.utils.logger import logger def get_model_dir_name(model: str) -> str: diff --git a/backend/setup.py b/backend/setup.py index c53d0a6..41c80f0 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -29,11 +29,7 @@ def parse_requirements(filename: str) -> list[str]: # Read dependencies from requirements.txt -requirements = parse_requirements("requirements/requirements.txt") - -# Read the long description from README.md -with open("README.md", encoding="utf-8") as fh: - long_description = fh.read() +requirements = parse_requirements("requirements.txt") setup( name="fluentai", @@ -41,8 +37,6 @@ def parse_requirements(filename: str) -> list[str]: packages=find_packages(), install_requires=requirements, description="FluentAI short description", - long_description=long_description, - long_description_content_type="text/markdown", url="https://github.com/StephanAkkerman/", classifiers=[ "Programming Language :: Python :: 3", @@ -51,7 +45,7 @@ def parse_requirements(filename: str) -> list[str]: ], entry_points={ "console_scripts": [ - "fluentai-main=fluentai.main:main", # Adjust as needed + "fluentai-main=fluentai.main:main", ], }, python_requires=">=3.10", From e96772ca4f3d8a89725357cd1730a3b169b30a27 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 11:47:49 +0100 Subject: [PATCH 03/15] remove all card_gen references --- .../mnemonic/imageability/embeddings.py | 1 + .../mnemonic/imageability/imageability.py | 143 +++++++++--------- .../services/mnemonic/semantic/eval.py | 2 +- .../services/mnemonic/semantic/semantic.py | 2 +- .../services/mnemonic/semantic/translator.py | 2 +- backend/fluentai/services/tts/tts.py | 2 +- backend/fluentai/utils/lang_codes.py | 2 +- backend/fluentai/utils/load_models.py | 8 +- 8 files changed, 85 insertions(+), 77 deletions(-) diff --git a/backend/fluentai/services/mnemonic/imageability/embeddings.py b/backend/fluentai/services/mnemonic/imageability/embeddings.py index 1d719eb..72bbb90 100644 --- a/backend/fluentai/services/mnemonic/imageability/embeddings.py +++ b/backend/fluentai/services/mnemonic/imageability/embeddings.py @@ -26,6 +26,7 @@ def load_embedding_model(self): Load the specified embedding model. """ if self.model_name == "fasttext": + logger.info("Loading FastText model for imageability embeddings...") from fluentai.utils.fasttext import fasttext_model return fasttext_model diff --git a/backend/fluentai/services/mnemonic/imageability/imageability.py b/backend/fluentai/services/mnemonic/imageability/imageability.py index 745ce5e..bbfc60e 100644 --- a/backend/fluentai/services/mnemonic/imageability/imageability.py +++ b/backend/fluentai/services/mnemonic/imageability/imageability.py @@ -1,110 +1,117 @@ +import os + import joblib -import numpy as np +import pandas as pd from huggingface_hub import hf_hub_download +from tqdm import tqdm from fluentai.constants.config import config -from fluentai.logger import logger -from fluentai.utils.fasttext import fasttext_model +from fluentai.services.mnemonic.imageability.embeddings import ( + ImageabilityEmbeddings, +) + + +def make_predictions(): + """ + Generate imageability score predictions for the IPA dataset and save them to a CSV file. + """ + embedding_model = ImageabilityEmbeddings(model_name="fasttext") + regression_model = joblib.load( + hf_hub_download( + repo_id=config.get("IMAGEABILITY").get("PREDICTOR").get("REPO"), + filename=config.get("IMAGEABILITY").get("PREDICTOR").get("FILE"), + cache_dir="models", + ) + ) + + ipa_dataset = pd.read_csv( + hf_hub_download( + repo_id=config.get("PHONETIC_SIM").get("IPA").get("REPO"), + filename=config.get("PHONETIC_SIM").get("IPA").get("FILE"), + cache_dir="datasets", + repo_type="dataset", + ) + ) + # Only keep the unique words in the column "token_ort" + ipa_dataset = ipa_dataset.drop_duplicates(subset=["token_ort"]) -class ImageabilityPredictor: - def __init__( - self, - ): - """ - Initialize the ImageabilityPredictor by loading the embedding model and regression model. + predictions = [] + + # Generate embeddings and predictions for the IPA dataset + for idx, row in tqdm(ipa_dataset.iterrows(), total=len(ipa_dataset)): + word = row["token_ort"] + embedding = embedding_model.get_embedding(word) + prediction = regression_model.predict(embedding.reshape(1, -1))[0] + predictions.append((word, prediction)) + + # Convert the predictions to a DataFrame + predictions_df = pd.DataFrame(predictions, columns=["word", "imageability_score"]) + + # Create a directory to save the predictions + os.makedirs("local_data/imageability", exist_ok=True) + + # Save the predictions + predictions_df.to_csv("local_data/imageability/predictions.csv", index=False) - Args: - embedding_model_name (str, optional): Name of the embedding model to load from Gensim. - Defaults to "fasttext-wiki-news-subwords-300". - regression_model_path (str, optional): Path to the trained regression model (.joblib file). - Defaults to "models/best_model_LGBMRegressor.joblib". - """ - # Load the embedding model - self.embedding_model = fasttext_model - self.regression_model = joblib.load( +class ImageabilityPredictor: + def __init__(self): + self.predictions_df = pd.read_csv( hf_hub_download( - repo_id=config.get("IMAGEABILITY").get("MODEL"), - filename=config.get("IMAGEABILITY").get("MODEL_FILE"), - cache_dir="models", + repo_id=config.get("IMAGEABILITY").get("PREDICTIONS").get("REPO"), + filename=config.get("IMAGEABILITY").get("PREDICTIONS").get("FILE"), + cache_dir="datasets", + repo_type="dataset", ) ) - logger.info("Regression model loaded successfully.") - def get_embedding(self, word): + def get_prediction(self, word): """ - Retrieve the embedding vector for a given word. + Get the imageability score prediction for a given word. Args: - word (str): The word to retrieve the embedding for. - - Returns - ------- - np.ndarray: Embedding vector for the word. - """ - try: - embedding = self.embedding_model.get_vector(word) - except KeyError: - # Handle out-of-vocabulary (OOV) words by returning a zero vector - embedding = np.zeros(self.embedding_model.vector_size, dtype=np.float32) - return embedding - - def predict_imageability(self, embedding): - """ - Predict the imageability score based on the embedding. - - Args: - embedding (np.ndarray): Embedding vector of the word. + word (str): The word to get the prediction for. Returns ------- float: Predicted imageability score. """ - # Reshape embedding for prediction (1 sample) - embedding = embedding.reshape(1, -1) - imageability = self.regression_model.predict(embedding)[0] - return imageability + prediction = self.predictions_df[self.predictions_df["word"] == word][ + "imageability_score" + ].values[0] + return prediction - def get_imageability(self, word): + def get_predictions(self, words): """ - Generate the imageability score for a given word. + Get the imageability score predictions for a list of words. Args: - word (str): The word to evaluate. + words (List[str]): List of words to get the predictions for. Returns ------- - float: Predicted imageability score. + List[float]: Predicted imageability scores. """ - embedding = self.get_embedding(word) - imageability = self.predict_imageability(embedding) - return imageability + predictions = [self.get_prediction(word) for word in words] + return predictions def get_column_imageability(self, dataframe, column_name): """ Generate the imageability score for a given column in a DataFrame. Args: - dataframe (pd.DataFrame): The DataFrame containing the column to evaluate. - column_name (str): The name of the column to evaluate. + dataframe (pd.DataFrame): The DataFrame containing the words. + column_name (str): The name of the column containing the words. Returns ------- - pd.Series: Predicted imageability scores for the column. + List[float]: Predicted imageability scores. """ - embeddings = dataframe[column_name].apply(self.get_embedding) - imageabilities = embeddings.apply(self.predict_imageability) - return imageabilities + predictions = self.get_predictions(dataframe[column_name].tolist()) + return predictions -# Example Usage if __name__ == "__main__": predictor = ImageabilityPredictor() - - # Example words - words_to_predict = ["apple", "banana", "orange", "unknownword"] - - for word in words_to_predict: - score = predictor.get_imageability(word) - logger.info(f"Word: '{word}' | Predicted Imageability: {score:.4f}") + print(predictor.get_prediction("apple")) diff --git a/backend/fluentai/services/mnemonic/semantic/eval.py b/backend/fluentai/services/mnemonic/semantic/eval.py index ed6ae79..831b598 100644 --- a/backend/fluentai/services/mnemonic/semantic/eval.py +++ b/backend/fluentai/services/mnemonic/semantic/eval.py @@ -11,7 +11,7 @@ from fluentai.constants.config import config from fluentai.logger import logger -from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity def compute_dataset_hash(df: pd.DataFrame) -> str: diff --git a/backend/fluentai/services/mnemonic/semantic/semantic.py b/backend/fluentai/services/mnemonic/semantic/semantic.py index 6189fab..a672a90 100644 --- a/backend/fluentai/services/mnemonic/semantic/semantic.py +++ b/backend/fluentai/services/mnemonic/semantic/semantic.py @@ -68,7 +68,7 @@ def load_semantic_model(self) -> SentenceTransformer | FastTextKeyedVectors: _description_ """ if self.model_name == "fasttext": - from fluentai.services.card_gen.utils.fasttext import fasttext_model + from fluentai.utils.fasttext import fasttext_model return fasttext_model diff --git a/backend/fluentai/services/mnemonic/semantic/translator.py b/backend/fluentai/services/mnemonic/semantic/translator.py index 9f01597..997e244 100644 --- a/backend/fluentai/services/mnemonic/semantic/translator.py +++ b/backend/fluentai/services/mnemonic/semantic/translator.py @@ -4,7 +4,7 @@ from googletrans import Translator from fluentai.logger import logger -from fluentai.services.card_gen.utils.lang_codes import map_language_code +from fluentai.utils.lang_codes import map_language_code translator = Translator() diff --git a/backend/fluentai/services/tts/tts.py b/backend/fluentai/services/tts/tts.py index 23c1369..14b7f2b 100644 --- a/backend/fluentai/services/tts/tts.py +++ b/backend/fluentai/services/tts/tts.py @@ -4,7 +4,7 @@ from gtts import gTTS from fluentai.logger import logger -from fluentai.services.card_gen.utils.lang_codes import map_language_code +from fluentai.utils.lang_codes import map_language_code class TTS: diff --git a/backend/fluentai/utils/lang_codes.py b/backend/fluentai/utils/lang_codes.py index 72441b9..8d7e4ba 100644 --- a/backend/fluentai/utils/lang_codes.py +++ b/backend/fluentai/utils/lang_codes.py @@ -1,6 +1,6 @@ import pycountry -from fluentai.services.card_gen.constants.languages import ( +from fluentai.constants.languages import ( G2P_LANGCODES, TRANSLATE_LANGCODES, ) diff --git a/backend/fluentai/utils/load_models.py b/backend/fluentai/utils/load_models.py index 4ec5c4e..8317470 100644 --- a/backend/fluentai/utils/load_models.py +++ b/backend/fluentai/utils/load_models.py @@ -5,10 +5,10 @@ from fluentai.constants.config import config from fluentai.logger import logger -from fluentai.services.card_gen.imagine.image_gen import ImageGen -from fluentai.services.card_gen.imagine.verbal_cue import VerbalCue -from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P -from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.imagine.image_gen import ImageGen +from fluentai.services.imagine.verbal_cue import VerbalCue +from fluentai.services.mnemonic.phonetic.g2p import G2P +from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity def get_model_dir_name(model: str) -> str: From 94052b90146f48f400fd61bd64129d220e36b6e1 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 12:08:47 +0100 Subject: [PATCH 04/15] also fix feature request template --- .github/ISSUE_TEMPLATE/feature_request.md | 18 +++++++++--------- backend/fluentai/api/api.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 6be1030..3914bac 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -6,22 +6,22 @@ labels: '' assignees: '' --- + 1. Description: - - Problem: - + - Problem: - - Solution: - - - Prerequisites: - + - Solution: -2. Tasks: + + - Prerequisites: + + +2. Tasks: - [ ] Task 1 - [ ] Task 2 - [ ] Task 3 -3. Additional context - \ No newline at end of file +3. Additional context diff --git a/backend/fluentai/api/api.py b/backend/fluentai/api/api.py index 3a59ca1..eefb625 100644 --- a/backend/fluentai/api/api.py +++ b/backend/fluentai/api/api.py @@ -4,13 +4,13 @@ import httpx import uvicorn -from constants.languages import G2P_LANGCODES, G2P_LANGUAGES from fastapi import FastAPI, HTTPException, Query, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from pydantic import BaseModel from fluentai.constants.config import config +from fluentai.constants.languages import G2P_LANGCODES, G2P_LANGUAGES from fluentai.logger import logger from fluentai.services.main import generate_mnemonic_img from fluentai.utils.load_models import download_all_models From 1ba0c1040aba1bae56dc01daf817e08abcb28b25 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 12:12:15 +0100 Subject: [PATCH 05/15] add folder for phonetic utils --- backend/fluentai/services/mnemonic/phonetic/ipa2vec.py | 2 +- backend/fluentai/services/mnemonic/phonetic/phonetic.py | 2 +- .../services/mnemonic/phonetic/{ => utils}/clts_utils.py | 0 .../fluentai/services/mnemonic/phonetic/{ => utils}/utils.py | 0 backend/fluentai/services/mnemonic/phonetic/vectorizer.py | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) rename backend/fluentai/services/mnemonic/phonetic/{ => utils}/clts_utils.py (100%) rename backend/fluentai/services/mnemonic/phonetic/{ => utils}/utils.py (100%) diff --git a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py index 538fab7..6aafc64 100644 --- a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py +++ b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py @@ -6,7 +6,7 @@ from soundvectors import SoundVectors from fluentai.logger import logger -from fluentai.services.mnemonic.phonetic.clts_utils import get_clts +from fluentai.services.mnemonic.phonetic.utils.clts_utils import get_clts # Test if data/clts exists if not os.path.exists("local_data/clts"): diff --git a/backend/fluentai/services/mnemonic/phonetic/phonetic.py b/backend/fluentai/services/mnemonic/phonetic/phonetic.py index 18c581d..c68c839 100644 --- a/backend/fluentai/services/mnemonic/phonetic/phonetic.py +++ b/backend/fluentai/services/mnemonic/phonetic/phonetic.py @@ -6,7 +6,7 @@ from fluentai.constants.config import config from fluentai.logger import logger from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec -from fluentai.services.mnemonic.phonetic.utils import ( +from fluentai.services.mnemonic.phonetic.utils.utils import ( convert_to_matrix, load_cache, pad_vectors, diff --git a/backend/fluentai/services/mnemonic/phonetic/clts_utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/clts_utils.py similarity index 100% rename from backend/fluentai/services/mnemonic/phonetic/clts_utils.py rename to backend/fluentai/services/mnemonic/phonetic/utils/clts_utils.py diff --git a/backend/fluentai/services/mnemonic/phonetic/utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/utils.py similarity index 100% rename from backend/fluentai/services/mnemonic/phonetic/utils.py rename to backend/fluentai/services/mnemonic/phonetic/utils/utils.py diff --git a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py index b86fc11..63438b8 100644 --- a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py +++ b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py @@ -8,7 +8,7 @@ from fluentai.constants.config import config from fluentai.logger import logger from fluentai.services.mnemonic.phonetic.ipa2vec import ft, sv -from fluentai.services.mnemonic.phonetic.utils import flatten_vectors +from fluentai.services.mnemonic.phonetic.utils.utils import flatten_vectors def vectorize_word_clts(word, sv): From f8d947b25f45a643a6c1536d9e794a3f683a87de Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 12:16:13 +0100 Subject: [PATCH 06/15] fix pytests --- .../tests/test_services/test_imageability.py | 22 +++++++-------- .../tests/test_services/test_orthographic.py | 2 +- backend/tests/test_services/test_phonetic.py | 28 ++++++++----------- backend/tests/test_services/test_semantic.py | 16 +++++------ 4 files changed, 31 insertions(+), 37 deletions(-) diff --git a/backend/tests/test_services/test_imageability.py b/backend/tests/test_services/test_imageability.py index d68c554..3749497 100644 --- a/backend/tests/test_services/test_imageability.py +++ b/backend/tests/test_services/test_imageability.py @@ -1,4 +1,4 @@ -# tests/test_card_gen.py +# tests/test_py import os from unittest.mock import MagicMock @@ -10,7 +10,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa # Import the functions and classes to be tested -from fluentai.services.card_gen.mnemonic.imageability.predictions import ( +from fluentai.services.mnemonic.imageability.imageability import ( ImageabilityPredictor, make_predictions, ) @@ -22,7 +22,7 @@ def mock_hf_hub_download(mocker): Fixture to mock hf_hub_download function. """ return mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.hf_hub_download" + "fluentai.services.mnemonic.imageability.imageability.hf_hub_download" ) @@ -32,7 +32,7 @@ def mock_joblib_load(mocker): Fixture to mock joblib.load function. """ return mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.joblib.load" + "fluentai.services.mnemonic.imageability.imageability.joblib.load" ) @@ -42,7 +42,7 @@ def mock_pd_read_csv(mocker): Fixture to mock pandas.read_csv function. """ return mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.pd.read_csv" + "fluentai.services.mnemonic.imageability.imageability.pd.read_csv" ) @@ -52,7 +52,7 @@ def mock_ImageabilityEmbeddings(mocker): Fixture to mock ImageabilityEmbeddings class. """ mock_class = mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.ImageabilityEmbeddings" + "fluentai.services.mnemonic.imageability.imageability.ImageabilityEmbeddings" ) mock_instance = MagicMock() mock_instance.get_embedding.side_effect = lambda word: np.array([1.0, 2.0, 3.0]) @@ -72,7 +72,7 @@ def test_make_predictions( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.config" + "fluentai.services.mnemonic.imageability.imageability.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -109,7 +109,7 @@ def test_make_predictions( # Mock tqdm to just return the iterator mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.tqdm", + "fluentai.services.mnemonic.imageability.imageability.tqdm", side_effect=lambda x, total=None: x, ) @@ -142,7 +142,7 @@ def test_ImageabilityPredictor_get_prediction( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.config" + "fluentai.services.mnemonic.imageability.imageability.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -185,7 +185,7 @@ def test_ImageabilityPredictor_get_predictions( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.config" + "fluentai.services.mnemonic.imageability.imageability.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -225,7 +225,7 @@ def test_ImageabilityPredictor_get_column_imageability( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.card_gen.mnemonic.imageability.predictions.config" + "fluentai.services.mnemonic.imageability.imageability.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { diff --git a/backend/tests/test_services/test_orthographic.py b/backend/tests/test_services/test_orthographic.py index f137252..4b18ac0 100644 --- a/backend/tests/test_services/test_orthographic.py +++ b/backend/tests/test_services/test_orthographic.py @@ -6,7 +6,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa -from fluentai.services.card_gen.mnemonic.orthographic.orthographic import ( +from fluentai.services.mnemonic.orthographic.orthographic import ( compute_damerau_levenshtein_similarity, ) diff --git a/backend/tests/test_services/test_phonetic.py b/backend/tests/test_services/test_phonetic.py index f9bf266..2bf3e58 100644 --- a/backend/tests/test_services/test_phonetic.py +++ b/backend/tests/test_services/test_phonetic.py @@ -10,7 +10,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa # Import the top_phonetic function -from fluentai.services.card_gen.mnemonic.phonetic.phonetic import top_phonetic +from fluentai.services.mnemonic.phonetic.phonetic import top_phonetic @pytest.fixture @@ -18,7 +18,7 @@ def mock_config(mocker): """ Fixture to mock the config.get method. """ - return mocker.patch("fluentai.services.card_gen.mnemonic.phonetic.phonetic.config") + return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.config") @pytest.fixture @@ -26,9 +26,7 @@ def mock_word2ipa(mocker): """ Fixture to mock the word2ipa function. """ - return mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.word2ipa" - ) + return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.word2ipa") @pytest.fixture @@ -36,9 +34,7 @@ def mock_load_cache(mocker): """ Fixture to mock the load_cache function. """ - return mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.load_cache" - ) + return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.load_cache") @pytest.fixture @@ -46,9 +42,7 @@ def mock_pad_vectors(mocker): """ Fixture to mock the pad_vectors function. """ - return mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.pad_vectors" - ) + return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.pad_vectors") @pytest.fixture @@ -57,7 +51,7 @@ def mock_convert_to_matrix(mocker): Fixture to mock the convert_to_matrix function. """ return mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.convert_to_matrix" + "fluentai.services.mnemonic.phonetic.phonetic.convert_to_matrix" ) @@ -67,7 +61,7 @@ def mock_faiss_normalize_L2(mocker): Fixture to mock the faiss.normalize_L2 function. """ return mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.faiss.normalize_L2" + "fluentai.services.mnemonic.phonetic.phonetic.faiss.normalize_L2" ) @@ -80,7 +74,7 @@ def mock_faiss_IndexFlatIP(mocker): """ instance_mock = MagicMock() constructor_mock = mocker.patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.faiss.IndexFlatIP", + "fluentai.services.mnemonic.phonetic.phonetic.faiss.IndexFlatIP", return_value=instance_mock, ) return constructor_mock, instance_mock @@ -150,7 +144,7 @@ def test_top_phonetic_success( # Create a mock vectorizer function (panphon_vec or soundvec) with patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.panphon_vec", + "fluentai.services.mnemonic.phonetic.phonetic.panphon_vec", return_value=[[0.1, 0.2, 0.3]], ): # Initialize a mock g2p_model with a g2p method @@ -269,7 +263,7 @@ def test_top_phonetic_no_results( # Create a mock vectorizer function (panphon_vec or soundvec) with patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.panphon_vec", + "fluentai.services.mnemonic.phonetic.phonetic.panphon_vec", return_value=[[]], ): # Initialize a mock g2p_model with a g2p method @@ -390,7 +384,7 @@ def test_top_phonetic_invalid_language_code( # Create a mock vectorizer function (soundvec) with patch( - "fluentai.services.card_gen.mnemonic.phonetic.phonetic.soundvec", + "fluentai.services.mnemonic.phonetic.phonetic.soundvec", return_value=[[0.2, 0.3, 0.4]], ): # Initialize a mock g2p_model with a g2p method diff --git a/backend/tests/test_services/test_semantic.py b/backend/tests/test_services/test_semantic.py index 9b28ab9..b18c1b5 100644 --- a/backend/tests/test_services/test_semantic.py +++ b/backend/tests/test_services/test_semantic.py @@ -7,8 +7,8 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa -from fluentai.services.card_gen.constants.config import config -from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.constants.config import config +from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity model_name = config.get("SEMANTIC_SIM").get("MODEL").lower() @@ -18,7 +18,7 @@ def mock_config(mocker): """ Fixture to mock the config.get method. """ - return mocker.patch("fluentai.services.card_gen.mnemonic.semantic.semantic.config") + return mocker.patch("fluentai.services.mnemonic.semantic.semantic.config") @pytest.fixture @@ -48,7 +48,7 @@ def test_compute_similarity_transformer(mock_config, mock_sentence_transformer): # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -87,7 +87,7 @@ def test_compute_similarity_word_not_in_transformer( # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -112,7 +112,7 @@ def test_load_semantic_model_transformer(mock_config, mock_sentence_transformer) # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -130,7 +130,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog """ Test the example function to ensure it logs similarities correctly. """ - from fluentai.services.card_gen.mnemonic.semantic.semantic import example + from fluentai.services.mnemonic.semantic.semantic import example # Setup mock config to return models mock_config.get.return_value = { @@ -140,7 +140,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", return_value=mock_sentence_transformer, ): # Configure the mock models From 6ddda5704659efb3bfe181a693676c8f025ed562 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sat, 4 Jan 2025 12:18:21 +0100 Subject: [PATCH 07/15] fix pytests workflow --- .github/workflows/pytests.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pytests.yaml b/.github/workflows/pytests.yaml index 8d0dcd0..281a2a4 100644 --- a/.github/workflows/pytests.yaml +++ b/.github/workflows/pytests.yaml @@ -7,8 +7,6 @@ on: - 'requirements.txt' # Dependency file - 'setup.py' # Setup script - 'pyproject.toml' # Modern Python project configuration - # Add any other relevant paths as needed - permissions: contents: read @@ -26,6 +24,7 @@ jobs: - name: Install dependencies run: | + cd backend python -m pip install --upgrade pip pip install pytest pytest-mock pip install -e . From e18d44d3b34222af33beeb350c62b6d1ab9180a6 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 13:29:29 +0100 Subject: [PATCH 08/15] Resolve comments --- README.md | 2 +- backend/fluentai/api/app.py | 47 +++++++++ backend/fluentai/api/routes/anki.py | 51 ++++++++++ .../api/{api.py => routes/create_card.py} | 95 ++----------------- backend/fluentai/constants/languages.py | 74 +-------------- backend/fluentai/{services/main.py => run.py} | 2 +- .../fluentai/services/imagine/image_gen.py | 47 ++------- .../{verbal_cue.py => verbal_cue_gen.py} | 51 +--------- .../data.py | 0 .../ensemble.py | 0 .../models.py | 0 .../optimization.py | 0 .../{imageabilitycorpus.py => corpus.py} | 0 .../make_data/{complete.py => run.py} | 0 .../{imageability.py => predictor.py} | 0 .../{orthographic.py => compute.py} | 0 .../services/mnemonic/orthographic/eval.py | 2 +- .../phonetic/{phonetic.py => compute.py} | 10 +- .../services/mnemonic/phonetic/eval.py | 4 +- .../phonetic/{g2p.py => grapheme2phoneme.py} | 4 +- .../services/mnemonic/phonetic/ipa2vec.py | 2 +- .../services/mnemonic/phonetic/utils/cache.py | 34 +++++++ .../phonetic/utils/{clts_utils.py => clts.py} | 0 .../phonetic/utils/{utils.py => vectors.py} | 32 ------- .../services/mnemonic/phonetic/vectorizer.py | 2 +- .../semantic/{semantic.py => compute.py} | 0 .../services/mnemonic/semantic/eval.py | 2 +- .../services/mnemonic/word2mnemonic.py | 14 +-- backend/fluentai/utils/lang_codes.py | 2 +- backend/fluentai/utils/load_models.py | 8 +- backend/fluentai/utils/model_mem.py | 71 ++++++++++++++ .../tests/test_services/test_imageability.py | 26 ++--- .../tests/test_services/test_orthographic.py | 4 +- backend/tests/test_services/test_phonetic.py | 26 +++-- backend/tests/test_services/test_semantic.py | 4 +- config.yaml | 1 + 36 files changed, 276 insertions(+), 341 deletions(-) create mode 100644 backend/fluentai/api/app.py create mode 100644 backend/fluentai/api/routes/anki.py rename backend/fluentai/api/{api.py => routes/create_card.py} (67%) rename backend/fluentai/{services/main.py => run.py} (97%) rename backend/fluentai/services/imagine/{verbal_cue.py => verbal_cue_gen.py} (67%) rename backend/fluentai/services/mnemonic/imageability/{imag_models => imageability_models}/data.py (100%) rename backend/fluentai/services/mnemonic/imageability/{imag_models => imageability_models}/ensemble.py (100%) rename backend/fluentai/services/mnemonic/imageability/{imag_models => imageability_models}/models.py (100%) rename backend/fluentai/services/mnemonic/imageability/{imag_models => imageability_models}/optimization.py (100%) rename backend/fluentai/services/mnemonic/imageability/make_data/{imageabilitycorpus.py => corpus.py} (100%) rename backend/fluentai/services/mnemonic/imageability/make_data/{complete.py => run.py} (100%) rename backend/fluentai/services/mnemonic/imageability/{imageability.py => predictor.py} (100%) rename backend/fluentai/services/mnemonic/orthographic/{orthographic.py => compute.py} (100%) rename backend/fluentai/services/mnemonic/phonetic/{phonetic.py => compute.py} (93%) rename backend/fluentai/services/mnemonic/phonetic/{g2p.py => grapheme2phoneme.py} (97%) create mode 100644 backend/fluentai/services/mnemonic/phonetic/utils/cache.py rename backend/fluentai/services/mnemonic/phonetic/utils/{clts_utils.py => clts.py} (100%) rename backend/fluentai/services/mnemonic/phonetic/utils/{utils.py => vectors.py} (75%) rename backend/fluentai/services/mnemonic/semantic/{semantic.py => compute.py} (100%) create mode 100644 backend/fluentai/utils/model_mem.py diff --git a/README.md b/README.md index 591040b..063973e 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Before starting, make sure you have the following requirements: We have bundled all required dependencies into a package for easy installation. To get started, simply run one of the following commands: ```bash -pip install . +pip install backend/. ``` or install directly from the repository: diff --git a/backend/fluentai/api/app.py b/backend/fluentai/api/app.py new file mode 100644 index 0000000..a6dc35f --- /dev/null +++ b/backend/fluentai/api/app.py @@ -0,0 +1,47 @@ +import argparse + +import uvicorn +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from fluentai.api.routes.anki import anki_router +from fluentai.api.routes.create_card import create_card_router +from fluentai.utils.load_models import download_all_models + +# Initialize FastAPI app +app = FastAPI() + +# Configure CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=[ + "http://localhost:3000", + "https://akkerman.ai", + ], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +app.include_router(anki_router) +app.include_router(create_card_router) + + +def main(): + """Start the FastAPI application.""" + # Start by downloading all models + download_all_models() + + parser = argparse.ArgumentParser(description="") + parser.add_argument( + "--host", type=str, default="127.0.0.1", help="Hosting default: 127.0.0.1" + ) + parser.add_argument("--port", type=int, default=8000) + + args = parser.parse_args() + + uvicorn.run("app:app", host=args.host, port=args.port) + + +if __name__ == "__main__": + main() diff --git a/backend/fluentai/api/routes/anki.py b/backend/fluentai/api/routes/anki.py new file mode 100644 index 0000000..71102c1 --- /dev/null +++ b/backend/fluentai/api/routes/anki.py @@ -0,0 +1,51 @@ +import httpx +from fastapi import APIRouter, Request +from fastapi.responses import JSONResponse + +anki_router = APIRouter() + + +@anki_router.post("/api/anki") +async def anki_proxy(request: Request): + """ + Proxy API endpoint for forwarding requests to the Anki server. + + This function receives a JSON request from the client, forwards it to the Anki + server running on localhost, and returns the response back to the client. + + HACK: This uses the backend as a proxy for when the frontend is deployed in GH Pages + + Parameters + ---------- + request : Request + The incoming HTTP request object containing the JSON payload to be forwarded. + + Returns + ------- + JSONResponse + A JSON response containing the Anki server response or an error message if + the request fails. + """ + try: + # Forward the incoming request body to the Anki server + request_body = await request.json() + + async with httpx.AsyncClient() as client: + response = await client.post( + "http://127.0.0.1:8765", # Assuming Anki is running on localhost with default port + json=request_body, + ) + + # Return the JSON response from Anki server + return JSONResponse(content=response.json(), status_code=response.status_code) + + except httpx.RequestError as e: + return JSONResponse( + content={"error": "Failed to connect to Anki server.", "details": str(e)}, + status_code=500, + ) + except Exception as e: + return JSONResponse( + content={"error": "An unexpected error occurred.", "details": str(e)}, + status_code=500, + ) diff --git a/backend/fluentai/api/api.py b/backend/fluentai/api/routes/create_card.py similarity index 67% rename from backend/fluentai/api/api.py rename to backend/fluentai/api/routes/create_card.py index eefb625..b584ebd 100644 --- a/backend/fluentai/api/api.py +++ b/backend/fluentai/api/routes/create_card.py @@ -1,33 +1,16 @@ -import argparse import base64 import os -import httpx -import uvicorn -from fastapi import FastAPI, HTTPException, Query, Request -from fastapi.middleware.cors import CORSMiddleware +from fastapi import APIRouter, HTTPException, Query from fastapi.responses import JSONResponse from pydantic import BaseModel from fluentai.constants.config import config from fluentai.constants.languages import G2P_LANGCODES, G2P_LANGUAGES from fluentai.logger import logger -from fluentai.services.main import generate_mnemonic_img -from fluentai.utils.load_models import download_all_models +from fluentai.run import generate_mnemonic_img -app = FastAPI() - -# Allow all origins for development (adjust in production) -app.add_middleware( - CORSMiddleware, - allow_origins=[ - "http://localhost:3000", - "https://akkerman.ai", - ], # Replace "*" with your front-end URL in production - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) +create_card_router = APIRouter() # Define Pydantic models for request and responses @@ -41,7 +24,7 @@ class CreateCardResponse(BaseModel): recording: str = None # Placeholder for future implementation -@app.post("/create_card/word_data", response_model=CreateCardResponse) +@create_card_router.post("/create_card/word_data", response_model=CreateCardResponse) async def api_generate_mnemonic(request: CreateCardRequest) -> dict: """ Calls the main function to generate a mnemonic for a given word and language code. @@ -85,7 +68,7 @@ async def api_generate_mnemonic(request: CreateCardRequest) -> dict: raise HTTPException(status_code=500, detail="Internal Server Error") -@app.get("/create_card/img") +@create_card_router.get("/create_card/img") async def get_image( word: str = Query(...), language_code: str = Query(...), @@ -157,7 +140,7 @@ async def get_image( raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") -@app.get("/create_card/supported_languages") +@create_card_router.get("/create_card/supported_languages") async def get_supported_languages() -> JSONResponse: """ Returns a list of languages that the backend supports. @@ -170,7 +153,7 @@ async def get_supported_languages() -> JSONResponse: return JSONResponse(content={"languages": G2P_LANGCODES}) -@app.get("/create_card/image_models") +@create_card_router.get("/create_card/image_models") async def get_image_models() -> JSONResponse: """ Returns a list of available image generation models, with the recommended model at the top. @@ -191,7 +174,7 @@ async def get_image_models() -> JSONResponse: return JSONResponse(content={"models": available_models}) -@app.get("/create_card/llm_models") +@create_card_router.get("/create_card/llm_models") async def get_llm_models() -> JSONResponse: """ Returns a list of available LLM models, with the recommended model at the top. @@ -211,65 +194,3 @@ async def get_llm_models() -> JSONResponse: available_models = [recommended_model] + models["all"] return JSONResponse(content={"models": available_models}) - - -# HACK: This uses the backend as a proxy for when the frontend is deployed in GH Pages - - -@app.post("/api/anki") -async def anki_proxy(request: Request): - """ - Proxy API endpoint for forwarding requests to the Anki server. - - This function receives a JSON request from the client, forwards it to the Anki - server running on localhost, and returns the response back to the client. - - Parameters - ---------- - request : Request - The incoming HTTP request object containing the JSON payload to be forwarded. - - Returns - ------- - JSONResponse - A JSON response containing the Anki server response or an error message if - the request fails. - """ - try: - # Forward the incoming request body to the Anki server - request_body = await request.json() - - async with httpx.AsyncClient() as client: - response = await client.post( - "http://127.0.0.1:8765", # Assuming Anki is running on localhost with default port - json=request_body, - ) - - # Return the JSON response from Anki server - return JSONResponse(content=response.json(), status_code=response.status_code) - - except httpx.RequestError as e: - return JSONResponse( - content={"error": "Failed to connect to Anki server.", "details": str(e)}, - status_code=500, - ) - except Exception as e: - return JSONResponse( - content={"error": "An unexpected error occurred.", "details": str(e)}, - status_code=500, - ) - - -if __name__ == "__main__": - # Start by downloading all models - download_all_models() - - parser = argparse.ArgumentParser(description="") - parser.add_argument( - "--host", type=str, default="127.0.0.1", help="Hosting default: 127.0.0.1" - ) - parser.add_argument("--port", type=int, default=8000) - - args = parser.parse_args() - - uvicorn.run("api:app", host=args.host, port=args.port) diff --git a/backend/fluentai/constants/languages.py b/backend/fluentai/constants/languages.py index 8ce1769..d419500 100644 --- a/backend/fluentai/constants/languages.py +++ b/backend/fluentai/constants/languages.py @@ -1,77 +1,7 @@ import json -from googletrans import LANGCODES, LANGUAGES +from fluentai.constants.config import config -with open("data/languages.json") as f: +with open(config.get("G2P").get("LANGUAGE_JSON")) as f: G2P_LANGCODES = json.load(f) G2P_LANGUAGES: dict = dict(map(reversed, G2P_LANGCODES.items())) - -# Google Translate -TRANSLATE_LANGUAGES: dict = LANGUAGES -TRANSLATE_LANGCODES: dict = LANGCODES - -# Vocab Languages -VOCAB_LANGUAGES = [ - "af", - "ar", - "bg", - "bn", - "br", - "bs", - "ca", - "cs", - "da", - "de", - "el", - "en", - "eo", - "es", - "et", - "eu", - "fa", - "fi", - "fr", - "gl", - "he", - "hi", - "hr", - "hu", - "hy", - "id", - "is", - "it", - "ja", - "ka", - "kk", - "ko", - "lt", - "lv", - "mk", - "ml", - "ms", - "nl", - "no", - "pl", - "pt", - "pt_br", # Brazilian Portuguese - "ro", - "ru", - "si", - "sk", - "sl", - "sq", - "sr", - "sv", - "ta", - "te", - "th", - "tl", - "tr", - "uk", - "ur", - "vi", - "ze_en", # Chinese & English - "ze_zh", # Chinese & English - "zh_cn", # Simplified Chinese - "zh_tw", # Traditional Chinese -] diff --git a/backend/fluentai/services/main.py b/backend/fluentai/run.py similarity index 97% rename from backend/fluentai/services/main.py rename to backend/fluentai/run.py index 0584452..4edbecc 100644 --- a/backend/fluentai/services/main.py +++ b/backend/fluentai/run.py @@ -2,7 +2,7 @@ from fluentai.logger import logger from fluentai.services.imagine.image_gen import ImageGen -from fluentai.services.imagine.verbal_cue import VerbalCue +from fluentai.services.imagine.verbal_cue_gen import VerbalCue from fluentai.services.mnemonic.word2mnemonic import Word2Mnemonic from fluentai.services.tts.tts import TTS diff --git a/backend/fluentai/services/imagine/image_gen.py b/backend/fluentai/services/imagine/image_gen.py index 3a4f857..83ac5e2 100644 --- a/backend/fluentai/services/imagine/image_gen.py +++ b/backend/fluentai/services/imagine/image_gen.py @@ -1,5 +1,3 @@ -import functools -import gc import os from pathlib import Path @@ -8,44 +6,7 @@ from fluentai.constants.config import config from fluentai.logger import logger - - -def manage_model_memory(method): - """ - Decorator to manage model memory by offloading to GPU before the method call. - """ - - @functools.wraps(method) - def wrapper(self, *args, **kwargs): - # Initialize the pipe if it's not already loaded - if self.pipe is None: - self._initialize_pipe() - - # Move to GPU if offloading is enabled - if self.offload: - logger.debug("Moving the pipeline to GPU (cuda).") - self.pipe.to("cuda") - - try: - # Execute the decorated method - result = method(self, *args, **kwargs) - finally: - # Delete the pipeline if DELETE_AFTER_USE is True - if self.config.get("DELETE_AFTER_USE", True): - logger.debug("Deleting the pipeline to free up memory.") - del self.pipe - self.pipe = None - gc.collect() - torch.cuda.empty_cache() - - # Move the pipeline back to CPU if offloading is enabled - if self.offload and self.pipe is not None: - logger.debug("Moving the pipeline back to CPU.") - self.pipe.to("cpu", silence_dtype_warnings=True) - - return result - - return wrapper +from fluentai.utils.model_mem import manage_memory class ImageGen: @@ -102,7 +63,11 @@ def _initialize_pipe(self): cache_dir="models", ) - @manage_model_memory + @manage_memory( + targets=["pipe"], + delete_attrs=["pipe"], + move_kwargs={"silence_dtype_warnings": True}, + ) def generate_img( self, prompt: str = "A flashy bottle that stands out from the other bottles.", diff --git a/backend/fluentai/services/imagine/verbal_cue.py b/backend/fluentai/services/imagine/verbal_cue_gen.py similarity index 67% rename from backend/fluentai/services/imagine/verbal_cue.py rename to backend/fluentai/services/imagine/verbal_cue_gen.py index 19c96dd..b01358c 100644 --- a/backend/fluentai/services/imagine/verbal_cue.py +++ b/backend/fluentai/services/imagine/verbal_cue_gen.py @@ -1,53 +1,8 @@ -import functools -import gc - -import torch from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline from fluentai.constants.config import config from fluentai.logger import logger - - -def manage_model_memory(method): - """ - Decorator to manage model memory by offloading to GPU before the method call. - """ - - @functools.wraps(method) - def wrapper(self, *args, **kwargs): - # Initialize the pipe if it's not already loaded - if self.pipe is None: - self._initialize_pipe() - - # Move to GPU if offloading is enabled - if self.offload: - logger.debug("Moving the model to GPU (cuda).") - self.model.to("cuda") - - try: - # Execute the decorated method - result = method(self, *args, **kwargs) - finally: - # Delete the pipeline if DELETE_AFTER_USE is True - if self.config.get("DELETE_AFTER_USE", True): - logger.debug("Deleting the model to free up memory.") - del self.model - del self.pipe - del self.tokenizer - self.model = None - self.pipe = None - self.tokenizer = None - gc.collect() - torch.cuda.empty_cache() - - # Move the pipeline back to CPU if offloading is enabled - if self.offload and self.pipe is not None: - logger.debug("Moving the model back to CPU.") - self.model.to("cpu") - - return result - - return wrapper +from fluentai.utils.model_mem import manage_memory class VerbalCue: @@ -101,7 +56,9 @@ def _initialize_pipe(self): tokenizer=self.tokenizer, ) - @manage_model_memory + @manage_memory( + targets=["model"], delete_attrs=["model", "pipe", "tokenizer"], move_kwargs={} + ) def generate_cue(self, word1: str, word2: str) -> str: """ Generate a verbal cue that connects two words. diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/data.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/data.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/imag_models/data.py rename to backend/fluentai/services/mnemonic/imageability/imageability_models/data.py diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/imag_models/ensemble.py rename to backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/models.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/imag_models/models.py rename to backend/fluentai/services/mnemonic/imageability/imageability_models/models.py diff --git a/backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/imag_models/optimization.py rename to backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py diff --git a/backend/fluentai/services/mnemonic/imageability/make_data/imageabilitycorpus.py b/backend/fluentai/services/mnemonic/imageability/make_data/corpus.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/make_data/imageabilitycorpus.py rename to backend/fluentai/services/mnemonic/imageability/make_data/corpus.py diff --git a/backend/fluentai/services/mnemonic/imageability/make_data/complete.py b/backend/fluentai/services/mnemonic/imageability/make_data/run.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/make_data/complete.py rename to backend/fluentai/services/mnemonic/imageability/make_data/run.py diff --git a/backend/fluentai/services/mnemonic/imageability/imageability.py b/backend/fluentai/services/mnemonic/imageability/predictor.py similarity index 100% rename from backend/fluentai/services/mnemonic/imageability/imageability.py rename to backend/fluentai/services/mnemonic/imageability/predictor.py diff --git a/backend/fluentai/services/mnemonic/orthographic/orthographic.py b/backend/fluentai/services/mnemonic/orthographic/compute.py similarity index 100% rename from backend/fluentai/services/mnemonic/orthographic/orthographic.py rename to backend/fluentai/services/mnemonic/orthographic/compute.py diff --git a/backend/fluentai/services/mnemonic/orthographic/eval.py b/backend/fluentai/services/mnemonic/orthographic/eval.py index f3e4ddf..7758872 100644 --- a/backend/fluentai/services/mnemonic/orthographic/eval.py +++ b/backend/fluentai/services/mnemonic/orthographic/eval.py @@ -1,11 +1,11 @@ import pandas as pd from datasets import load_dataset -from orthographic import compute_similarity from scipy.stats import pearsonr, spearmanr from sklearn.preprocessing import MinMaxScaler from fluentai.constants.config import config from fluentai.logger import logger +from fluentai.services.mnemonic.orthographic.compute import compute_similarity def scale_ratings(ratings: pd.Series) -> pd.Series: diff --git a/backend/fluentai/services/mnemonic/phonetic/phonetic.py b/backend/fluentai/services/mnemonic/phonetic/compute.py similarity index 93% rename from backend/fluentai/services/mnemonic/phonetic/phonetic.py rename to backend/fluentai/services/mnemonic/phonetic/compute.py index c68c839..2dd5fa2 100644 --- a/backend/fluentai/services/mnemonic/phonetic/phonetic.py +++ b/backend/fluentai/services/mnemonic/phonetic/compute.py @@ -6,9 +6,9 @@ from fluentai.constants.config import config from fluentai.logger import logger from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec -from fluentai.services.mnemonic.phonetic.utils.utils import ( +from fluentai.services.mnemonic.phonetic.utils.cache import load_from_cache +from fluentai.services.mnemonic.phonetic.utils.vectors import ( convert_to_matrix, - load_cache, pad_vectors, ) @@ -124,7 +124,7 @@ def top_phonetic( ipa = word2ipa(input_word, language_code, g2p_model) # Attempt to load from cache - dataset = load_cache(method) + dataset = load_from_cache(method) dataset_vectors_flat = dataset["flattened_vectors"].tolist() @@ -175,7 +175,7 @@ def top_phonetic( os.environ["KMP_DUPLICATE_LIB_OK"] = "True" # Load the G2P model - from fluentai.services.mnemonic.phonetic.g2p import G2P + from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme - result = top_phonetic(word_input, language_code, top_n, G2P()) + result = top_phonetic(word_input, language_code, top_n, Grapheme2Phoneme()) print(result) diff --git a/backend/fluentai/services/mnemonic/phonetic/eval.py b/backend/fluentai/services/mnemonic/phonetic/eval.py index 98b7c85..534ab90 100644 --- a/backend/fluentai/services/mnemonic/phonetic/eval.py +++ b/backend/fluentai/services/mnemonic/phonetic/eval.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd from datasets import load_dataset -from g2p import G2P from huggingface_hub import hf_hub_download from scipy.stats import pearsonr, spearmanr from sklearn.metrics.pairwise import cosine_similarity @@ -11,6 +10,7 @@ from fluentai.constants.config import config from fluentai.logger import logger +from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec @@ -18,7 +18,7 @@ def word2ipa( word: str, ipa_dataset: pd.DataFrame, use_fallback: bool = True, - g2p_model: G2P = G2P(), + g2p_model: Grapheme2Phoneme = Grapheme2Phoneme(), ) -> str: """ Convert a word to its IPA transcription using the dataset and fallback to the g2p model if necessary. diff --git a/backend/fluentai/services/mnemonic/phonetic/g2p.py b/backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py similarity index 97% rename from backend/fluentai/services/mnemonic/phonetic/g2p.py rename to backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py index 967ad5d..2e03d76 100644 --- a/backend/fluentai/services/mnemonic/phonetic/g2p.py +++ b/backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py @@ -4,7 +4,7 @@ from fluentai.logger import logger -class G2P: +class Grapheme2Phoneme: def __init__(self): # https://github.com/lingjzhu/CharsiuG2P logger.debug("Loading G2P model") @@ -49,7 +49,7 @@ def example(): """ Example usage of the G2P module. It prints the phonetic transcription of the words in Indonesian, English, and Dutch. """ - g2p = G2P() + g2p = Grapheme2Phoneme() # https://en.wiktionary.org/wiki/kucing#Indonesian # IPA(key): /ˈkut͡ʃɪŋ/ diff --git a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py index 6aafc64..ad9b416 100644 --- a/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py +++ b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py @@ -6,7 +6,7 @@ from soundvectors import SoundVectors from fluentai.logger import logger -from fluentai.services.mnemonic.phonetic.utils.clts_utils import get_clts +from fluentai.services.mnemonic.phonetic.utils.clts import get_clts # Test if data/clts exists if not os.path.exists("local_data/clts"): diff --git a/backend/fluentai/services/mnemonic/phonetic/utils/cache.py b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py new file mode 100644 index 0000000..8283467 --- /dev/null +++ b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py @@ -0,0 +1,34 @@ +import pandas as pd +from huggingface_hub import hf_hub_download + +from fluentai.constants.config import config +from fluentai.logger import logger + + +def load_from_cache(method: str = "panphon"): + """ + Load the processed dataset from a cache file. + + Parameters + ---------- + - cache_file: String, path to the cache file + + Returns + ------- + - DataFrame containing the cached dataset + """ + repo = config.get("PHONETIC_SIM").get("EMBEDDINGS").get("REPO") + # Remove the file extension to get the dataset name + dataset = config.get("PHONETIC_SIM").get("IPA").get("FILE").split(".")[0] + file = f"{dataset}_{method}.parquet" + + dataset = pd.read_parquet( + hf_hub_download( + repo_id=repo, + filename=file, + cache_dir="datasets", + repo_type="dataset", + ) + ) + logger.info(f"Loaded parsed dataset from '{repo}' and file {file}.") + return dataset diff --git a/backend/fluentai/services/mnemonic/phonetic/utils/clts_utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/clts.py similarity index 100% rename from backend/fluentai/services/mnemonic/phonetic/utils/clts_utils.py rename to backend/fluentai/services/mnemonic/phonetic/utils/clts.py diff --git a/backend/fluentai/services/mnemonic/phonetic/utils/utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/vectors.py similarity index 75% rename from backend/fluentai/services/mnemonic/phonetic/utils/utils.py rename to backend/fluentai/services/mnemonic/phonetic/utils/vectors.py index b019fdb..b2d8110 100644 --- a/backend/fluentai/services/mnemonic/phonetic/utils/utils.py +++ b/backend/fluentai/services/mnemonic/phonetic/utils/vectors.py @@ -1,10 +1,7 @@ import ast import numpy as np -import pandas as pd -from huggingface_hub import hf_hub_download -from fluentai.constants.config import config from fluentai.logger import logger @@ -65,35 +62,6 @@ def parse_vectors(dataset, vector_column="vectors"): return dataset -def load_cache(method: str = "panphon"): - """ - Load the processed dataset from a cache file. - - Parameters - ---------- - - cache_file: String, path to the cache file - - Returns - ------- - - DataFrame containing the cached dataset - """ - repo = config.get("PHONETIC_SIM").get("EMBEDDINGS").get("REPO") - # Remove the file extension to get the dataset name - dataset = config.get("PHONETIC_SIM").get("IPA").get("FILE").split(".")[0] - file = f"{dataset}_{method}.parquet" - - dataset = pd.read_parquet( - hf_hub_download( - repo_id=repo, - filename=file, - cache_dir="datasets", - repo_type="dataset", - ) - ) - logger.info(f"Loaded parsed dataset from '{repo}' and file {file}.") - return dataset - - def flatten_vector(vec): """ Flatten a nested list of vectors into a single 1D NumPy array. diff --git a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py index 63438b8..e816911 100644 --- a/backend/fluentai/services/mnemonic/phonetic/vectorizer.py +++ b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py @@ -8,7 +8,7 @@ from fluentai.constants.config import config from fluentai.logger import logger from fluentai.services.mnemonic.phonetic.ipa2vec import ft, sv -from fluentai.services.mnemonic.phonetic.utils.utils import flatten_vectors +from fluentai.services.mnemonic.phonetic.utils.vectors import flatten_vectors def vectorize_word_clts(word, sv): diff --git a/backend/fluentai/services/mnemonic/semantic/semantic.py b/backend/fluentai/services/mnemonic/semantic/compute.py similarity index 100% rename from backend/fluentai/services/mnemonic/semantic/semantic.py rename to backend/fluentai/services/mnemonic/semantic/compute.py diff --git a/backend/fluentai/services/mnemonic/semantic/eval.py b/backend/fluentai/services/mnemonic/semantic/eval.py index 831b598..a51b456 100644 --- a/backend/fluentai/services/mnemonic/semantic/eval.py +++ b/backend/fluentai/services/mnemonic/semantic/eval.py @@ -11,7 +11,7 @@ from fluentai.constants.config import config from fluentai.logger import logger -from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity def compute_dataset_hash(df: pd.DataFrame) -> str: diff --git a/backend/fluentai/services/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py index 9b0b19a..0762ef9 100644 --- a/backend/fluentai/services/mnemonic/word2mnemonic.py +++ b/backend/fluentai/services/mnemonic/word2mnemonic.py @@ -1,21 +1,23 @@ from fluentai.constants.config import config, weights_percentages from fluentai.constants.languages import G2P_LANGUAGES from fluentai.logger import logger -from fluentai.services.mnemonic.imageability.imageability import ( +from fluentai.services.mnemonic.imageability.predictor import ( ImageabilityPredictor, ) -from fluentai.services.mnemonic.orthographic.orthographic import ( +from fluentai.services.mnemonic.orthographic.compute import ( compute_damerau_levenshtein_similarity, ) -from fluentai.services.mnemonic.phonetic.g2p import G2P -from fluentai.services.mnemonic.phonetic.phonetic import top_phonetic, word2ipa -from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.mnemonic.phonetic.compute import top_phonetic, word2ipa +from fluentai.services.mnemonic.phonetic.grapheme2phoneme import ( + Grapheme2Phoneme, +) +from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity from fluentai.services.mnemonic.semantic.translator import translate_word class Word2Mnemonic: def __init__(self): - self.g2p_model = G2P() + self.g2p_model = Grapheme2Phoneme() self.imageability_predictor = ImageabilityPredictor() self.semantic_sim = SemanticSimilarity() diff --git a/backend/fluentai/utils/lang_codes.py b/backend/fluentai/utils/lang_codes.py index 8d7e4ba..46e1ba0 100644 --- a/backend/fluentai/utils/lang_codes.py +++ b/backend/fluentai/utils/lang_codes.py @@ -1,8 +1,8 @@ import pycountry +from googletrans import LANGCODES as TRANSLATE_LANGCODES from fluentai.constants.languages import ( G2P_LANGCODES, - TRANSLATE_LANGCODES, ) diff --git a/backend/fluentai/utils/load_models.py b/backend/fluentai/utils/load_models.py index 8317470..c714fd1 100644 --- a/backend/fluentai/utils/load_models.py +++ b/backend/fluentai/utils/load_models.py @@ -6,9 +6,9 @@ from fluentai.constants.config import config from fluentai.logger import logger from fluentai.services.imagine.image_gen import ImageGen -from fluentai.services.imagine.verbal_cue import VerbalCue -from fluentai.services.mnemonic.phonetic.g2p import G2P -from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.imagine.verbal_cue_gen import VerbalCue +from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme +from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity def get_model_dir_name(model: str) -> str: @@ -43,7 +43,7 @@ def download_all_models(): g2p_model = config.get("G2P").get("MODEL") if get_model_dir_name(g2p_model) not in downloaded_models: logger.info(f"Downloading G2P model: {g2p_model}") - clean(G2P()) + clean(Grapheme2Phoneme()) # LLM model llm_model = config.get("LLM").get("MODEL") diff --git a/backend/fluentai/utils/model_mem.py b/backend/fluentai/utils/model_mem.py new file mode 100644 index 0000000..614b519 --- /dev/null +++ b/backend/fluentai/utils/model_mem.py @@ -0,0 +1,71 @@ +import functools +import gc + +import torch + +from fluentai.logger import logger + + +def manage_memory(targets=None, delete_attrs=None, move_kwargs=None): + """ + Decorator to manage memory by moving specified attributes to GPU before method call and back to CPU after method. + + Args: + targets (list[str]): List of attribute names to move to GPU (e.g., ['model', 'pipe']). + delete_attrs (list[str]): List of attribute names to delete after method execution. + move_kwargs (dict): Additional keyword arguments to pass to the `.to()` method. + + Returns + ------- + function: Decorated method. + """ + if targets is None: + targets = [] + if delete_attrs is None: + delete_attrs = [] + if move_kwargs is None: + move_kwargs = {} + + def decorator(method): + @functools.wraps(method) + def wrapper(self, *args, **kwargs): + # Initialize the pipe if it's not already loaded + if getattr(self, "pipe", None) is None: + self._initialize_pipe() + + # Move specified targets to GPU if offloading is enabled + if getattr(self, "offload", False): + for target in targets: + attr = getattr(self, target, None) + if attr is not None: + logger.debug(f"Moving {target} to GPU (cuda).") + attr.to("cuda", **move_kwargs) + + try: + # Execute the decorated method + result = method(self, *args, **kwargs) + finally: + # Delete specified attributes if DELETE_AFTER_USE is True + if self.config.get("DELETE_AFTER_USE", True): + for attr_name in delete_attrs: + attr = getattr(self, attr_name, None) + if attr is not None: + logger.debug(f"Deleting {attr_name} to free up memory.") + delattr(self, attr_name) + setattr(self, attr_name, None) + gc.collect() + torch.cuda.empty_cache() + + # Move specified targets back to CPU if offloading is enabled + if getattr(self, "offload", False): + for target in targets: + attr = getattr(self, target, None) + if attr is not None: + logger.debug(f"Moving {target} back to CPU.") + attr.to("cpu", **move_kwargs) + + return result + + return wrapper + + return decorator diff --git a/backend/tests/test_services/test_imageability.py b/backend/tests/test_services/test_imageability.py index 3749497..1b0e793 100644 --- a/backend/tests/test_services/test_imageability.py +++ b/backend/tests/test_services/test_imageability.py @@ -1,5 +1,3 @@ -# tests/test_py - import os from unittest.mock import MagicMock @@ -10,7 +8,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa # Import the functions and classes to be tested -from fluentai.services.mnemonic.imageability.imageability import ( +from backend.fluentai.services.mnemonic.imageability.predictor import ( ImageabilityPredictor, make_predictions, ) @@ -22,7 +20,7 @@ def mock_hf_hub_download(mocker): Fixture to mock hf_hub_download function. """ return mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.hf_hub_download" + "fluentai.services.mnemonic.imageability.predictor.hf_hub_download" ) @@ -31,9 +29,7 @@ def mock_joblib_load(mocker): """ Fixture to mock joblib.load function. """ - return mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.joblib.load" - ) + return mocker.patch("fluentai.services.mnemonic.imageability.predictor.joblib.load") @pytest.fixture @@ -41,9 +37,7 @@ def mock_pd_read_csv(mocker): """ Fixture to mock pandas.read_csv function. """ - return mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.pd.read_csv" - ) + return mocker.patch("fluentai.services.mnemonic.imageability.predictor.pd.read_csv") @pytest.fixture @@ -52,7 +46,7 @@ def mock_ImageabilityEmbeddings(mocker): Fixture to mock ImageabilityEmbeddings class. """ mock_class = mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.ImageabilityEmbeddings" + "fluentai.services.mnemonic.imageability.predictor.ImageabilityEmbeddings" ) mock_instance = MagicMock() mock_instance.get_embedding.side_effect = lambda word: np.array([1.0, 2.0, 3.0]) @@ -72,7 +66,7 @@ def test_make_predictions( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.config" + "fluentai.services.mnemonic.imageability.predictor.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -109,7 +103,7 @@ def test_make_predictions( # Mock tqdm to just return the iterator mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.tqdm", + "fluentai.services.mnemonic.imageability.predictor.tqdm", side_effect=lambda x, total=None: x, ) @@ -142,7 +136,7 @@ def test_ImageabilityPredictor_get_prediction( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.config" + "fluentai.services.mnemonic.imageability.predictor.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -185,7 +179,7 @@ def test_ImageabilityPredictor_get_predictions( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.config" + "fluentai.services.mnemonic.imageability.predictor.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { @@ -225,7 +219,7 @@ def test_ImageabilityPredictor_get_column_imageability( """ # Mock the configuration mock_config = mocker.patch( - "fluentai.services.mnemonic.imageability.imageability.config" + "fluentai.services.mnemonic.imageability.predictor.config" ) mock_config.get.side_effect = lambda key: { "IMAGEABILITY": { diff --git a/backend/tests/test_services/test_orthographic.py b/backend/tests/test_services/test_orthographic.py index 4b18ac0..ae0885d 100644 --- a/backend/tests/test_services/test_orthographic.py +++ b/backend/tests/test_services/test_orthographic.py @@ -1,12 +1,10 @@ -# test_similarity.py - import os import pytest os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa -from fluentai.services.mnemonic.orthographic.orthographic import ( +from fluentai.services.mnemonic.orthographic.compute import ( compute_damerau_levenshtein_similarity, ) diff --git a/backend/tests/test_services/test_phonetic.py b/backend/tests/test_services/test_phonetic.py index 2bf3e58..85b15a7 100644 --- a/backend/tests/test_services/test_phonetic.py +++ b/backend/tests/test_services/test_phonetic.py @@ -1,5 +1,3 @@ -# tests/card_gen/test_phonetic.py - import os from unittest.mock import MagicMock, patch @@ -10,7 +8,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa # Import the top_phonetic function -from fluentai.services.mnemonic.phonetic.phonetic import top_phonetic +from fluentai.services.mnemonic.phonetic.compute import top_phonetic @pytest.fixture @@ -18,7 +16,7 @@ def mock_config(mocker): """ Fixture to mock the config.get method. """ - return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.config") + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.config") @pytest.fixture @@ -26,7 +24,7 @@ def mock_word2ipa(mocker): """ Fixture to mock the word2ipa function. """ - return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.word2ipa") + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.word2ipa") @pytest.fixture @@ -34,7 +32,7 @@ def mock_load_cache(mocker): """ Fixture to mock the load_cache function. """ - return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.load_cache") + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.load_cache") @pytest.fixture @@ -42,7 +40,7 @@ def mock_pad_vectors(mocker): """ Fixture to mock the pad_vectors function. """ - return mocker.patch("fluentai.services.mnemonic.phonetic.phonetic.pad_vectors") + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.pad_vectors") @pytest.fixture @@ -50,9 +48,7 @@ def mock_convert_to_matrix(mocker): """ Fixture to mock the convert_to_matrix function. """ - return mocker.patch( - "fluentai.services.mnemonic.phonetic.phonetic.convert_to_matrix" - ) + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.convert_to_matrix") @pytest.fixture @@ -61,7 +57,7 @@ def mock_faiss_normalize_L2(mocker): Fixture to mock the faiss.normalize_L2 function. """ return mocker.patch( - "fluentai.services.mnemonic.phonetic.phonetic.faiss.normalize_L2" + "fluentai.services.mnemonic.phonetic.compute.faiss.normalize_L2" ) @@ -74,7 +70,7 @@ def mock_faiss_IndexFlatIP(mocker): """ instance_mock = MagicMock() constructor_mock = mocker.patch( - "fluentai.services.mnemonic.phonetic.phonetic.faiss.IndexFlatIP", + "fluentai.services.mnemonic.phonetic.compute.faiss.IndexFlatIP", return_value=instance_mock, ) return constructor_mock, instance_mock @@ -144,7 +140,7 @@ def test_top_phonetic_success( # Create a mock vectorizer function (panphon_vec or soundvec) with patch( - "fluentai.services.mnemonic.phonetic.phonetic.panphon_vec", + "fluentai.services.mnemonic.phonetic.compute.panphon_vec", return_value=[[0.1, 0.2, 0.3]], ): # Initialize a mock g2p_model with a g2p method @@ -263,7 +259,7 @@ def test_top_phonetic_no_results( # Create a mock vectorizer function (panphon_vec or soundvec) with patch( - "fluentai.services.mnemonic.phonetic.phonetic.panphon_vec", + "fluentai.services.mnemonic.phonetic.compute.panphon_vec", return_value=[[]], ): # Initialize a mock g2p_model with a g2p method @@ -384,7 +380,7 @@ def test_top_phonetic_invalid_language_code( # Create a mock vectorizer function (soundvec) with patch( - "fluentai.services.mnemonic.phonetic.phonetic.soundvec", + "fluentai.services.mnemonic.phonetic.compute.soundvec", return_value=[[0.2, 0.3, 0.4]], ): # Initialize a mock g2p_model with a g2p method diff --git a/backend/tests/test_services/test_semantic.py b/backend/tests/test_services/test_semantic.py index b18c1b5..959493a 100644 --- a/backend/tests/test_services/test_semantic.py +++ b/backend/tests/test_services/test_semantic.py @@ -8,7 +8,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa from fluentai.constants.config import config -from fluentai.services.mnemonic.semantic.semantic import SemanticSimilarity +from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity model_name = config.get("SEMANTIC_SIM").get("MODEL").lower() @@ -130,7 +130,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog """ Test the example function to ensure it logs similarities correctly. """ - from fluentai.services.mnemonic.semantic.semantic import example + from backend.fluentai.services.mnemonic.semantic.compute import example # Setup mock config to return models mock_config.get.return_value = { diff --git a/config.yaml b/config.yaml index 6874b7a..880ea80 100644 --- a/config.yaml +++ b/config.yaml @@ -17,6 +17,7 @@ WORD_LIMIT: 1000 G2P: MODEL: "charsiu/g2p_multilingual_byT5_small_100" TOKENIZER: "google/byt5-small" + LANGUAGE_JSON: "data/languages.json" LLM: MODEL: "microsoft/Phi-3-mini-4k-instruct" From d142020fbb19e2518d96393b169e0bb6f40473f9 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 13:30:30 +0100 Subject: [PATCH 09/15] Fix pytest --- backend/tests/test_services/test_imageability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/tests/test_services/test_imageability.py b/backend/tests/test_services/test_imageability.py index 1b0e793..4b18a01 100644 --- a/backend/tests/test_services/test_imageability.py +++ b/backend/tests/test_services/test_imageability.py @@ -8,7 +8,7 @@ os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa # Import the functions and classes to be tested -from backend.fluentai.services.mnemonic.imageability.predictor import ( +from fluentai.services.mnemonic.imageability.predictor import ( ImageabilityPredictor, make_predictions, ) From 235b2a5a1e0309ed195f580e5da34b9d462123a0 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 13:35:11 +0100 Subject: [PATCH 10/15] update word2mnemonic --- backend/fluentai/services/mnemonic/word2mnemonic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/fluentai/services/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py index 8ea74f0..e04bc1b 100644 --- a/backend/fluentai/services/mnemonic/word2mnemonic.py +++ b/backend/fluentai/services/mnemonic/word2mnemonic.py @@ -3,7 +3,7 @@ from fluentai.constants.config import config, weights_percentages from fluentai.constants.languages import G2P_LANGUAGES from fluentai.logger import logger -from fluentai.services.card_gen.mnemonic.imageability.imageability import ( +from fluentai.services.mnemonic.imageability.predictor import ( ImageabilityPredictor, ) from fluentai.services.mnemonic.orthographic.compute import ( From 6a4df9ed81e41c2e5cf8ab67d0343e3d0d4b8d96 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 13:58:56 +0100 Subject: [PATCH 11/15] fix tests --- backend/tests/test_services/test_phonetic.py | 40 ++++++++++---------- backend/tests/test_services/test_semantic.py | 14 +++---- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/backend/tests/test_services/test_phonetic.py b/backend/tests/test_services/test_phonetic.py index 85b15a7..3f212fe 100644 --- a/backend/tests/test_services/test_phonetic.py +++ b/backend/tests/test_services/test_phonetic.py @@ -28,11 +28,13 @@ def mock_word2ipa(mocker): @pytest.fixture -def mock_load_cache(mocker): +def mock_load_from_cache(mocker): """ - Fixture to mock the load_cache function. + Fixture to mock the load_from_cache function. """ - return mocker.patch("fluentai.services.mnemonic.phonetic.compute.load_cache") + return mocker.patch( + "fluentai.services.mnemonic.phonetic.utils.cache.load_from_cache" + ) @pytest.fixture @@ -79,7 +81,7 @@ def mock_faiss_IndexFlatIP(mocker): def test_top_phonetic_success( mock_config, mock_word2ipa, - mock_load_cache, + mock_load_from_cache, mock_pad_vectors, mock_convert_to_matrix, mock_faiss_normalize_L2, @@ -114,8 +116,8 @@ def test_top_phonetic_success( } ) - # Mock the load_cache function to return the mock dataset - mock_load_cache.return_value = mock_dataset + # Mock the load_from_cache function to return the mock dataset + mock_load_from_cache.return_value = mock_dataset # Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity) mock_pad_vectors.return_value = [ @@ -159,8 +161,8 @@ def test_top_phonetic_success( # Ensure word2ipa was called correctly mock_word2ipa.assert_called_once_with("kucing", "eng-us", mock_g2p_model) - # Ensure load_cache was called with the correct method - mock_load_cache.assert_called_once_with("panphon") + # Ensure load_from_cache was called with the correct method + mock_load_from_cache.assert_called_once_with("panphon") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( @@ -210,7 +212,7 @@ def test_top_phonetic_success( def test_top_phonetic_no_results( mock_config, mock_word2ipa, - mock_load_cache, + mock_load_from_cache, mock_pad_vectors, mock_convert_to_matrix, mock_faiss_normalize_L2, @@ -237,8 +239,8 @@ def test_top_phonetic_no_results( {"token_ort": [], "token_ipa": [], "flattened_vectors": []} ) - # Mock the load_cache function to return the empty dataset - mock_load_cache.return_value = mock_dataset + # Mock the load_from_cache function to return the empty dataset + mock_load_from_cache.return_value = mock_dataset # Mock pad_vectors to return empty list mock_pad_vectors.return_value = [] @@ -275,8 +277,8 @@ def test_top_phonetic_no_results( # Ensure word2ipa was called correctly mock_word2ipa.assert_called_once_with("test", "eng-us", mock_g2p_model) - # Ensure load_cache was called with the correct method - mock_load_cache.assert_called_once_with("panphon") + # Ensure load_from_cache was called with the correct method + mock_load_from_cache.assert_called_once_with("panphon") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( @@ -318,7 +320,7 @@ def test_top_phonetic_no_results( def test_top_phonetic_invalid_language_code( mock_config, mock_word2ipa, - mock_load_cache, + mock_load_from_cache, mock_pad_vectors, mock_convert_to_matrix, mock_faiss_normalize_L2, @@ -334,7 +336,7 @@ def test_top_phonetic_invalid_language_code( # Setup mock config.get to return necessary configuration mock_config.get.return_value = { # Testing with a different vectorizer - "EMBEDDINGS": {"METHOD": "soundvec"}, + "EMBEDDINGS": {"METHOD": "clts"}, "PHONETIC_SIM": {"IPA_REPO": "mock_repo", "IPA_FILE": "mock_file.tsv"}, } @@ -354,8 +356,8 @@ def test_top_phonetic_invalid_language_code( } ) - # Mock the load_cache function to return the mock dataset - mock_load_cache.return_value = mock_dataset + # Mock the load_from_cache function to return the mock dataset + mock_load_from_cache.return_value = mock_dataset # Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity) mock_pad_vectors.return_value = [ @@ -399,8 +401,8 @@ def test_top_phonetic_invalid_language_code( # Ensure word2ipa was called correctly mock_word2ipa.assert_called_once_with("nyangang", "mal", mock_g2p_model) - # Ensure load_cache was called with the correct method - mock_load_cache.assert_called_once_with("soundvec") + # Ensure load_from_cache was called with the correct method + mock_load_from_cache.assert_called_once_with("soundvec") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( diff --git a/backend/tests/test_services/test_semantic.py b/backend/tests/test_services/test_semantic.py index 959493a..44bf2dd 100644 --- a/backend/tests/test_services/test_semantic.py +++ b/backend/tests/test_services/test_semantic.py @@ -1,5 +1,3 @@ -# tests/card_gen/test_semantic.py - import os from unittest.mock import MagicMock, patch @@ -18,7 +16,7 @@ def mock_config(mocker): """ Fixture to mock the config.get method. """ - return mocker.patch("fluentai.services.mnemonic.semantic.semantic.config") + return mocker.patch("fluentai.services.mnemonic.semantic.compute.config") @pytest.fixture @@ -48,7 +46,7 @@ def test_compute_similarity_transformer(mock_config, mock_sentence_transformer): # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.compute.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -87,7 +85,7 @@ def test_compute_similarity_word_not_in_transformer( # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.compute.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -112,7 +110,7 @@ def test_load_semantic_model_transformer(mock_config, mock_sentence_transformer) # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.compute.SentenceTransformer", return_value=mock_sentence_transformer, ): # Initialize SemanticSimilarity @@ -130,7 +128,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog """ Test the example function to ensure it logs similarities correctly. """ - from backend.fluentai.services.mnemonic.semantic.compute import example + from fluentai.services.mnemonic.semantic.compute import example # Setup mock config to return models mock_config.get.return_value = { @@ -140,7 +138,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog # Patch 'SentenceTransformer' to return the mock transformer model with patch( - "fluentai.services.mnemonic.semantic.semantic.SentenceTransformer", + "fluentai.services.mnemonic.semantic.compute.SentenceTransformer", return_value=mock_sentence_transformer, ): # Configure the mock models From 87b5c30166a6aeb6379949c01a6aceff61e34bf5 Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 14:00:30 +0100 Subject: [PATCH 12/15] disable cache tests --- backend/tests/test_services/test_phonetic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/tests/test_services/test_phonetic.py b/backend/tests/test_services/test_phonetic.py index 3f212fe..992e758 100644 --- a/backend/tests/test_services/test_phonetic.py +++ b/backend/tests/test_services/test_phonetic.py @@ -162,7 +162,7 @@ def test_top_phonetic_success( mock_word2ipa.assert_called_once_with("kucing", "eng-us", mock_g2p_model) # Ensure load_from_cache was called with the correct method - mock_load_from_cache.assert_called_once_with("panphon") + # mock_load_from_cache.assert_called_once_with("panphon") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( @@ -357,7 +357,7 @@ def test_top_phonetic_invalid_language_code( ) # Mock the load_from_cache function to return the mock dataset - mock_load_from_cache.return_value = mock_dataset + # mock_load_from_cache.return_value = mock_dataset # Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity) mock_pad_vectors.return_value = [ @@ -402,7 +402,7 @@ def test_top_phonetic_invalid_language_code( mock_word2ipa.assert_called_once_with("nyangang", "mal", mock_g2p_model) # Ensure load_from_cache was called with the correct method - mock_load_from_cache.assert_called_once_with("soundvec") + # mock_load_from_cache.assert_called_once_with("soundvec") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( From 44cb8bad5943231964ecc874eb5008f74844041d Mon Sep 17 00:00:00 2001 From: StephanAkkerman Date: Sun, 5 Jan 2025 14:29:00 +0100 Subject: [PATCH 13/15] fix mocks --- .../fluentai/services/mnemonic/phonetic/utils/cache.py | 2 ++ backend/tests/test_services/test_phonetic.py | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/backend/fluentai/services/mnemonic/phonetic/utils/cache.py b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py index 8283467..354569c 100644 --- a/backend/fluentai/services/mnemonic/phonetic/utils/cache.py +++ b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py @@ -17,6 +17,8 @@ def load_from_cache(method: str = "panphon"): ------- - DataFrame containing the cached dataset """ + logger.debug("Loading the cached dataset from Huggingface") + repo = config.get("PHONETIC_SIM").get("EMBEDDINGS").get("REPO") # Remove the file extension to get the dataset name dataset = config.get("PHONETIC_SIM").get("IPA").get("FILE").split(".")[0] diff --git a/backend/tests/test_services/test_phonetic.py b/backend/tests/test_services/test_phonetic.py index 992e758..64e293f 100644 --- a/backend/tests/test_services/test_phonetic.py +++ b/backend/tests/test_services/test_phonetic.py @@ -32,9 +32,7 @@ def mock_load_from_cache(mocker): """ Fixture to mock the load_from_cache function. """ - return mocker.patch( - "fluentai.services.mnemonic.phonetic.utils.cache.load_from_cache" - ) + return mocker.patch("fluentai.services.mnemonic.phonetic.compute.load_from_cache") @pytest.fixture @@ -162,7 +160,7 @@ def test_top_phonetic_success( mock_word2ipa.assert_called_once_with("kucing", "eng-us", mock_g2p_model) # Ensure load_from_cache was called with the correct method - # mock_load_from_cache.assert_called_once_with("panphon") + mock_load_from_cache.assert_called_once_with("panphon") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( @@ -357,7 +355,7 @@ def test_top_phonetic_invalid_language_code( ) # Mock the load_from_cache function to return the mock dataset - # mock_load_from_cache.return_value = mock_dataset + mock_load_from_cache.return_value = mock_dataset # Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity) mock_pad_vectors.return_value = [ @@ -402,7 +400,7 @@ def test_top_phonetic_invalid_language_code( mock_word2ipa.assert_called_once_with("nyangang", "mal", mock_g2p_model) # Ensure load_from_cache was called with the correct method - # mock_load_from_cache.assert_called_once_with("soundvec") + mock_load_from_cache.assert_called_once_with("clts") # Ensure pad_vectors was called with the correct data mock_pad_vectors.assert_called_once_with( From f39e3ce01005330250cdc2e884b7626db0d23ae3 Mon Sep 17 00:00:00 2001 From: Stephan Akkerman Date: Sun, 5 Jan 2025 15:38:22 +0100 Subject: [PATCH 14/15] fix imageability_models --- .../mnemonic/imageability/imageability_models/ensemble.py | 2 +- .../mnemonic/imageability/imageability_models/models.py | 6 +++--- .../imageability/imageability_models/optimization.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py index 26f2d41..26f5739 100644 --- a/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py +++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py @@ -11,7 +11,7 @@ from fluentai.constants.config import config from fluentai.logger import logger -from fluentai.services.mnemonic.imageability.imag_models.data import ( +from fluentai.services.mnemonic.imageability.imageability_models.data import ( append_to_log, ) diff --git a/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py index 701e3e3..0ef61ac 100644 --- a/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py +++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py @@ -20,7 +20,7 @@ from fluentai.constants.config import config from fluentai.logger import logger -from fluentai.services.mnemonic.imageability.imag_models.data import ( +from fluentai.services.mnemonic.imageability.imageability_models.data import ( append_to_log, ensure_logs_directory, load_data, @@ -29,10 +29,10 @@ split_dataset, upload_model, ) -from fluentai.services.mnemonic.imageability.imag_models.ensemble import ( +from fluentai.services.mnemonic.imageability.imageability_models.ensemble import ( implement_ensemble_methods, ) -from fluentai.services.mnemonic.imageability.imag_models.optimization import ( +from fluentai.services.mnemonic.imageability.imageability_models.optimization import ( objective, ) diff --git a/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py index 7c07037..e977e20 100644 --- a/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py +++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py @@ -16,7 +16,7 @@ from xgboost import XGBRegressor from fluentai.logger import logger -from fluentai.services.mnemonic.imageability.imag_models.data import ( +from fluentai.services.mnemonic.imageability.imageability_models.data import ( append_hyperparameters_log, ) From f8be519b21947d03b96ab4512b1b1d48e2b7ea5f Mon Sep 17 00:00:00 2001 From: Stephan Akkerman Date: Sun, 5 Jan 2025 19:13:40 +0100 Subject: [PATCH 15/15] Fix async calls --- backend/fluentai/api/routes/create_card.py | 10 +- backend/fluentai/run.py | 154 ++++++++++-------- .../services/mnemonic/word2mnemonic.py | 12 +- 3 files changed, 94 insertions(+), 82 deletions(-) diff --git a/backend/fluentai/api/routes/create_card.py b/backend/fluentai/api/routes/create_card.py index b584ebd..7eef712 100644 --- a/backend/fluentai/api/routes/create_card.py +++ b/backend/fluentai/api/routes/create_card.py @@ -8,7 +8,7 @@ from fluentai.constants.config import config from fluentai.constants.languages import G2P_LANGCODES, G2P_LANGUAGES from fluentai.logger import logger -from fluentai.run import generate_mnemonic_img +from fluentai.run import MnemonicPipeline create_card_router = APIRouter() @@ -110,9 +110,13 @@ async def get_image( if language_code not in G2P_LANGUAGES: raise HTTPException(status_code=400, detail="Invalid language code") + mnemonic_pipe = MnemonicPipeline() + try: - image_path, verbal_cue, translation, tts_path, ipa = generate_mnemonic_img( - word, language_code, llm_model, image_model, keyword, key_sentence + image_path, verbal_cue, translation, tts_path, ipa = ( + await mnemonic_pipe.generate_mnemonic_img( + word, language_code, llm_model, image_model, keyword, key_sentence + ) ) if not os.path.exists(image_path): diff --git a/backend/fluentai/run.py b/backend/fluentai/run.py index 4edbecc..ed7e31c 100644 --- a/backend/fluentai/run.py +++ b/backend/fluentai/run.py @@ -1,3 +1,5 @@ +import asyncio + import torch from fluentai.logger import logger @@ -7,84 +9,92 @@ from fluentai.services.tts.tts import TTS -def generate_mnemonic_img( - word: str, - lang_code: str, - llm_model: str = None, - image_model: str = None, - keyword: str = None, - key_sentence: str = None, -) -> tuple: - """ - Generate an image for a given word using the mnemonic pipeline. - - Parameters - ---------- - word : str - The word to generate an image for in the language of lang_code. - lang_code : str - The language code for the word. - llm_model : str, optional - The name of the LLM model to use for verbal cue generation. - image_model : str, optional - The name of the image model to use for image generation. - - Returns - ------- - str - The path to the generated image. - str - The verbal cue for the image. - str - The translated word. - str - The path to the generated audio file. - str - The IPA spelling of the best match. - """ - # Check if cuda is available - logger.info(f"cuda available: {torch.cuda.is_available()}") - logger.info(f"cuda device count: {torch.cuda.device_count()}") - - best_matches, translated_word, _, ipa = Word2Mnemonic().generate_mnemonic( - word, lang_code, keyword, key_sentence - ) - - if not key_sentence: - if not keyword: - # Get the top phonetic match - best_match = best_matches.iloc[0] - keyword = best_match["token_ort"] - - # Use the provided llm_model if available, otherwise default to the one in config - if llm_model: - vc = VerbalCue(model_name=llm_model) - else: - vc = VerbalCue() +class MnemonicPipeline: + def __init__(self): + self.w2m = Word2Mnemonic() + + # Check if cuda is available + logger.info(f"cuda available: {torch.cuda.is_available()}") + logger.info(f"cuda device count: {torch.cuda.device_count()}") + + async def generate_mnemonic_img( + self, + word: str, + lang_code: str, + llm_model: str = None, + image_model: str = None, + keyword: str = None, + key_sentence: str = None, + ) -> tuple: + """ + Generate an image for a given word using the mnemonic pipeline. + + Parameters + ---------- + word : str + The word to generate an image for in the language of lang_code. + lang_code : str + The language code for the word. + llm_model : str, optional + The name of the LLM model to use for verbal cue generation. + image_model : str, optional + The name of the image model to use for image generation. - # Generate a verbal cue - logger.debug( - "Generating verbal cue for '%s'-'%s'...", - keyword, - translated_word, + Returns + ------- + str + The path to the generated image. + str + The verbal cue for the image. + str + The translated word. + str + The path to the generated audio file. + str + The IPA spelling of the best match. + """ + best_matches, translated_word, _, ipa = await self.w2m.generate_mnemonic( + word, lang_code, keyword, key_sentence ) - key_sentence = vc.generate_cue(translated_word, keyword) - # Use the provided image_model if available, otherwise default to the one in config - if image_model: - img_gen = ImageGen(model=image_model) - else: - img_gen = ImageGen() + if not key_sentence: + if not keyword: + # Get the top phonetic match + best_match = best_matches.iloc[0] + keyword = best_match["token_ort"] - # Generate the image - image_path = img_gen.generate_img(prompt=key_sentence, word1=word, word2=keyword) + # Use the provided llm_model if available, otherwise default to the one in config + if llm_model: + vc = VerbalCue(model_name=llm_model) + else: + vc = VerbalCue() + + # Generate a verbal cue + logger.debug( + "Generating verbal cue for '%s'-'%s'...", + keyword, + translated_word, + ) + key_sentence = vc.generate_cue(translated_word, keyword) + + # Use the provided image_model if available, otherwise default to the one in config + if image_model: + img_gen = ImageGen(model=image_model) + else: + img_gen = ImageGen() + + # Generate the image + image_path = img_gen.generate_img( + prompt=key_sentence, word1=word, word2=keyword + ) - # Generate TTS - tts_model = TTS() - tts_path = tts_model.tts(word, lang=lang_code) + # Generate TTS + tts_model = TTS() + tts_path = tts_model.tts(word, lang=lang_code) - return image_path, key_sentence, translated_word, tts_path, ipa + return image_path, key_sentence, translated_word, tts_path, ipa if __name__ == "__main__": - generate_mnemonic_img("kat", "dut") + pipeline = MnemonicPipeline() + asyncio.run(pipeline.generate_mnemonic_img("kat", "dut")) diff --git a/backend/fluentai/services/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py index e04bc1b..ab7bc3c 100644 --- a/backend/fluentai/services/mnemonic/word2mnemonic.py +++ b/backend/fluentai/services/mnemonic/word2mnemonic.py @@ -23,7 +23,7 @@ def __init__(self): self.imageability_predictor = ImageabilityPredictor() self.semantic_sim = SemanticSimilarity() - def generate_mnemonic( + async def generate_mnemonic( self, word: str, language_code: str, @@ -57,9 +57,7 @@ def generate_mnemonic( logger.error(f"Invalid language code: {language_code}") return - translated_word, transliterated_word = asyncio.run( - translate_word(word, language_code) - ) + translated_word, transliterated_word = await translate_word(word, language_code) if keyword or key_sentence: # If keyword is provided, use it directly for scoring @@ -119,6 +117,6 @@ def generate_mnemonic( if __name__ == "__main__": w2m = Word2Mnemonic() - print(w2m.generate_mnemonic("kat", "dut")) - print(w2m.generate_mnemonic("house", "eng", keyword="হাউজ")) - print(w2m.generate_mnemonic("猫", "zho-s")) + print(asyncio.run(w2m.generate_mnemonic("kat", "dut"))) + print(asyncio.run(w2m.generate_mnemonic("house", "eng", keyword="হাউজ"))) + print(asyncio.run(w2m.generate_mnemonic("猫", "zho-s")))