diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index 6be1030..3914bac 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -6,22 +6,22 @@ labels: ''
assignees: ''
---
+
1. Description:
- - Problem:
-
+ - Problem:
- - Solution:
-
- - Prerequisites:
-
+ - Solution:
-2. Tasks:
+
+ - Prerequisites:
+
+
+2. Tasks:
- [ ] Task 1
- [ ] Task 2
- [ ] Task 3
-3. Additional context
-
\ No newline at end of file
+3. Additional context
diff --git a/.github/workflows/pytests.yaml b/.github/workflows/pytests.yaml
index 8d0dcd0..281a2a4 100644
--- a/.github/workflows/pytests.yaml
+++ b/.github/workflows/pytests.yaml
@@ -7,8 +7,6 @@ on:
- 'requirements.txt' # Dependency file
- 'setup.py' # Setup script
- 'pyproject.toml' # Modern Python project configuration
- # Add any other relevant paths as needed
-
permissions:
contents: read
@@ -26,6 +24,7 @@ jobs:
- name: Install dependencies
run: |
+ cd backend
python -m pip install --upgrade pip
pip install pytest pytest-mock
pip install -e .
diff --git a/README.md b/README.md
index 0483adc..063973e 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ Before starting, make sure you have the following requirements:
We have bundled all required dependencies into a package for easy installation. To get started, simply run one of the following commands:
```bash
-pip install .
+pip install backend/.
```
or install directly from the repository:
@@ -81,7 +81,7 @@ If you prefer to build from source, follow these steps:
3. Install the dependencies:
```bash
- pip install -r requirements.txt
+ pip install -r backend/requirements.txt
```
### Install with GPU Support (Recommended)
@@ -90,7 +90,7 @@ If you would like to run the code on a GPU, you can install the `torch` package
After installing the required dependencies, run the following command:
```bash
-pip install -r requirements/gpu.txt
+pip install -r backend/gpu-requirements.txt
```
## Usage ⌨️
diff --git a/fluentai/__init__.py b/backend/fluentai/__init__.py
similarity index 100%
rename from fluentai/__init__.py
rename to backend/fluentai/__init__.py
diff --git a/backend/fluentai/api/app.py b/backend/fluentai/api/app.py
new file mode 100644
index 0000000..a6dc35f
--- /dev/null
+++ b/backend/fluentai/api/app.py
@@ -0,0 +1,47 @@
+import argparse
+
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+
+from fluentai.api.routes.anki import anki_router
+from fluentai.api.routes.create_card import create_card_router
+from fluentai.utils.load_models import download_all_models
+
+# Initialize FastAPI app
+app = FastAPI()
+
+# Configure CORS middleware
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=[
+ "http://localhost:3000",
+ "https://akkerman.ai",
+ ],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+app.include_router(anki_router)
+app.include_router(create_card_router)
+
+
+def main():
+ """Start the FastAPI application."""
+ # Start by downloading all models
+ download_all_models()
+
+ parser = argparse.ArgumentParser(description="")
+ parser.add_argument(
+ "--host", type=str, default="127.0.0.1", help="Hosting default: 127.0.0.1"
+ )
+ parser.add_argument("--port", type=int, default=8000)
+
+ args = parser.parse_args()
+
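+    # NOTE: the "app:app" import string is resolved relative to the working
+    # directory; when launching from the repo root, "fluentai.api.app:app"
+    # may be required instead.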
+ uvicorn.run("app:app", host=args.host, port=args.port)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/backend/fluentai/api/routes/anki.py b/backend/fluentai/api/routes/anki.py
new file mode 100644
index 0000000..71102c1
--- /dev/null
+++ b/backend/fluentai/api/routes/anki.py
@@ -0,0 +1,51 @@
+import httpx
+from fastapi import APIRouter, Request
+from fastapi.responses import JSONResponse
+
+anki_router = APIRouter()
+
+
+@anki_router.post("/api/anki")
+async def anki_proxy(request: Request):
+ """
+ Proxy API endpoint for forwarding requests to the Anki server.
+
+ This function receives a JSON request from the client, forwards it to the Anki
+ server running on localhost, and returns the response back to the client.
+
+    HACK: The backend acts as a proxy for the Anki server when the frontend is deployed on GitHub Pages.
+
+ Parameters
+ ----------
+ request : Request
+ The incoming HTTP request object containing the JSON payload to be forwarded.
+
+ Returns
+ -------
+ JSONResponse
+ A JSON response containing the Anki server response or an error message if
+ the request fails.
+ """
+ try:
+ # Forward the incoming request body to the Anki server
+ request_body = await request.json()
+
+ async with httpx.AsyncClient() as client:
+ response = await client.post(
+ "http://127.0.0.1:8765", # Assuming Anki is running on localhost with default port
+ json=request_body,
+ )
+
+ # Return the JSON response from Anki server
+ return JSONResponse(content=response.json(), status_code=response.status_code)
+
+ except httpx.RequestError as e:
+ return JSONResponse(
+ content={"error": "Failed to connect to Anki server.", "details": str(e)},
+ status_code=500,
+ )
+ except Exception as e:
+ return JSONResponse(
+ content={"error": "An unexpected error occurred.", "details": str(e)},
+ status_code=500,
+ )
diff --git a/fluentai/services/card_gen/api.py b/backend/fluentai/api/routes/create_card.py
similarity index 63%
rename from fluentai/services/card_gen/api.py
rename to backend/fluentai/api/routes/create_card.py
index 8e6eee0..7eef712 100644
--- a/fluentai/services/card_gen/api.py
+++ b/backend/fluentai/api/routes/create_card.py
@@ -1,33 +1,16 @@
-import argparse
import base64
import os
-import httpx
-import uvicorn
-from constants.languages import G2P_LANGCODES, G2P_LANGUAGES
-from fastapi import FastAPI, HTTPException, Query, Request
-from fastapi.middleware.cors import CORSMiddleware
+from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import JSONResponse
from pydantic import BaseModel
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.main import generate_mnemonic_img
-from fluentai.services.card_gen.utils.load_models import download_all_models
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.constants.languages import G2P_LANGCODES, G2P_LANGUAGES
+from fluentai.logger import logger
+from fluentai.run import MnemonicPipeline
-app = FastAPI()
-
-# Allow all origins for development (adjust in production)
-app.add_middleware(
- CORSMiddleware,
- allow_origins=[
- "http://localhost:3000",
- "https://akkerman.ai",
- ], # Replace "*" with your front-end URL in production
- allow_credentials=True,
- allow_methods=["*"],
- allow_headers=["*"],
-)
+create_card_router = APIRouter()
# Define Pydantic models for request and responses
@@ -41,7 +24,7 @@ class CreateCardResponse(BaseModel):
recording: str = None # Placeholder for future implementation
-@app.post("/create_card/word_data", response_model=CreateCardResponse)
+@create_card_router.post("/create_card/word_data", response_model=CreateCardResponse)
async def api_generate_mnemonic(request: CreateCardRequest) -> dict:
"""
Calls the main function to generate a mnemonic for a given word and language code.
@@ -85,7 +68,7 @@ async def api_generate_mnemonic(request: CreateCardRequest) -> dict:
raise HTTPException(status_code=500, detail="Internal Server Error")
-@app.get("/create_card/img")
+@create_card_router.get("/create_card/img")
async def get_image(
word: str = Query(...),
language_code: str = Query(...),
@@ -127,9 +110,13 @@ async def get_image(
if language_code not in G2P_LANGUAGES:
raise HTTPException(status_code=400, detail="Invalid language code")
+ mnemonic_pipe = MnemonicPipeline()
+
try:
- image_path, verbal_cue, translation, tts_path, ipa = generate_mnemonic_img(
- word, language_code, llm_model, image_model, keyword, key_sentence
+ image_path, verbal_cue, translation, tts_path, ipa = (
+ await mnemonic_pipe.generate_mnemonic_img(
+ word, language_code, llm_model, image_model, keyword, key_sentence
+ )
)
if not os.path.exists(image_path):
@@ -157,7 +144,7 @@ async def get_image(
raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}")
-@app.get("/create_card/supported_languages")
+@create_card_router.get("/create_card/supported_languages")
async def get_supported_languages() -> JSONResponse:
"""
Returns a list of languages that the backend supports.
@@ -170,7 +157,7 @@ async def get_supported_languages() -> JSONResponse:
return JSONResponse(content={"languages": G2P_LANGCODES})
-@app.get("/create_card/image_models")
+@create_card_router.get("/create_card/image_models")
async def get_image_models() -> JSONResponse:
"""
Returns a list of available image generation models, with the recommended model at the top.
@@ -191,7 +178,7 @@ async def get_image_models() -> JSONResponse:
return JSONResponse(content={"models": available_models})
-@app.get("/create_card/llm_models")
+@create_card_router.get("/create_card/llm_models")
async def get_llm_models() -> JSONResponse:
"""
Returns a list of available LLM models, with the recommended model at the top.
@@ -211,65 +198,3 @@ async def get_llm_models() -> JSONResponse:
available_models = [recommended_model] + models["all"]
return JSONResponse(content={"models": available_models})
-
-
-# HACK: This uses the backend as a proxy for when the frontend is deployed in GH Pages
-
-
-@app.post("/api/anki")
-async def anki_proxy(request: Request):
- """
- Proxy API endpoint for forwarding requests to the Anki server.
-
- This function receives a JSON request from the client, forwards it to the Anki
- server running on localhost, and returns the response back to the client.
-
- Parameters
- ----------
- request : Request
- The incoming HTTP request object containing the JSON payload to be forwarded.
-
- Returns
- -------
- JSONResponse
- A JSON response containing the Anki server response or an error message if
- the request fails.
- """
- try:
- # Forward the incoming request body to the Anki server
- request_body = await request.json()
-
- async with httpx.AsyncClient() as client:
- response = await client.post(
- "http://127.0.0.1:8765", # Assuming Anki is running on localhost with default port
- json=request_body,
- )
-
- # Return the JSON response from Anki server
- return JSONResponse(content=response.json(), status_code=response.status_code)
-
- except httpx.RequestError as e:
- return JSONResponse(
- content={"error": "Failed to connect to Anki server.", "details": str(e)},
- status_code=500,
- )
- except Exception as e:
- return JSONResponse(
- content={"error": "An unexpected error occurred.", "details": str(e)},
- status_code=500,
- )
-
-
-if __name__ == "__main__":
- # Start by downloading all models
- download_all_models()
-
- parser = argparse.ArgumentParser(description="")
- parser.add_argument(
- "--host", type=str, default="127.0.0.1", help="Hosting default: 127.0.0.1"
- )
- parser.add_argument("--port", type=int, default=8000)
-
- args = parser.parse_args()
-
- uvicorn.run("api:app", host=args.host, port=args.port)
diff --git a/fluentai/anki/__init__.py b/backend/fluentai/constants/__init__.py
similarity index 100%
rename from fluentai/anki/__init__.py
rename to backend/fluentai/constants/__init__.py
diff --git a/fluentai/services/card_gen/constants/config.py b/backend/fluentai/constants/config.py
similarity index 100%
rename from fluentai/services/card_gen/constants/config.py
rename to backend/fluentai/constants/config.py
diff --git a/backend/fluentai/constants/languages.py b/backend/fluentai/constants/languages.py
new file mode 100644
index 0000000..d419500
--- /dev/null
+++ b/backend/fluentai/constants/languages.py
@@ -0,0 +1,7 @@
+import json
+
+from fluentai.constants.config import config
+
+with open(config.get("G2P").get("LANGUAGE_JSON")) as f:
+ G2P_LANGCODES = json.load(f)
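+# G2P_LANGCODES maps language name -> code; invert it so codes can be looked up too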
+G2P_LANGUAGES: dict = dict(map(reversed, G2P_LANGCODES.items()))
diff --git a/fluentai/services/card_gen/utils/logger.py b/backend/fluentai/logger.py
similarity index 93%
rename from fluentai/services/card_gen/utils/logger.py
rename to backend/fluentai/logger.py
index 501b4b7..70a5ae4 100644
--- a/fluentai/services/card_gen/utils/logger.py
+++ b/backend/fluentai/logger.py
@@ -2,7 +2,7 @@
import os
import sys
-from fluentai.services.card_gen.constants.config import config
+from fluentai.constants.config import config
class UTF8StreamHandler(logging.StreamHandler):
diff --git a/backend/fluentai/run.py b/backend/fluentai/run.py
new file mode 100644
index 0000000..ed7e31c
--- /dev/null
+++ b/backend/fluentai/run.py
@@ -0,0 +1,100 @@
+import asyncio
+
+import torch
+
+from fluentai.logger import logger
+from fluentai.services.imagine.image_gen import ImageGen
+from fluentai.services.imagine.verbal_cue_gen import VerbalCue
+from fluentai.services.mnemonic.word2mnemonic import Word2Mnemonic
+from fluentai.services.tts.tts import TTS
+
+
+class MnemonicPipeline:
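+    """Chains mnemonic search, verbal-cue generation, image generation, and TTS."""
+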
+ def __init__(self):
+ self.w2m = Word2Mnemonic()
+
+ # Check if cuda is available
+ logger.info(f"cuda available: {torch.cuda.is_available()}")
+ logger.info(f"cuda device count: {torch.cuda.device_count()}")
+
+ async def generate_mnemonic_img(
+ self,
+ word: str,
+ lang_code: str,
+ llm_model: str = None,
+ image_model: str = None,
+ keyword: str = None,
+ key_sentence: str = None,
+ ) -> tuple:
+ """
+ Generate an image for a given word using the mnemonic pipeline.
+
+ Parameters
+ ----------
+ word : str
+ The word to generate an image for in the language of lang_code.
+ lang_code : str
+ The language code for the word.
+        llm_model : str, optional
+            The name of the LLM model to use for verbal cue generation.
+        image_model : str, optional
+            The name of the image model to use for image generation.
+        keyword : str, optional
+            A user-supplied keyword to use instead of the top phonetic match.
+        key_sentence : str, optional
+            A user-supplied verbal cue; when given, cue generation is skipped.
+
+ Returns
+ -------
+ str
+ The path to the generated image.
+ str
+ The verbal cue for the image.
+ str
+ The translated word.
+ str
+ The path to the generated audio file.
+ str
+ The IPA spelling of the best match.
+ """
+ best_matches, translated_word, _, ipa = await self.w2m.generate_mnemonic(
+ word, lang_code, keyword, key_sentence
+ )
+
+ if not key_sentence:
+ if not keyword:
+ # Get the top phonetic match
+ best_match = best_matches.iloc[0]
+ keyword = best_match["token_ort"]
+
+ # Use the provided llm_model if available, otherwise default to the one in config
+ if llm_model:
+ vc = VerbalCue(model_name=llm_model)
+ else:
+ vc = VerbalCue()
+
+ # Generate a verbal cue
+ logger.debug(
+ "Generating verbal cue for '%s'-'%s'...",
+ keyword,
+ translated_word,
+ )
+ key_sentence = vc.generate_cue(translated_word, keyword)
+
+ # Use the provided image_model if available, otherwise default to the one in config
+ if image_model:
+ img_gen = ImageGen(model=image_model)
+ else:
+ img_gen = ImageGen()
+
+ # Generate the image
+ image_path = img_gen.generate_img(
+ prompt=key_sentence, word1=word, word2=keyword
+ )
+
+ # Generate TTS
+ tts_model = TTS()
+ tts_path = tts_model.tts(word, lang=lang_code)
+
+ return image_path, key_sentence, translated_word, tts_path, ipa
+
+
+if __name__ == "__main__":
+ pipeline = MnemonicPipeline()
+ asyncio.run(pipeline.generate_mnemonic_img("kat", "dut"))
diff --git a/fluentai/services/__init__.py b/backend/fluentai/services/__init__.py
similarity index 100%
rename from fluentai/services/__init__.py
rename to backend/fluentai/services/__init__.py
diff --git a/fluentai/services/card_gen/__init__.py b/backend/fluentai/services/imagine/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/__init__.py
rename to backend/fluentai/services/imagine/__init__.py
diff --git a/fluentai/services/card_gen/imagine/image_gen.py b/backend/fluentai/services/imagine/image_gen.py
similarity index 70%
rename from fluentai/services/card_gen/imagine/image_gen.py
rename to backend/fluentai/services/imagine/image_gen.py
index 6cf216b..688b62f 100644
--- a/fluentai/services/card_gen/imagine/image_gen.py
+++ b/backend/fluentai/services/imagine/image_gen.py
@@ -1,51 +1,12 @@
-import functools
-import gc
import os
from pathlib import Path
import torch
from diffusers import AutoPipelineForText2Image, SanaPipeline
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
-
-
-def manage_model_memory(method):
- """
- Decorator to manage model memory by offloading to GPU before the method call.
- """
-
- @functools.wraps(method)
- def wrapper(self, *args, **kwargs):
- # Initialize the pipe if it's not already loaded
- if self.pipe is None:
- self._initialize_pipe()
-
- # Move to GPU if offloading is enabled
- if self.offload:
- logger.debug("Moving the pipeline to GPU (cuda).")
- self.pipe.to("cuda")
-
- try:
- # Execute the decorated method
- result = method(self, *args, **kwargs)
- finally:
- # Delete the pipeline if DELETE_AFTER_USE is True
- if self.config.get("DELETE_AFTER_USE", True):
- logger.debug("Deleting the pipeline to free up memory.")
- del self.pipe
- self.pipe = None
- gc.collect()
- torch.cuda.empty_cache()
-
- # Move the pipeline back to CPU if offloading is enabled
- if self.offload and self.pipe is not None:
- logger.debug("Moving the pipeline back to CPU.")
- self.pipe.to("cpu", silence_dtype_warnings=True)
-
- return result
-
- return wrapper
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.utils.model_mem import manage_memory
class ImageGen:
@@ -102,7 +63,11 @@ def _initialize_pipe(self):
cache_dir="models",
)
- @manage_model_memory
+ @manage_memory(
+ targets=["pipe"],
+ delete_attrs=["pipe"],
+ move_kwargs={"silence_dtype_warnings": True},
+ )
def generate_img(
self,
prompt: str = "Imagine a flashy bottle that stands out from the other bottles.",
diff --git a/fluentai/services/card_gen/imagine/verbal_cue.py b/backend/fluentai/services/imagine/verbal_cue_gen.py
similarity index 65%
rename from fluentai/services/card_gen/imagine/verbal_cue.py
rename to backend/fluentai/services/imagine/verbal_cue_gen.py
index 9ed3810..b01358c 100644
--- a/fluentai/services/card_gen/imagine/verbal_cue.py
+++ b/backend/fluentai/services/imagine/verbal_cue_gen.py
@@ -1,53 +1,8 @@
-import functools
-import gc
-
-import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
-
-
-def manage_model_memory(method):
- """
- Decorator to manage model memory by offloading to GPU before the method call.
- """
-
- @functools.wraps(method)
- def wrapper(self, *args, **kwargs):
- # Initialize the pipe if it's not already loaded
- if self.pipe is None:
- self._initialize_pipe()
-
- # Move to GPU if offloading is enabled
- if self.offload:
- logger.debug("Moving the model to GPU (cuda).")
- self.model.to("cuda")
-
- try:
- # Execute the decorated method
- result = method(self, *args, **kwargs)
- finally:
- # Delete the pipeline if DELETE_AFTER_USE is True
- if self.config.get("DELETE_AFTER_USE", True):
- logger.debug("Deleting the model to free up memory.")
- del self.model
- del self.pipe
- del self.tokenizer
- self.model = None
- self.pipe = None
- self.tokenizer = None
- gc.collect()
- torch.cuda.empty_cache()
-
- # Move the pipeline back to CPU if offloading is enabled
- if self.offload and self.pipe is not None:
- logger.debug("Moving the model back to CPU.")
- self.model.to("cpu")
-
- return result
-
- return wrapper
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.utils.model_mem import manage_memory
class VerbalCue:
@@ -101,7 +56,9 @@ def _initialize_pipe(self):
tokenizer=self.tokenizer,
)
- @manage_model_memory
+ @manage_memory(
+ targets=["model"], delete_attrs=["model", "pipe", "tokenizer"], move_kwargs={}
+ )
def generate_cue(self, word1: str, word2: str) -> str:
"""
Generate a verbal cue that connects two words.
diff --git a/fluentai/services/card_gen/mnemonic/__init__.py b/backend/fluentai/services/mnemonic/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/__init__.py
rename to backend/fluentai/services/mnemonic/__init__.py
diff --git a/fluentai/services/card_gen/constants/__init__.py b/backend/fluentai/services/mnemonic/imageability/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/constants/__init__.py
rename to backend/fluentai/services/mnemonic/imageability/__init__.py
diff --git a/fluentai/services/card_gen/mnemonic/imageability/embeddings.py b/backend/fluentai/services/mnemonic/imageability/embeddings.py
similarity index 96%
rename from fluentai/services/card_gen/mnemonic/imageability/embeddings.py
rename to backend/fluentai/services/mnemonic/imageability/embeddings.py
index 6706556..72bbb90 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/embeddings.py
+++ b/backend/fluentai/services/mnemonic/imageability/embeddings.py
@@ -8,8 +8,8 @@
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
EMBEDDING_MODEL = None
@@ -26,7 +26,8 @@ def load_embedding_model(self):
Load the specified embedding model.
"""
if self.model_name == "fasttext":
- from fluentai.services.card_gen.utils.fasttext import fasttext_model
+ logger.info("Loading FastText model for imageability embeddings...")
+ from fluentai.utils.fasttext import fasttext_model
return fasttext_model
diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/data.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/data.py
similarity index 98%
rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/data.py
rename to backend/fluentai/services/mnemonic/imageability/imageability_models/data.py
index 6f009b3..0a263e4 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/imag_models/data.py
+++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/data.py
@@ -8,8 +8,8 @@
from huggingface_hub import HfApi, hf_hub_download
from sklearn.model_selection import train_test_split
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
def upload_model(model_path: str):
diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/ensemble.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py
similarity index 96%
rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/ensemble.py
rename to backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py
index 26ebc39..26f5739 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/imag_models/ensemble.py
+++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/ensemble.py
@@ -9,11 +9,11 @@
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import (
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.imageability.imageability_models.data import (
append_to_log,
)
-from fluentai.services.card_gen.utils.logger import logger
def implement_ensemble_methods(
diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/models.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py
similarity index 97%
rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/models.py
rename to backend/fluentai/services/mnemonic/imageability/imageability_models/models.py
index b0b291e..0ef61ac 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/imag_models/models.py
+++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/models.py
@@ -18,8 +18,9 @@
from tqdm import tqdm
from xgboost import XGBRegressor
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import (
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.imageability.imageability_models.data import (
append_to_log,
ensure_logs_directory,
load_data,
@@ -28,13 +29,12 @@
split_dataset,
upload_model,
)
-from fluentai.services.card_gen.mnemonic.imageability.imag_models.ensemble import (
+from fluentai.services.mnemonic.imageability.imageability_models.ensemble import (
implement_ensemble_methods,
)
-from fluentai.services.card_gen.mnemonic.imageability.imag_models.optimization import (
+from fluentai.services.mnemonic.imageability.imageability_models.optimization import (
objective,
)
-from fluentai.services.card_gen.utils.logger import logger
def train_and_evaluate_models(X_train, X_test, y_train, y_test, dataset_hash):
diff --git a/fluentai/services/card_gen/mnemonic/imageability/imag_models/optimization.py b/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py
similarity index 98%
rename from fluentai/services/card_gen/mnemonic/imageability/imag_models/optimization.py
rename to backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py
index 477acee..e977e20 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/imag_models/optimization.py
+++ b/backend/fluentai/services/mnemonic/imageability/imageability_models/optimization.py
@@ -15,10 +15,10 @@
from sklearn.svm import SVR
from xgboost import XGBRegressor
-from fluentai.services.card_gen.mnemonic.imageability.imag_models.data import (
+from fluentai.logger import logger
+from fluentai.services.mnemonic.imageability.imageability_models.data import (
append_hyperparameters_log,
)
-from fluentai.services.card_gen.utils.logger import logger
def get_optuna_search_space(trial, model_name):
diff --git a/fluentai/services/card_gen/mnemonic/imageability/make_data/imageabilitycorpus.py b/backend/fluentai/services/mnemonic/imageability/make_data/corpus.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/imageability/make_data/imageabilitycorpus.py
rename to backend/fluentai/services/mnemonic/imageability/make_data/corpus.py
diff --git a/fluentai/services/card_gen/mnemonic/imageability/make_data/complete.py b/backend/fluentai/services/mnemonic/imageability/make_data/run.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/imageability/make_data/complete.py
rename to backend/fluentai/services/mnemonic/imageability/make_data/run.py
diff --git a/fluentai/services/card_gen/mnemonic/imageability/imageability.py b/backend/fluentai/services/mnemonic/imageability/predictor.py
similarity index 96%
rename from fluentai/services/card_gen/mnemonic/imageability/imageability.py
rename to backend/fluentai/services/mnemonic/imageability/predictor.py
index 3bec490..bbfc60e 100644
--- a/fluentai/services/card_gen/mnemonic/imageability/imageability.py
+++ b/backend/fluentai/services/mnemonic/imageability/predictor.py
@@ -5,8 +5,8 @@
from huggingface_hub import hf_hub_download
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.imageability.embeddings import (
+from fluentai.constants.config import config
+from fluentai.services.mnemonic.imageability.embeddings import (
ImageabilityEmbeddings,
)
diff --git a/fluentai/services/card_gen/imagine/__init__.py b/backend/fluentai/services/mnemonic/orthographic/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/imagine/__init__.py
rename to backend/fluentai/services/mnemonic/orthographic/__init__.py
diff --git a/fluentai/services/card_gen/mnemonic/orthographic/orthographic.py b/backend/fluentai/services/mnemonic/orthographic/compute.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/orthographic/orthographic.py
rename to backend/fluentai/services/mnemonic/orthographic/compute.py
diff --git a/fluentai/services/card_gen/mnemonic/orthographic/eval.py b/backend/fluentai/services/mnemonic/orthographic/eval.py
similarity index 96%
rename from fluentai/services/card_gen/mnemonic/orthographic/eval.py
rename to backend/fluentai/services/mnemonic/orthographic/eval.py
index 11c74dc..7758872 100644
--- a/fluentai/services/card_gen/mnemonic/orthographic/eval.py
+++ b/backend/fluentai/services/mnemonic/orthographic/eval.py
@@ -1,11 +1,11 @@
import pandas as pd
from datasets import load_dataset
-from orthographic import compute_similarity
from scipy.stats import pearsonr, spearmanr
from sklearn.preprocessing import MinMaxScaler
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.orthographic.compute import compute_similarity
def scale_ratings(ratings: pd.Series) -> pd.Series:
diff --git a/fluentai/services/card_gen/mnemonic/imageability/__init__.py b/backend/fluentai/services/mnemonic/phonetic/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/imageability/__init__.py
rename to backend/fluentai/services/mnemonic/phonetic/__init__.py
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/phonetic.py b/backend/fluentai/services/mnemonic/phonetic/compute.py
similarity index 90%
rename from fluentai/services/card_gen/mnemonic/phonetic/phonetic.py
rename to backend/fluentai/services/mnemonic/phonetic/compute.py
index d12eeb8..2dd5fa2 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/phonetic.py
+++ b/backend/fluentai/services/mnemonic/phonetic/compute.py
@@ -3,14 +3,14 @@
import pandas as pd
from huggingface_hub import hf_hub_download
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
-from fluentai.services.card_gen.mnemonic.phonetic.utils import (
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
+from fluentai.services.mnemonic.phonetic.utils.cache import load_from_cache
+from fluentai.services.mnemonic.phonetic.utils.vectors import (
convert_to_matrix,
- load_cache,
pad_vectors,
)
-from fluentai.services.card_gen.utils.logger import logger
def word2ipa(
@@ -124,7 +124,7 @@ def top_phonetic(
ipa = word2ipa(input_word, language_code, g2p_model)
# Attempt to load from cache
- dataset = load_cache(method)
+ dataset = load_from_cache(method)
dataset_vectors_flat = dataset["flattened_vectors"].tolist()
@@ -175,7 +175,7 @@ def top_phonetic(
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"
# Load the G2P model
- from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P
+ from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme
- result = top_phonetic(word_input, language_code, top_n, G2P())
+ result = top_phonetic(word_input, language_code, top_n, Grapheme2Phoneme())
print(result)
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/eval.py b/backend/fluentai/services/mnemonic/phonetic/eval.py
similarity index 96%
rename from fluentai/services/card_gen/mnemonic/phonetic/eval.py
rename to backend/fluentai/services/mnemonic/phonetic/eval.py
index d0b6c41..534ab90 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/eval.py
+++ b/backend/fluentai/services/mnemonic/phonetic/eval.py
@@ -2,23 +2,23 @@
import numpy as np
import pandas as pd
from datasets import load_dataset
-from g2p import G2P
from huggingface_hub import hf_hub_download
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme
+from fluentai.services.mnemonic.phonetic.ipa2vec import panphon_vec, soundvec
def word2ipa(
word: str,
ipa_dataset: pd.DataFrame,
use_fallback: bool = True,
- g2p_model: G2P = G2P(),
+ g2p_model: Grapheme2Phoneme = Grapheme2Phoneme(),
) -> str:
"""
Convert a word to its IPA transcription using the dataset and fallback to the g2p model if necessary.
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/g2p.py b/backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py
similarity index 93%
rename from fluentai/services/card_gen/mnemonic/phonetic/g2p.py
rename to backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py
index 5560bc8..2e03d76 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/g2p.py
+++ b/backend/fluentai/services/mnemonic/phonetic/grapheme2phoneme.py
@@ -1,10 +1,10 @@
from transformers import AutoTokenizer, T5ForConditionalGeneration
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
-class G2P:
+class Grapheme2Phoneme:
def __init__(self):
# https://github.com/lingjzhu/CharsiuG2P
logger.debug("Loading G2P model")
@@ -49,7 +49,7 @@ def example():
"""
Example usage of the G2P module. It prints the phonetic transcription of the words in Indonesian, English, and Dutch.
"""
- g2p = G2P()
+ g2p = Grapheme2Phoneme()
# https://en.wiktionary.org/wiki/kucing#Indonesian
# IPA(key): /ˈkut͡ʃɪŋ/
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/ipa2vec.py b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py
similarity index 92%
rename from fluentai/services/card_gen/mnemonic/phonetic/ipa2vec.py
rename to backend/fluentai/services/mnemonic/phonetic/ipa2vec.py
index 29cdb35..ad9b416 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/ipa2vec.py
+++ b/backend/fluentai/services/mnemonic/phonetic/ipa2vec.py
@@ -5,8 +5,8 @@
from pyclts import CLTS
from soundvectors import SoundVectors
-from fluentai.services.card_gen.mnemonic.phonetic.clts_utils import get_clts
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
+from fluentai.services.mnemonic.phonetic.utils.clts import get_clts
# Test if data/clts exists
if not os.path.exists("local_data/clts"):
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/make_data/filter_words.py b/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py
similarity index 97%
rename from fluentai/services/card_gen/mnemonic/phonetic/make_data/filter_words.py
rename to backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py
index a0210d5..1ffe645 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/make_data/filter_words.py
+++ b/backend/fluentai/services/mnemonic/phonetic/make_data/filter_words.py
@@ -6,7 +6,7 @@
from nltk.stem import WordNetLemmatizer
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
+from fluentai.constants.config import config
# Download required NLTK data
nltk.download("words")
diff --git a/backend/fluentai/services/mnemonic/phonetic/utils/cache.py b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py
new file mode 100644
index 0000000..354569c
--- /dev/null
+++ b/backend/fluentai/services/mnemonic/phonetic/utils/cache.py
@@ -0,0 +1,36 @@
+import pandas as pd
+from huggingface_hub import hf_hub_download
+
+from fluentai.constants.config import config
+from fluentai.logger import logger
+
+
+def load_from_cache(method: str = "panphon"):
+ """
+    Load the processed phonetic dataset from the Hugging Face cache.
+
+    Parameters
+    ----------
+    method : str
+        The embedding method the cached dataset was built with (default: "panphon").
+
+    Returns
+    -------
+    pd.DataFrame
+        The cached dataset.
+ """
+ logger.debug("Loading the cached dataset from Huggingface")
+
+ repo = config.get("PHONETIC_SIM").get("EMBEDDINGS").get("REPO")
+ # Remove the file extension to get the dataset name
+ dataset = config.get("PHONETIC_SIM").get("IPA").get("FILE").split(".")[0]
+ file = f"{dataset}_{method}.parquet"
+
+ dataset = pd.read_parquet(
+ hf_hub_download(
+ repo_id=repo,
+ filename=file,
+ cache_dir="datasets",
+ repo_type="dataset",
+ )
+ )
+ logger.info(f"Loaded parsed dataset from '{repo}' and file {file}.")
+ return dataset
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/clts_utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/clts.py
similarity index 97%
rename from fluentai/services/card_gen/mnemonic/phonetic/clts_utils.py
rename to backend/fluentai/services/mnemonic/phonetic/utils/clts.py
index 6b5aedb..e8ae427 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/clts_utils.py
+++ b/backend/fluentai/services/mnemonic/phonetic/utils/clts.py
@@ -4,7 +4,7 @@
from git import GitCommandError, RemoteProgress, Repo
from tqdm import tqdm
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
def check_directory_exists(directory_path):
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/utils.py b/backend/fluentai/services/mnemonic/phonetic/utils/vectors.py
similarity index 73%
rename from fluentai/services/card_gen/mnemonic/phonetic/utils.py
rename to backend/fluentai/services/mnemonic/phonetic/utils/vectors.py
index 4f0eb70..b2d8110 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/utils.py
+++ b/backend/fluentai/services/mnemonic/phonetic/utils/vectors.py
@@ -1,11 +1,8 @@
import ast
import numpy as np
-import pandas as pd
-from huggingface_hub import hf_hub_download
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
def pad_vectors(vectors):
@@ -65,35 +62,6 @@ def parse_vectors(dataset, vector_column="vectors"):
return dataset
-def load_cache(method: str = "panphon"):
- """
- Load the processed dataset from a cache file.
-
- Parameters
- ----------
- - cache_file: String, path to the cache file
-
- Returns
- -------
- - DataFrame containing the cached dataset
- """
- repo = config.get("PHONETIC_SIM").get("EMBEDDINGS").get("REPO")
- # Remove the file extension to get the dataset name
- dataset = config.get("PHONETIC_SIM").get("IPA").get("FILE").split(".")[0]
- file = f"{dataset}_{method}.parquet"
-
- dataset = pd.read_parquet(
- hf_hub_download(
- repo_id=repo,
- filename=file,
- cache_dir="datasets",
- repo_type="dataset",
- )
- )
- logger.info(f"Loaded parsed dataset from '{repo}' and file {file}.")
- return dataset
-
-
def flatten_vector(vec):
"""
Flatten a nested list of vectors into a single 1D NumPy array.
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/vectorizer.py b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py
similarity index 95%
rename from fluentai/services/card_gen/mnemonic/phonetic/vectorizer.py
rename to backend/fluentai/services/mnemonic/phonetic/vectorizer.py
index 3a48998..e816911 100644
--- a/fluentai/services/card_gen/mnemonic/phonetic/vectorizer.py
+++ b/backend/fluentai/services/mnemonic/phonetic/vectorizer.py
@@ -5,10 +5,10 @@
from huggingface_hub import hf_hub_download
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.phonetic.ipa2vec import ft, sv
-from fluentai.services.card_gen.mnemonic.phonetic.utils import flatten_vectors
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.phonetic.ipa2vec import ft, sv
+from fluentai.services.mnemonic.phonetic.utils.vectors import flatten_vectors
def vectorize_word_clts(word, sv):
diff --git a/fluentai/services/card_gen/mnemonic/orthographic/__init__.py b/backend/fluentai/services/mnemonic/semantic/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/orthographic/__init__.py
rename to backend/fluentai/services/mnemonic/semantic/__init__.py
diff --git a/fluentai/services/card_gen/mnemonic/semantic/semantic.py b/backend/fluentai/services/mnemonic/semantic/compute.py
similarity index 94%
rename from fluentai/services/card_gen/mnemonic/semantic/semantic.py
rename to backend/fluentai/services/mnemonic/semantic/compute.py
index 8c4d38b..a672a90 100644
--- a/fluentai/services/card_gen/mnemonic/semantic/semantic.py
+++ b/backend/fluentai/services/mnemonic/semantic/compute.py
@@ -1,8 +1,8 @@
from gensim.models.fasttext import FastTextKeyedVectors
from sentence_transformers import SentenceTransformer
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
class SemanticSimilarity:
@@ -68,7 +68,7 @@ def load_semantic_model(self) -> SentenceTransformer | FastTextKeyedVectors:
_description_
"""
if self.model_name == "fasttext":
- from fluentai.services.card_gen.utils.fasttext import fasttext_model
+ from fluentai.utils.fasttext import fasttext_model
return fasttext_model
diff --git a/fluentai/services/card_gen/mnemonic/semantic/eval.py b/backend/fluentai/services/mnemonic/semantic/eval.py
similarity index 97%
rename from fluentai/services/card_gen/mnemonic/semantic/eval.py
rename to backend/fluentai/services/mnemonic/semantic/eval.py
index 378a9d4..a51b456 100644
--- a/fluentai/services/card_gen/mnemonic/semantic/eval.py
+++ b/backend/fluentai/services/mnemonic/semantic/eval.py
@@ -9,9 +9,9 @@
from scipy.stats import pearsonr, spearmanr
from tqdm import tqdm
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity
def compute_dataset_hash(df: pd.DataFrame) -> str:
diff --git a/fluentai/services/card_gen/mnemonic/phonetic/__init__.py b/backend/fluentai/services/mnemonic/semantic/make_data/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/phonetic/__init__.py
rename to backend/fluentai/services/mnemonic/semantic/make_data/__init__.py
diff --git a/fluentai/services/card_gen/mnemonic/semantic/make_data/data.py b/backend/fluentai/services/mnemonic/semantic/make_data/data.py
similarity index 98%
rename from fluentai/services/card_gen/mnemonic/semantic/make_data/data.py
rename to backend/fluentai/services/mnemonic/semantic/make_data/data.py
index f34e403..4c736f5 100644
--- a/fluentai/services/card_gen/mnemonic/semantic/make_data/data.py
+++ b/backend/fluentai/services/mnemonic/semantic/make_data/data.py
@@ -1,7 +1,7 @@
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
# Function to scale similarity scores to 0-1 range
diff --git a/fluentai/services/card_gen/mnemonic/semantic/translator.py b/backend/fluentai/services/mnemonic/semantic/translator.py
similarity index 97%
rename from fluentai/services/card_gen/mnemonic/semantic/translator.py
rename to backend/fluentai/services/mnemonic/semantic/translator.py
index 480b38f..2dd3690 100644
--- a/fluentai/services/card_gen/mnemonic/semantic/translator.py
+++ b/backend/fluentai/services/mnemonic/semantic/translator.py
@@ -5,8 +5,8 @@
import pandas as pd
from googletrans import Translator
-from fluentai.services.card_gen.utils.lang_codes import map_language_code
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
+from fluentai.utils.lang_codes import map_language_code
def is_latin_script(word: str) -> bool:
diff --git a/fluentai/services/card_gen/mnemonic/word2mnemonic.py b/backend/fluentai/services/mnemonic/word2mnemonic.py
similarity index 77%
rename from fluentai/services/card_gen/mnemonic/word2mnemonic.py
rename to backend/fluentai/services/mnemonic/word2mnemonic.py
index 00db61f..ab7bc3c 100644
--- a/fluentai/services/card_gen/mnemonic/word2mnemonic.py
+++ b/backend/fluentai/services/mnemonic/word2mnemonic.py
@@ -1,27 +1,29 @@
import asyncio
-from fluentai.services.card_gen.constants.config import config, weights_percentages
-from fluentai.services.card_gen.constants.languages import G2P_LANGUAGES
-from fluentai.services.card_gen.mnemonic.imageability.imageability import (
+from fluentai.constants.config import config, weights_percentages
+from fluentai.constants.languages import G2P_LANGUAGES
+from fluentai.logger import logger
+from fluentai.services.mnemonic.imageability.predictor import (
ImageabilityPredictor,
)
-from fluentai.services.card_gen.mnemonic.orthographic.orthographic import (
+from fluentai.services.mnemonic.orthographic.compute import (
compute_damerau_levenshtein_similarity,
)
-from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P
-from fluentai.services.card_gen.mnemonic.phonetic.phonetic import top_phonetic, word2ipa
-from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity
-from fluentai.services.card_gen.mnemonic.semantic.translator import translate_word
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.services.mnemonic.phonetic.compute import top_phonetic, word2ipa
+from fluentai.services.mnemonic.phonetic.grapheme2phoneme import (
+ Grapheme2Phoneme,
+)
+from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity
+from fluentai.services.mnemonic.semantic.translator import translate_word
class Word2Mnemonic:
def __init__(self):
- self.g2p_model = G2P()
+ self.g2p_model = Grapheme2Phoneme()
self.imageability_predictor = ImageabilityPredictor()
self.semantic_sim = SemanticSimilarity()
- def generate_mnemonic(
+ async def generate_mnemonic(
self,
word: str,
language_code: str,
@@ -55,9 +57,7 @@ def generate_mnemonic(
logger.error(f"Invalid language code: {language_code}")
return
- translated_word, transliterated_word = asyncio.run(
- translate_word(word, language_code)
- )
+ translated_word, transliterated_word = await translate_word(word, language_code)
if keyword or key_sentence:
# If keyword is provided, use it directly for scoring
@@ -117,6 +117,6 @@ def generate_mnemonic(
if __name__ == "__main__":
w2m = Word2Mnemonic()
- print(w2m.generate_mnemonic("kat", "dut"))
- print(w2m.generate_mnemonic("house", "eng", keyword="হাউজ"))
- print(w2m.generate_mnemonic("猫", "zho-s"))
+ print(asyncio.run(w2m.generate_mnemonic("kat", "dut")))
+ print(asyncio.run(w2m.generate_mnemonic("house", "eng", keyword="হাউজ")))
+ print(asyncio.run(w2m.generate_mnemonic("猫", "zho-s")))
diff --git a/fluentai/services/card_gen/tts/fallback.py b/backend/fluentai/services/tts/fallback.py
similarity index 98%
rename from fluentai/services/card_gen/tts/fallback.py
rename to backend/fluentai/services/tts/fallback.py
index ebc106a..6315943 100644
--- a/fluentai/services/card_gen/tts/fallback.py
+++ b/backend/fluentai/services/tts/fallback.py
@@ -4,7 +4,7 @@
import scipy
from transformers import VitsModel, VitsTokenizer, pipeline
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
# Check if the language code is supported
supported_languages = pd.read_parquet("data/tts-languages.parquet")
diff --git a/fluentai/services/card_gen/tts/tts.py b/backend/fluentai/services/tts/tts.py
similarity index 90%
rename from fluentai/services/card_gen/tts/tts.py
rename to backend/fluentai/services/tts/tts.py
index 37e886c..14b7f2b 100644
--- a/fluentai/services/card_gen/tts/tts.py
+++ b/backend/fluentai/services/tts/tts.py
@@ -3,8 +3,8 @@
import gtts
from gtts import gTTS
-from fluentai.services.card_gen.utils.lang_codes import map_language_code
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
+from fluentai.utils.lang_codes import map_language_code
class TTS:
diff --git a/fluentai/services/card_gen/mnemonic/semantic/__init__.py b/backend/fluentai/utils/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/semantic/__init__.py
rename to backend/fluentai/utils/__init__.py
diff --git a/fluentai/services/card_gen/utils/fasttext.py b/backend/fluentai/utils/fasttext.py
similarity index 99%
rename from fluentai/services/card_gen/utils/fasttext.py
rename to backend/fluentai/utils/fasttext.py
index 7172f2a..0ce9284 100644
--- a/fluentai/services/card_gen/utils/fasttext.py
+++ b/backend/fluentai/utils/fasttext.py
@@ -9,7 +9,7 @@
from gensim.models.fasttext import FastTextKeyedVectors, load_facebook_vectors
from tqdm import tqdm
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.logger import logger
def download_file(url, dest_path, chunk_size=1024):
diff --git a/fluentai/services/card_gen/utils/lang_codes.py b/backend/fluentai/utils/lang_codes.py
similarity index 96%
rename from fluentai/services/card_gen/utils/lang_codes.py
rename to backend/fluentai/utils/lang_codes.py
index 72441b9..46e1ba0 100644
--- a/fluentai/services/card_gen/utils/lang_codes.py
+++ b/backend/fluentai/utils/lang_codes.py
@@ -1,8 +1,8 @@
import pycountry
+from googletrans import LANGCODES as TRANSLATE_LANGCODES
-from fluentai.services.card_gen.constants.languages import (
+from fluentai.constants.languages import (
G2P_LANGCODES,
- TRANSLATE_LANGCODES,
)
diff --git a/fluentai/services/card_gen/utils/load_models.py b/backend/fluentai/utils/load_models.py
similarity index 83%
rename from fluentai/services/card_gen/utils/load_models.py
rename to backend/fluentai/utils/load_models.py
index 6c9a2d0..c714fd1 100644
--- a/fluentai/services/card_gen/utils/load_models.py
+++ b/backend/fluentai/utils/load_models.py
@@ -3,12 +3,12 @@
import torch
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.imagine.image_gen import ImageGen
-from fluentai.services.card_gen.imagine.verbal_cue import VerbalCue
-from fluentai.services.card_gen.mnemonic.phonetic.g2p import G2P
-from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity
-from fluentai.services.card_gen.utils.logger import logger
+from fluentai.constants.config import config
+from fluentai.logger import logger
+from fluentai.services.imagine.image_gen import ImageGen
+from fluentai.services.imagine.verbal_cue_gen import VerbalCue
+from fluentai.services.mnemonic.phonetic.grapheme2phoneme import Grapheme2Phoneme
+from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity
def get_model_dir_name(model: str) -> str:
@@ -43,7 +43,7 @@ def download_all_models():
g2p_model = config.get("G2P").get("MODEL")
if get_model_dir_name(g2p_model) not in downloaded_models:
logger.info(f"Downloading G2P model: {g2p_model}")
- clean(G2P())
+ clean(Grapheme2Phoneme())
# LLM model
llm_model = config.get("LLM").get("MODEL")
diff --git a/backend/fluentai/utils/model_mem.py b/backend/fluentai/utils/model_mem.py
new file mode 100644
index 0000000..614b519
--- /dev/null
+++ b/backend/fluentai/utils/model_mem.py
@@ -0,0 +1,71 @@
+import functools
+import gc
+
+import torch
+
+from fluentai.logger import logger
+
+
+def manage_memory(targets=None, delete_attrs=None, move_kwargs=None):
+ """
+    Decorator that moves the given attributes to the GPU before the wrapped method runs and back to the CPU afterwards.
+
+    Parameters
+    ----------
+    targets : list[str]
+        Attribute names to move to the GPU (e.g., ["model", "pipe"]).
+    delete_attrs : list[str]
+        Attribute names to delete after the method executes.
+    move_kwargs : dict
+        Extra keyword arguments passed to the ``.to()`` calls.
+
+    Returns
+    -------
+    function
+        The decorated method.
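+
+    Example
+    -------
+    A minimal sketch, assuming a class that defines ``pipe``, ``offload`` and
+    ``config`` attributes (as ``ImageGen`` and ``VerbalCue`` do)::
+
+        @manage_memory(targets=["pipe"], delete_attrs=["pipe"])
+        def generate_img(self, prompt):
+            return self.pipe(prompt)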
+ """
+ if targets is None:
+ targets = []
+ if delete_attrs is None:
+ delete_attrs = []
+ if move_kwargs is None:
+ move_kwargs = {}
+
+ def decorator(method):
+ @functools.wraps(method)
+ def wrapper(self, *args, **kwargs):
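+            # Assumes the instance defines `pipe`, `offload`, `config`, and an
+            # `_initialize_pipe()` method (as ImageGen and VerbalCue do).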
+ # Initialize the pipe if it's not already loaded
+ if getattr(self, "pipe", None) is None:
+ self._initialize_pipe()
+
+ # Move specified targets to GPU if offloading is enabled
+ if getattr(self, "offload", False):
+ for target in targets:
+ attr = getattr(self, target, None)
+ if attr is not None:
+ logger.debug(f"Moving {target} to GPU (cuda).")
+ attr.to("cuda", **move_kwargs)
+
+ try:
+ # Execute the decorated method
+ result = method(self, *args, **kwargs)
+ finally:
+ # Delete specified attributes if DELETE_AFTER_USE is True
+ if self.config.get("DELETE_AFTER_USE", True):
+ for attr_name in delete_attrs:
+ attr = getattr(self, attr_name, None)
+ if attr is not None:
+ logger.debug(f"Deleting {attr_name} to free up memory.")
+ delattr(self, attr_name)
+ setattr(self, attr_name, None)
+ gc.collect()
+ torch.cuda.empty_cache()
+
+ # Move specified targets back to CPU if offloading is enabled
+ if getattr(self, "offload", False):
+ for target in targets:
+ attr = getattr(self, target, None)
+ if attr is not None:
+ logger.debug(f"Moving {target} back to CPU.")
+ attr.to("cpu", **move_kwargs)
+
+ return result
+
+ return wrapper
+
+ return decorator
diff --git a/requirements/gpu.txt b/backend/gpu-requirements.txt
similarity index 100%
rename from requirements/gpu.txt
rename to backend/gpu-requirements.txt
diff --git a/pyproject.toml b/backend/pyproject.toml
similarity index 100%
rename from pyproject.toml
rename to backend/pyproject.toml
diff --git a/requirements/requirements.txt b/backend/requirements.txt
similarity index 100%
rename from requirements/requirements.txt
rename to backend/requirements.txt
diff --git a/setup.py b/backend/setup.py
similarity index 77%
rename from setup.py
rename to backend/setup.py
index c53d0a6..41c80f0 100644
--- a/setup.py
+++ b/backend/setup.py
@@ -29,11 +29,7 @@ def parse_requirements(filename: str) -> list[str]:
# Read dependencies from requirements.txt
-requirements = parse_requirements("requirements/requirements.txt")
-
-# Read the long description from README.md
-with open("README.md", encoding="utf-8") as fh:
- long_description = fh.read()
+requirements = parse_requirements("requirements.txt")
setup(
name="fluentai",
@@ -41,8 +37,6 @@ def parse_requirements(filename: str) -> list[str]:
packages=find_packages(),
install_requires=requirements,
description="FluentAI short description",
- long_description=long_description,
- long_description_content_type="text/markdown",
url="https://github.com/StephanAkkerman/",
classifiers=[
"Programming Language :: Python :: 3",
@@ -51,7 +45,7 @@ def parse_requirements(filename: str) -> list[str]:
],
entry_points={
"console_scripts": [
- "fluentai-main=fluentai.main:main", # Adjust as needed
+ "fluentai-main=fluentai.main:main",
],
},
python_requires=">=3.10",
diff --git a/fluentai/services/card_gen/mnemonic/semantic/make_data/__init__.py b/backend/tests/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/mnemonic/semantic/make_data/__init__.py
rename to backend/tests/__init__.py
diff --git a/fluentai/services/card_gen/utils/__init__.py b/backend/tests/test_services/__init__.py
similarity index 100%
rename from fluentai/services/card_gen/utils/__init__.py
rename to backend/tests/test_services/__init__.py
diff --git a/tests/card_gen/test_imageability.py b/backend/tests/test_services/test_imageability.py
similarity index 88%
rename from tests/card_gen/test_imageability.py
rename to backend/tests/test_services/test_imageability.py
index d68c554..4b18a01 100644
--- a/tests/card_gen/test_imageability.py
+++ b/backend/tests/test_services/test_imageability.py
@@ -1,5 +1,3 @@
-# tests/test_card_gen.py
-
import os
from unittest.mock import MagicMock
@@ -10,7 +8,7 @@
os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa
# Import the functions and classes to be tested
-from fluentai.services.card_gen.mnemonic.imageability.predictions import (
+from fluentai.services.mnemonic.imageability.predictor import (
ImageabilityPredictor,
make_predictions,
)
@@ -22,7 +20,7 @@ def mock_hf_hub_download(mocker):
Fixture to mock hf_hub_download function.
"""
return mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.hf_hub_download"
+ "fluentai.services.mnemonic.imageability.predictor.hf_hub_download"
)
@@ -31,9 +29,7 @@ def mock_joblib_load(mocker):
"""
Fixture to mock joblib.load function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.joblib.load"
- )
+ return mocker.patch("fluentai.services.mnemonic.imageability.predictor.joblib.load")
@pytest.fixture
@@ -41,9 +37,7 @@ def mock_pd_read_csv(mocker):
"""
Fixture to mock pandas.read_csv function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.pd.read_csv"
- )
+ return mocker.patch("fluentai.services.mnemonic.imageability.predictor.pd.read_csv")
@pytest.fixture
@@ -52,7 +46,7 @@ def mock_ImageabilityEmbeddings(mocker):
Fixture to mock ImageabilityEmbeddings class.
"""
mock_class = mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.ImageabilityEmbeddings"
+ "fluentai.services.mnemonic.imageability.predictor.ImageabilityEmbeddings"
)
mock_instance = MagicMock()
mock_instance.get_embedding.side_effect = lambda word: np.array([1.0, 2.0, 3.0])
@@ -72,7 +66,7 @@ def test_make_predictions(
"""
# Mock the configuration
mock_config = mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.config"
+ "fluentai.services.mnemonic.imageability.predictor.config"
)
mock_config.get.side_effect = lambda key: {
"IMAGEABILITY": {
@@ -109,7 +103,7 @@ def test_make_predictions(
# Mock tqdm to just return the iterator
mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.tqdm",
+ "fluentai.services.mnemonic.imageability.predictor.tqdm",
side_effect=lambda x, total=None: x,
)
@@ -142,7 +136,7 @@ def test_ImageabilityPredictor_get_prediction(
"""
# Mock the configuration
mock_config = mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.config"
+ "fluentai.services.mnemonic.imageability.predictor.config"
)
mock_config.get.side_effect = lambda key: {
"IMAGEABILITY": {
@@ -185,7 +179,7 @@ def test_ImageabilityPredictor_get_predictions(
"""
# Mock the configuration
mock_config = mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.config"
+ "fluentai.services.mnemonic.imageability.predictor.config"
)
mock_config.get.side_effect = lambda key: {
"IMAGEABILITY": {
@@ -225,7 +219,7 @@ def test_ImageabilityPredictor_get_column_imageability(
"""
# Mock the configuration
mock_config = mocker.patch(
- "fluentai.services.card_gen.mnemonic.imageability.predictions.config"
+ "fluentai.services.mnemonic.imageability.predictor.config"
)
mock_config.get.side_effect = lambda key: {
"IMAGEABILITY": {
diff --git a/tests/card_gen/test_orthographic.py b/backend/tests/test_services/test_orthographic.py
similarity index 94%
rename from tests/card_gen/test_orthographic.py
rename to backend/tests/test_services/test_orthographic.py
index f137252..ae0885d 100644
--- a/tests/card_gen/test_orthographic.py
+++ b/backend/tests/test_services/test_orthographic.py
@@ -1,12 +1,10 @@
-# test_similarity.py
-
import os
import pytest
os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa
-from fluentai.services.card_gen.mnemonic.orthographic.orthographic import (
+from fluentai.services.mnemonic.orthographic.compute import (
compute_damerau_levenshtein_similarity,
)
diff --git a/tests/card_gen/test_phonetic.py b/backend/tests/test_services/test_phonetic.py
similarity index 86%
rename from tests/card_gen/test_phonetic.py
rename to backend/tests/test_services/test_phonetic.py
index f9bf266..64e293f 100644
--- a/tests/card_gen/test_phonetic.py
+++ b/backend/tests/test_services/test_phonetic.py
@@ -1,5 +1,3 @@
-# tests/card_gen/test_phonetic.py
-
import os
from unittest.mock import MagicMock, patch
@@ -10,7 +8,7 @@
os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa
# Import the top_phonetic function
-from fluentai.services.card_gen.mnemonic.phonetic.phonetic import top_phonetic
+from fluentai.services.mnemonic.phonetic.compute import top_phonetic
@pytest.fixture
@@ -18,7 +16,7 @@ def mock_config(mocker):
"""
Fixture to mock the config.get method.
"""
- return mocker.patch("fluentai.services.card_gen.mnemonic.phonetic.phonetic.config")
+ return mocker.patch("fluentai.services.mnemonic.phonetic.compute.config")
@pytest.fixture
@@ -26,19 +24,15 @@ def mock_word2ipa(mocker):
"""
Fixture to mock the word2ipa function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.word2ipa"
- )
+ return mocker.patch("fluentai.services.mnemonic.phonetic.compute.word2ipa")
@pytest.fixture
-def mock_load_cache(mocker):
+def mock_load_from_cache(mocker):
"""
- Fixture to mock the load_cache function.
+ Fixture to mock the load_from_cache function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.load_cache"
- )
+ return mocker.patch("fluentai.services.mnemonic.phonetic.compute.load_from_cache")
@pytest.fixture
@@ -46,9 +40,7 @@ def mock_pad_vectors(mocker):
"""
Fixture to mock the pad_vectors function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.pad_vectors"
- )
+ return mocker.patch("fluentai.services.mnemonic.phonetic.compute.pad_vectors")
@pytest.fixture
@@ -56,9 +48,7 @@ def mock_convert_to_matrix(mocker):
"""
Fixture to mock the convert_to_matrix function.
"""
- return mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.convert_to_matrix"
- )
+ return mocker.patch("fluentai.services.mnemonic.phonetic.compute.convert_to_matrix")
@pytest.fixture
@@ -67,7 +57,7 @@ def mock_faiss_normalize_L2(mocker):
Fixture to mock the faiss.normalize_L2 function.
"""
return mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.faiss.normalize_L2"
+ "fluentai.services.mnemonic.phonetic.compute.faiss.normalize_L2"
)
@@ -80,7 +70,7 @@ def mock_faiss_IndexFlatIP(mocker):
"""
instance_mock = MagicMock()
constructor_mock = mocker.patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.faiss.IndexFlatIP",
+ "fluentai.services.mnemonic.phonetic.compute.faiss.IndexFlatIP",
return_value=instance_mock,
)
return constructor_mock, instance_mock
@@ -89,7 +79,7 @@ def mock_faiss_IndexFlatIP(mocker):
def test_top_phonetic_success(
mock_config,
mock_word2ipa,
- mock_load_cache,
+ mock_load_from_cache,
mock_pad_vectors,
mock_convert_to_matrix,
mock_faiss_normalize_L2,
@@ -124,8 +114,8 @@ def test_top_phonetic_success(
}
)
- # Mock the load_cache function to return the mock dataset
- mock_load_cache.return_value = mock_dataset
+ # Mock the load_from_cache function to return the mock dataset
+ mock_load_from_cache.return_value = mock_dataset
# Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity)
mock_pad_vectors.return_value = [
@@ -150,7 +140,7 @@ def test_top_phonetic_success(
# Create a mock vectorizer function (panphon_vec or soundvec)
with patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.panphon_vec",
+ "fluentai.services.mnemonic.phonetic.compute.panphon_vec",
return_value=[[0.1, 0.2, 0.3]],
):
# Initialize a mock g2p_model with a g2p method
@@ -169,8 +159,8 @@ def test_top_phonetic_success(
# Ensure word2ipa was called correctly
mock_word2ipa.assert_called_once_with("kucing", "eng-us", mock_g2p_model)
- # Ensure load_cache was called with the correct method
- mock_load_cache.assert_called_once_with("panphon")
+ # Ensure load_from_cache was called with the correct method
+ mock_load_from_cache.assert_called_once_with("panphon")
# Ensure pad_vectors was called with the correct data
mock_pad_vectors.assert_called_once_with(
@@ -220,7 +210,7 @@ def test_top_phonetic_success(
def test_top_phonetic_no_results(
mock_config,
mock_word2ipa,
- mock_load_cache,
+ mock_load_from_cache,
mock_pad_vectors,
mock_convert_to_matrix,
mock_faiss_normalize_L2,
@@ -247,8 +237,8 @@ def test_top_phonetic_no_results(
{"token_ort": [], "token_ipa": [], "flattened_vectors": []}
)
- # Mock the load_cache function to return the empty dataset
- mock_load_cache.return_value = mock_dataset
+ # Mock the load_from_cache function to return the empty dataset
+ mock_load_from_cache.return_value = mock_dataset
# Mock pad_vectors to return empty list
mock_pad_vectors.return_value = []
@@ -269,7 +259,7 @@ def test_top_phonetic_no_results(
# Create a mock vectorizer function (panphon_vec or soundvec)
with patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.panphon_vec",
+ "fluentai.services.mnemonic.phonetic.compute.panphon_vec",
return_value=[[]],
):
# Initialize a mock g2p_model with a g2p method
@@ -285,8 +275,8 @@ def test_top_phonetic_no_results(
# Ensure word2ipa was called correctly
mock_word2ipa.assert_called_once_with("test", "eng-us", mock_g2p_model)
- # Ensure load_cache was called with the correct method
- mock_load_cache.assert_called_once_with("panphon")
+ # Ensure load_from_cache was called with the correct method
+ mock_load_from_cache.assert_called_once_with("panphon")
# Ensure pad_vectors was called with the correct data
mock_pad_vectors.assert_called_once_with(
@@ -328,7 +318,7 @@ def test_top_phonetic_no_results(
def test_top_phonetic_invalid_language_code(
mock_config,
mock_word2ipa,
- mock_load_cache,
+ mock_load_from_cache,
mock_pad_vectors,
mock_convert_to_matrix,
mock_faiss_normalize_L2,
@@ -344,7 +334,7 @@ def test_top_phonetic_invalid_language_code(
# Setup mock config.get to return necessary configuration
mock_config.get.return_value = {
# Testing with a different vectorizer
- "EMBEDDINGS": {"METHOD": "soundvec"},
+ "EMBEDDINGS": {"METHOD": "clts"},
"PHONETIC_SIM": {"IPA_REPO": "mock_repo", "IPA_FILE": "mock_file.tsv"},
}
@@ -364,8 +354,8 @@ def test_top_phonetic_invalid_language_code(
}
)
- # Mock the load_cache function to return the mock dataset
- mock_load_cache.return_value = mock_dataset
+ # Mock the load_from_cache function to return the mock dataset
+ mock_load_from_cache.return_value = mock_dataset
# Mock pad_vectors to return padded vectors (assuming dimension=3 for simplicity)
mock_pad_vectors.return_value = [
@@ -390,7 +380,7 @@ def test_top_phonetic_invalid_language_code(
# Create a mock vectorizer function (soundvec)
with patch(
- "fluentai.services.card_gen.mnemonic.phonetic.phonetic.soundvec",
+ "fluentai.services.mnemonic.phonetic.compute.soundvec",
return_value=[[0.2, 0.3, 0.4]],
):
# Initialize a mock g2p_model with a g2p method
@@ -409,8 +399,8 @@ def test_top_phonetic_invalid_language_code(
# Ensure word2ipa was called correctly
mock_word2ipa.assert_called_once_with("nyangang", "mal", mock_g2p_model)
- # Ensure load_cache was called with the correct method
- mock_load_cache.assert_called_once_with("soundvec")
+ # Ensure load_from_cache was called with the correct method
+ mock_load_from_cache.assert_called_once_with("clts")
# Ensure pad_vectors was called with the correct data
mock_pad_vectors.assert_called_once_with(
diff --git a/tests/card_gen/test_semantic.py b/backend/tests/test_services/test_semantic.py
similarity index 87%
rename from tests/card_gen/test_semantic.py
rename to backend/tests/test_services/test_semantic.py
index 9b28ab9..44bf2dd 100644
--- a/tests/card_gen/test_semantic.py
+++ b/backend/tests/test_services/test_semantic.py
@@ -1,5 +1,3 @@
-# tests/card_gen/test_semantic.py
-
import os
from unittest.mock import MagicMock, patch
@@ -7,8 +5,8 @@
os.environ["FLUENTAI_CONFIG_PATH"] = "config.yaml" # noqa
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.mnemonic.semantic.semantic import SemanticSimilarity
+from fluentai.constants.config import config
+from fluentai.services.mnemonic.semantic.compute import SemanticSimilarity
model_name = config.get("SEMANTIC_SIM").get("MODEL").lower()
@@ -18,7 +16,7 @@ def mock_config(mocker):
"""
Fixture to mock the config.get method.
"""
- return mocker.patch("fluentai.services.card_gen.mnemonic.semantic.semantic.config")
+ return mocker.patch("fluentai.services.mnemonic.semantic.compute.config")
@pytest.fixture
@@ -48,7 +46,7 @@ def test_compute_similarity_transformer(mock_config, mock_sentence_transformer):
# Patch 'SentenceTransformer' to return the mock transformer model
with patch(
- "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer",
+ "fluentai.services.mnemonic.semantic.compute.SentenceTransformer",
return_value=mock_sentence_transformer,
):
# Initialize SemanticSimilarity
@@ -87,7 +85,7 @@ def test_compute_similarity_word_not_in_transformer(
# Patch 'SentenceTransformer' to return the mock transformer model
with patch(
- "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer",
+ "fluentai.services.mnemonic.semantic.compute.SentenceTransformer",
return_value=mock_sentence_transformer,
):
# Initialize SemanticSimilarity
@@ -112,7 +110,7 @@ def test_load_semantic_model_transformer(mock_config, mock_sentence_transformer)
# Patch 'SentenceTransformer' to return the mock transformer model
with patch(
- "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer",
+ "fluentai.services.mnemonic.semantic.compute.SentenceTransformer",
return_value=mock_sentence_transformer,
):
# Initialize SemanticSimilarity
@@ -130,7 +128,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog
"""
Test the example function to ensure it logs similarities correctly.
"""
- from fluentai.services.card_gen.mnemonic.semantic.semantic import example
+ from fluentai.services.mnemonic.semantic.compute import example
# Setup mock config to return models
mock_config.get.return_value = {
@@ -140,7 +138,7 @@ def test_example_function(mocker, mock_config, mock_sentence_transformer, caplog
# Patch 'SentenceTransformer' to return the mock transformer model
with patch(
- "fluentai.services.card_gen.mnemonic.semantic.semantic.SentenceTransformer",
+ "fluentai.services.mnemonic.semantic.compute.SentenceTransformer",
return_value=mock_sentence_transformer,
):
# Configure the mock models
diff --git a/config.yaml b/config.yaml
index 6874b7a..880ea80 100644
--- a/config.yaml
+++ b/config.yaml
@@ -17,6 +17,7 @@ WORD_LIMIT: 1000
G2P:
MODEL: "charsiu/g2p_multilingual_byT5_small_100"
TOKENIZER: "google/byt5-small"
+ LANGUAGE_JSON: "data/languages.json"
LLM:
MODEL: "microsoft/Phi-3-mini-4k-instruct"
diff --git a/fluentai/anki/anki.py b/fluentai/anki/anki.py
deleted file mode 100644
index 480fa3a..0000000
--- a/fluentai/anki/anki.py
+++ /dev/null
@@ -1,308 +0,0 @@
-import base64
-import html
-import os
-
-import requests
-
-from fluentai.services.card_gen.constants.config import config
-from fluentai.services.card_gen.utils.logger import logger
-
-
-class AnkiConnect:
- # URL and version for AnkiConnect
- URL = "http://localhost:8765/"
- VERSION = 6
-
- def invoke(self, action: str, params: dict = None):
- """Invoke an AnkiConnect action with optional parameters.
-
- Parameters
- ----------
- action : str
- The action to invoke. See the AnkiConnect API documentation for a list of actions.
- params : dict, optional
- The parameters associated with this action, by default None
-
- Returns
- -------
- dict
- The result of the action.
-
- Raises
- ------
- Exception
- If the response does not contain the expected fields.
- Exception
- If the response contains an error message.
- Exception
- If the response contains an unexpected number of fields.
- """
- payload = {"action": action, "version": self.VERSION}
-
- # Add parameters if they exist
- if params:
- payload["params"] = params
-
- try:
- response = requests.post(self.URL, json=payload).json()
- except requests.exceptions.ConnectionError:
- logger.error(
- """Could not establish connection with Anki.
-This can be caused by two things:
-1. Anki is not running
-2. Anki does not have the Anki-Connect plugin: https://foosoft.net/projects/anki-connect/."""
- )
- return
-
- if len(response) != 2:
- logger.error("Unexpected number of fields in response")
- logger.error(response)
- return
-
- if "error" not in response or "result" not in response:
- logger.error("Response is missing required fields")
- logger.error(response)
- return
-
- if response["error"] is not None:
- if "model was not found" in response["error"]:
- logger.warning(
- f"The {response['error']}. We will create the model now and try again."
- )
- # Create the model if it does not exist
- create_model(params["note"]["modelName"])
-
- # Retry the action
- return self.invoke(action, params)
-
- if "deck was not found" in response["error"]:
- logger.error(
- f"The following {response['error']}. Please ensure a deck with that name exists in Anki."
- )
- else:
- logger.error(response["error"])
- return
-
- return response["result"]
-
- def get_deck_names(self) -> list[str]:
- """Retrieves a list of deck names from Anki.
-
- Returns
- -------
- list[str]
- List of deck names.
- """
- try:
- return self.invoke("deckNames")
- except Exception:
- logger.error("Could not establish connection with Anki")
- logger.error(
- "Please make sure Anki is running and AnkiConnect is installed"
- )
-
- def store_media_file(self, src_file_path: str, word: str) -> str:
- """Stores a media file in Anki's collection.
-
- Parameters
- ----------
- src_file_path : str
- The path to the file to store.
- word : str
- The word to use as the filename in Anki.
-
- Returns
- -------
- str
- Returns the filename used in Anki.
- """
- # Sanitize the word to remove special characters
- sanitized_word = "".join(
- [c for c in word if c.isalnum() or c in (" ", "-")]
- ).rstrip()
-
- # Get the file extension
- ext = os.path.splitext(src_file_path)[1]
- dst = f"{sanitized_word}{ext}"
-
- # Encode the file as base64
- with open(src_file_path, "rb") as f:
- b64_output = base64.b64encode(f.read()).decode("utf-8")
-
- self.invoke("storeMediaFile", {"filename": dst, "data": b64_output})
-
- return dst
-
- @staticmethod
- def format_notes(notes: str) -> str:
- """Formats notes by escaping HTML and converting line breaks.
-
- Parameters
- ----------
- notes : str
- The notes to format.
-
- Returns
- -------
- str
- The formatted notes.
- """
-        html_notes = "<br>".join(html.escape(notes.strip()).split("\n"))
- return f"