Skip to content

Commit

Permalink
Comment out Google usage and text splitters
Browse files Browse the repository at this point in the history
  • Loading branch information
ccheng26 committed Apr 17, 2024
1 parent 047c5b6 commit 7b841d6
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions 02-household-queries/optimize_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from bs4 import BeautifulSoup
import dotenv
from langchain.docstore.document import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter, NLTKTextSplitter, SpacyTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter #, NLTKTextSplitter, SpacyTextSplitter
import json
from langchain_community.embeddings import (
SentenceTransformerEmbeddings,
Expand All @@ -11,13 +11,13 @@
import chromadb
from chromadb.config import Settings

from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
# from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

from llm import ollama_client
from retrieval import create_retriever
from langchain_community.vectorstores import Chroma
# import nltk
import spacy
# import spacy

dotenv.load_dotenv()

Expand Down Expand Up @@ -85,7 +85,6 @@ def add_json_html_data_to_vector_db(vectordb, file_path, content_key, index_key,

def ingest_call(vectordb, chunk_size, chunk_overlap):
# download from https://drive.google.com/drive/folders/1DkAQ03bBVIPoO1d8gcHVnilQ-9VXfhJ8?usp=drive_link
# spacy.cli.download("en_core_web_sm")
guru_file_path = "./guru_cards_for_nava.json"
add_json_html_data_to_vector_db(
vectordb=vectordb,
Expand Down Expand Up @@ -134,7 +133,6 @@ def evaluate_retrieval(vectordb, recall_results):
return recall_results

def run_embedding_func_and_eval_retrieval(embeddings, chunk_size, chunk_overlap):
# nltk.download('punkt')
selected_embedding = embeddings["func"]
persistent_client= chromadb.PersistentClient(
settings=Settings(allow_reset=True), path="./chroma_db"
Expand All @@ -151,6 +149,8 @@ def run_embedding_func_and_eval_retrieval(embeddings, chunk_size, chunk_overlap)
persistent_client.reset()
return recall_results

# nltk.download('punkt')
# spacy.cli.download("en_core_web_sm")
overall_results = []
for embedding in EMBEDDINGS:
print("Embedding: " + embedding)
Expand Down

0 comments on commit 7b841d6

Please sign in to comment.