The app now uses aidb instead of pgvector

bilge-ince · Jul 31, 2024 · ead5a06 · ead5a06
1 parent b1cbbff
commit ead5a06
Show file tree

Hide file tree

Showing 8 changed files with 85 additions and 118 deletions.
diff --git a/README.md b/README.md
@@ -1,18 +1,18 @@
-# pgvector-rag
-An application to demonstrate how can you make a RAG using pgvector and PostgreSQL
+# aidb-rag
+An application to demonstrate how you can build a RAG application using EDB's aidb and PostgreSQL
 
 ## Requirements
 - Python3
 - PostgreSQL
-- pgvector
+- aidb
 
 ## Install
 
 Clone the repository
 
 ```
-git clone git@github.com:gulcin/pgvector-rag.git
-cd pgvector-rag
+git clone git@github.com:gulcin/aidb-rag-app.git
+cd aidb-rag-app
 ```
 
 Install Dependencies
@@ -31,10 +31,14 @@ cp .env-example .env
 
 ## Run
 
+First, get your `aidb` extension running by following the step-by-step installation guide: https://www.enterprisedb.com/docs/edb-postgres-ai/ai-ml/install-tech-preview/
+
+Make sure your aidb extension is ready to accept connections. Then you can continue as follows:
+
 ```
 python app.py --help
 
-usage: app.py [-h] {create-db,import-data,chat} ...
+usage: app.py [-h] {create-db,import-data,chat} {data_source}
 
 Application Description
 
@@ -49,34 +53,3 @@ Subcommands:
     chat                Use chat feature
 ```
 
-## Run UI 
-
-We use Streamlit for creating a simple Graphical User Interface for our pgvector-rag app. 
-
-To be able to run Streamlit please do the following:
-
-```
-pip install streamlit
-```
-
-**Add keys/secrets to Streamlit secrets**
-
-If you need to store secrets that Streamlit app will use, you can do this by creating
-`.streamlit/secrets.toml` file under Streamlit directory and adding lines like following:
-
-```
-# .streamlit/secrets.toml
-OPENAI_API_KEY = "YOUR_API_KEY"
-```
-**Run Streamlit app for generating UI**
-
-```
-streamlit run chatgptui.py
-```
-You can create as many apps you'd like and place them under Streamlit directory,
-edit the keys if needed and run them like described above. 
-
-
-
-
-
diff --git a/app.py b/app.py
@@ -50,32 +50,40 @@ def main():
 
     args = parser.parse_args()
 
-    if hasattr(args, "func"):
-        if torch.cuda.is_available():
-            device = "cuda"
-            bnb_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_compute_dtype=torch.bfloat16
+    if args.command==Command.CHAT.value:
+        if hasattr(args, "func"):
+            if torch.cuda.is_available():
+                device = "cuda"
+                bnb_config = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_use_double_quant=True,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_compute_dtype=torch.bfloat16
+                )
+                dtype = torch.float16
+            elif torch.backends.mps.is_available():
+                device = "mps"
+                bnb_config = None
+                dtype = torch.float16  # MPS supports float16
+
+            else:
+                device = "cpu"
+                bnb_config = None
+            tokenizer = AutoTokenizer.from_pretrained(
+                os.getenv("TOKENIZER_NAME"),
+                token=os.getenv("HUGGING_FACE_ACCESS_TOKEN"),
             )
-        else:
-            device = "cpu"
-            bnb_config = None
-
-        tokenizer = AutoTokenizer.from_pretrained(
-            os.getenv("TOKENIZER_NAME"),
-            token=os.getenv("HUGGING_FACE_ACCESS_TOKEN"),
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            os.getenv("MODEL_NAME"),
-            token=os.getenv("HUGGING_FACE_ACCESS_TOKEN"),
-            quantization_config=bnb_config,
-            device_map=device,
-            torch_dtype=torch.float16,
-        )
-
-        args.func(args, model, device, tokenizer)
+            model = AutoModelForCausalLM.from_pretrained(
+                os.getenv("MODEL_NAME"),
+                token=os.getenv("HUGGING_FACE_ACCESS_TOKEN"),
+                quantization_config=bnb_config,
+                device_map=device,
+                torch_dtype=torch.float16,
+            )
+
+            args.func(args, model, device, tokenizer)
+    elif ((args.command==Command.IMPORT_DATA.value) or (args.command==Command.CREATE_DB.value)):
+            args.func(args)
     else:
         print("Invalid command. Use '--help' for assistance.")
 

diff --git a/commands/chat.py b/commands/chat.py
@@ -10,7 +10,7 @@ def chat(args, model, device, tokenizer):
         if question.lower() == "exit":
             break
 
-        answer = rag_query(tokenizer=tokenizer, model=model, device=device, query=question)
+        answer = rag_query(tokenizer=tokenizer, model=model, device=device, query=question, topk=5)
 
         print(f"You Asked: {question}")
         print(f"Answer: {answer}")

diff --git a/commands/create_db.py b/commands/create_db.py
@@ -2,7 +2,7 @@
 import psycopg2
 
 
-def create_db(args, model, device, tokenizer):
+def create_db(args):
     db_config = {
         "user": os.getenv("DB_USER"),
         "password": os.getenv("DB_PASSWORD"),
@@ -32,12 +32,12 @@ def create_db(args, model, device, tokenizer):
     conn.autocommit = True
 
     cursor = conn.cursor()
-    cursor.execute("CREATE EXTENSION IF NOT EXISTS vector;")
+    cursor.execute("CREATE EXTENSION IF NOT EXISTS aidb cascade;")
     cursor.close()
 
     cursor = conn.cursor()
     cursor.execute(
-        "CREATE TABLE IF NOT EXISTS embeddings (id serial PRIMARY KEY, doc_fragment text, embeddings vector(4096));"
+        "CREATE TABLE IF NOT EXISTS documents (id text PRIMARY KEY, doc_fragment text);"
     )
     cursor.close()
 

diff --git a/commands/import_data.py b/commands/import_data.py
@@ -1,30 +1,22 @@
-import numpy as np
-
 from db import get_connection
 from embedding import generate_embeddings, read_pdf_file
 
 
-def import_data(args, model, device, tokenizer):
+def import_data(args):
     data = read_pdf_file(args.data_source)
-
-    embeddings = [
-        generate_embeddings(tokenizer=tokenizer, model=model, device=device, text=line)
-        for line in data
-    ]
-
     conn = get_connection()
     cursor = conn.cursor()
-
     # Store each embedding in the database
-    for i, (doc_fragment, embedding) in enumerate(embeddings):
+    for i, (doc_fragment) in enumerate(data):
         cursor.execute(
-            "INSERT INTO embeddings (id, doc_fragment, embeddings) VALUES (%s, %s, %s)",
-            (i, doc_fragment, embedding[0]),
+            "INSERT INTO documents (id, doc_fragment) VALUES (%s, %s)",
+            (i, doc_fragment),
         )
     conn.commit()
-
+    generate_embeddings()
     print(
         "import-data command executed. Data source: {}".format(
             args.data_source
         )
     )
+
diff --git a/embedding.py b/embedding.py
@@ -1,15 +1,25 @@
 # importing all the required modules
 import PyPDF2
-import torch
-from transformers import pipeline
+from db import get_connection
 
-def generate_embeddings(tokenizer, model, device, text):
-    inputs = tokenizer(
-        text, return_tensors="pt", truncation=True, max_length=512
-    ).to(device)
-    with torch.no_grad():
-        outputs = model(**inputs, output_hidden_states=True)
-    return text, outputs.hidden_states[-1].mean(dim=1).tolist()
+def generate_embeddings():
+    conn = get_connection()
+    cursor = conn.cursor()
+
+    cursor.execute("""
+                    SELECT aidb.create_pg_retriever(
+                        'documents_embeddings',
+                        'public',
+                        'id',
+                        'all-MiniLM-L6-v2',
+                        'text',
+                        'documents',
+                        ARRAY['id', 'doc_fragment'],
+                    FALSE);""")
+    cursor.execute("""
+            SELECT aidb.refresh_retriever('documents_embeddings');""")
+    conn.commit()
+    return None
 
 
 def read_pdf_file(pdf_path):

diff --git a/rag.py b/rag.py
@@ -1,11 +1,4 @@
-from itertools import chain
-import torch
-from pgvector.psycopg2 import register_vector
-
 from db import get_connection
-from embedding import generate_embeddings
-
-from pgvector.psycopg2 import register_vector
 
 template = """<s>[INST]
 You are a friendly documentation search bot.
@@ -20,38 +13,30 @@
 Answer:
 """
 
-def get_retrieval_condition(query_embedding, threshold=0.7):
+def get_retrieval_condition(query_embedding, topk):
     # Convert query embedding to a string format for SQL query
     query_embedding_str = ",".join(map(str, query_embedding))
 
-    # SQL condition for cosine similarity
-    condition = f"(embeddings <=> '{query_embedding_str}') < {threshold} ORDER BY embeddings <=> '{query_embedding_str}'"
-    return condition
-
-
-def rag_query(tokenizer, model, device, query):
-    # Generate query embedding
-    query_embedding = generate_embeddings(
-        tokenizer=tokenizer, model=model, device=device, text=query
-    )[1]
-
-    # Retrieve relevant embeddings from the database
-    retrieval_condition = get_retrieval_condition(query_embedding)
-
+    # # SQL condition for cosine similarity
+    # condition = f"(embeddings <=> '{query_embedding_str}') < {threshold} ORDER BY embeddings <=> '{query_embedding_str}'"
     conn = get_connection()
-    register_vector(conn)
     cursor = conn.cursor()
     cursor.execute(
-        f"SELECT doc_fragment FROM embeddings WHERE {retrieval_condition} LIMIT 5"
-    )
-    retrieved = cursor.fetchall()
+            f"""SELECT data from aidb.retrieve('{query_embedding_str}', {topk}, 'documents_embeddings');"""
+        )
+    results = cursor.fetchall()
+    rag_query = ' '.join([row[0] for row in results])
+    return rag_query
 
-    rag_query = ' '.join([row[0] for row in retrieved])
 
+def rag_query(tokenizer, model, device, query, topk):
+    # Retrieve relevant embeddings from the database
+    rag_query = get_retrieval_condition(query, topk)
     query_template = template.format(context=rag_query, question=query)
 
     input_ids = tokenizer.encode(query_template, return_tensors="pt")
 
     # Generate the response
-    generated_response = model.generate(input_ids.to(device), max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)
+    model.generation_config.pad_token_id = tokenizer.pad_token_id
+    generated_response = model.generate(input_ids.to(device), max_new_tokens=100)
     return tokenizer.decode(generated_response[0][input_ids.shape[-1]:], skip_special_tokens=True)
diff --git a/requirements.txt b/requirements.txt
@@ -3,7 +3,6 @@ psycopg2
 transformers
 torch
 black
-pgvector
 PyPDF2
 bitsandbytes
 accelerate