Commit

merge
raghavdixit99 committed May 17, 2024
2 parents 5bbcf98 + bbc96f0 commit 6f3b4fd
Showing 75 changed files with 13,478 additions and 31 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/examples-test.yml
@@ -46,7 +46,7 @@ jobs:
        run: |
          for folder in *; do
            echo "$folder";
-           if [[ $folder == multimodal_clip ]]; then
+           if [[ $folder == multimodal_clip_diffusiondb ]]; then
              continue
            fi
            if [ ! -f "$folder"/test.py ]; then
@@ -59,7 +59,7 @@ jobs:
              echo "$file";
              python -m pip install -r "$file";
              pip uninstall lancedb -y
-             pip install "lancedb @ git+https://github.com/lancedb/lancedb.git#egg=subdir&subdirectory=python"
+             pip install lancedb
            fi
          done
          for file in *; do
@@ -129,6 +129,7 @@ jobs:
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
+         npm install @lancedb/vectordb-linux-x64-gnu
          for d in *; do
            if [[ $d == *.js ]]; then
              echo "$d";
75 changes: 60 additions & 15 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions applications/Chatbot_with_Parler_TTS/.env-example
@@ -0,0 +1 @@
OPENAI_API_KEY = 'sk-your-key'
39 changes: 39 additions & 0 deletions applications/Chatbot_with_Parler_TTS/README.md
@@ -0,0 +1,39 @@
# Chat with PDF using LanceDB and Parler TTS
This application provides PDF chat functionality using LanceDB with advanced RAG (Retrieval-Augmented Generation) methods and
leverages the Parler Text-to-Speech (TTS) model for audio output.

It is designed to enable high-quality text and speech interaction with PDF documents.
![image](../../assets/chatbot_tts.png)

## Features

- **Hybrid Search:** Combines vector-based and keyword searches to improve result relevance.

- **Full-Text Search (FTS):** Uses Tantivy for enhanced text search capabilities within documents.

- **ColBERT Reranker:** Improves the accuracy of search results by reranking them based on relevance.

- **LangChain Prompts:** Controls LLM (Large Language Model) outputs using customized prompts for more tailored interactions.

- **Parler Text-to-Speech (TTS):** A lightweight, high-quality TTS model that mimics various speech styles and attributes.
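Hybrid search fuses the vector and keyword result lists before reranking. As a rough conceptual illustration only (not LanceDB's actual internals), a reciprocal-rank-fusion sketch in plain Python:

```python
def rrf(rankings, k=60):
    # Reciprocal rank fusion: each ranked list contributes 1/(k + rank)
    # to a document's fused score; higher fused score ranks first.
    scores = {}
    for ranking in rankings:
        for rank, doc in enumerate(ranking, start=1):
            scores[doc] = scores.get(doc, 0.0) + 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

# Toy result lists from a vector search and a keyword (FTS) search
vector_hits = ["doc_a", "doc_b", "doc_c"]
keyword_hits = ["doc_b", "doc_d", "doc_a"]
fused = rrf([vector_hits, keyword_hits])
```

Here `doc_b` wins because it appears near the top of both lists, which is the intuition behind combining the two search modes.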

## Installation
Clone the repository and install the required packages:
```
pip install -r requirements.txt
```

## Running the Application
Start the application by running the main script. This will launch a Gradio interface accessible via a web browser:

Create a `.env` file and set your OpenAI API key in it, or simply rename the `.env-example` file to `.env` and fill in your key.

```
python3 main.py # Gradio app will run
```
## Outputs
The application provides two types of outputs from the processed PDF documents:

- Text: Extracted and processed text displayed in a user-friendly format.
- Audio: Natural-sounding speech generated from the text, customizable by speaker characteristics such as gender and pitch.

5 changes: 5 additions & 0 deletions applications/Chatbot_with_Parler_TTS/constants.py
@@ -0,0 +1,5 @@
input_pdf = "https://d18rn0p25nwr6d.cloudfront.net/CIK-0001559720/8a9ebed0-815a-469a-87eb-1767d21d8cec.pdf"

parler_tts_description = """ Utilize a male voice with an Indian English
accent for the chatbot. The speech should be clear, ensuring each word is
distinctly articulated in a crisp and confined audio environment. """
38 changes: 38 additions & 0 deletions applications/Chatbot_with_Parler_TTS/main.py
@@ -0,0 +1,38 @@
import gradio as gr
from rag_lance import get_rag_output
from tts_module import text_to_speech


def process_question(question, include_audio):
    generated_text = get_rag_output(question)
    if include_audio:
        audio_file_path = text_to_speech(generated_text)
        return generated_text, audio_file_path
    else:
        return generated_text, None  # Return None for the audio part


iface = gr.Interface(
    fn=process_question,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter a question..."),
        gr.Checkbox(
            label="Include audio", value=True
        ),  # Defaults to True, can be unchecked by the user
    ],
    outputs=[
        gr.Textbox(label="Generated Text"),
        gr.Audio(label="Generated Audio", type="filepath"),  # No optional keyword
    ],
    title="Advanced RAG Chatbot with TTS Support",
    description="Ask a question and get a text response along with its audio representation. Optionally, include the audio response.",
    examples=[
        ["What is the net profit of Airbnb?"],
        [
            "What are the specific factors contributing to Airbnb's increased operational expenses in the last fiscal year?"
        ],
    ],
)

if __name__ == "__main__":
    iface.launch(debug=True, share=True)
19 changes: 19 additions & 0 deletions applications/Chatbot_with_Parler_TTS/prompt.py
@@ -0,0 +1,19 @@
rag_prompt = """
As an AI Assistant, your role is to provide authentic and accurate responses. Analyze the question and its context thoroughly to determine the most appropriate answer.
**Instructions:**
- Understand the context and nuances of the question to identify relevant and precise information.
- If the question is a general greeting, respond with a greeting such as "Hello! How can I help you? Please ask a question related to the document so I can assist."
- If an answer cannot be conclusively determined from the provided information, inform the user rather than making up an answer.
- When multiple interpretations of a question exist, briefly present these viewpoints, then provide the most plausible answer based on the context.
- Focus on providing concise and factual responses, excluding irrelevant details.
- For sensitive or potentially harmful topics, advise users to seek professional advice or consult authoritative sources.
- Keep your answer clear and within 500 words.
**Context:**
{context}
**Question:**
{question}
"""
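In rag_lance.py this template is wrapped in langchain's `PromptTemplate` with `input_variables=["context", "question"]`, which at its core performs `{}`-style substitution. A minimal sketch with a shortened template:

```python
# Illustration only: how the RAG prompt's placeholders get filled.
# PromptTemplate performs the equivalent of this str.format call.
template = "**Context:**\n{context}\n**Question:**\n{question}"
filled = template.format(
    context="Airbnb's annual report text retrieved from LanceDB...",
    question="What is the net profit of Airbnb?",
)
print(filled)
```

The filled string is what ultimately gets sent to the LLM in the chain.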
113 changes: 113 additions & 0 deletions applications/Chatbot_with_Parler_TTS/rag_lance.py
@@ -0,0 +1,113 @@
import os
import lancedb
from dotenv import load_dotenv
from constants import input_pdf
from prompt import rag_prompt
from langchain_community.vectorstores import LanceDB
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from lancedb.embeddings import get_registry
from lancedb.pydantic import Vector, LanceModel
from lancedb.rerankers import ColbertReranker


load_dotenv()


class Document:
    def __init__(self, page_content, metadata=None):
        self.page_content = page_content
        self.metadata = metadata if metadata is not None else {}

    def __repr__(self):
        return f"Document(page_content='{self.page_content}', metadata={self.metadata})"


def get_rag_output(question):
    input_pdf_file = input_pdf

    # Create the PDF loader
    loader = PyPDFLoader(input_pdf_file)

    # Load the PDF document
    documents = loader.load()

    # Chunk the financial report
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)

    openai = get_registry().get("openai").create()

    class Schema(LanceModel):
        text: str = openai.SourceField()
        vector: Vector(openai.ndims()) = openai.VectorField()

    embedding_function = OpenAIEmbeddings()

    db = lancedb.connect("~/langchain")
    table = db.create_table(
        "airbnb",
        schema=Schema,
        mode="overwrite",
    )

    # Load the documents into LanceDB and build a full-text search index
    db = LanceDB.from_documents(docs, embedding_function, connection=table)
    table.create_fts_index("text", replace=True)

    # Hybrid search (vector + FTS), reranked with ColBERT
    reranker = ColbertReranker()
    docs_n = (
        table.search(question, query_type="hybrid")
        .limit(5)
        .rerank(reranker=reranker)
        .to_pandas()["text"]
        .to_list()
    )

    metadata = {"source": input_pdf_file}
    docs_with_metadata = [
        Document(page_content=text, metadata=metadata) for text in docs_n
    ]

    vectorstore = LanceDB.from_documents(
        documents=docs_with_metadata,
        embedding=OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]),
    )

    retriever = vectorstore.as_retriever()

    prompt = PromptTemplate(
        template=rag_prompt,
        input_variables=[
            "context",
            "question",
        ],
    )

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    llm = ChatOpenAI(
        model="gpt-3.5-turbo",
        temperature=0,
        openai_api_key=os.environ["OPENAI_API_KEY"],
    )

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    output = rag_chain.invoke(question)
    return output
10 changes: 10 additions & 0 deletions applications/Chatbot_with_Parler_TTS/requirements.txt
@@ -0,0 +1,10 @@
langchain
langchain-community
langchain-openai
lancedb
bs4
tantivy==0.20.1
pypdf
gradio
torch
git+https://github.com/huggingface/parler-tts.git
23 changes: 23 additions & 0 deletions applications/Chatbot_with_Parler_TTS/tts_module.py
@@ -0,0 +1,23 @@
import torch
import soundfile as sf
from transformers import AutoTokenizer
from parler_tts import ParlerTTSForConditionalGeneration


def text_to_speech(text, filename="output_audio.wav"):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = ParlerTTSForConditionalGeneration.from_pretrained(
        "parler-tts/parler_tts_mini_v0.1"
    ).to(device)
    tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

    # Voice description steering the TTS model's speaker characteristics
    description = "Utilize a male voice with a low pitch and an Indian English accent for the chatbot. The speech should be fast yet clear, ensuring each word is distinctly articulated in a crisp and confined audio environment."
    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(text, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
    audio_arr = generation.cpu().numpy().squeeze()
    sf.write(filename, audio_arr, model.config.sampling_rate)

    return filename
47 changes: 47 additions & 0 deletions applications/Healthcare_chatbot/README.md
@@ -0,0 +1,47 @@
## Overview
This project introduces a healthcare-focused Retrieval-Augmented Generation (RAG) chatbot designed to deliver quick responses to medical inquiries. Built on OpenBioLLM-Llama3 (or an OpenAI LLM) with NeuML's PubMedBERT for embeddings,
this chatbot is adept at processing and responding to medical data queries.

![image](../../assets/chatbot_medical.png)

## Key Features
### Language Model:

To use OpenBioLLM-Llama3, download the model to your local system and pass its path to the application.
A GGUF build is available at https://huggingface.co/PrunaAI/OpenBioLLM-Llama3-8B-GGUF-smashed
Swap in a different model based on your requirements and performance needs.

### Embeddings:
Uses NeuML's PubMedBERT (https://huggingface.co/NeuML/pubmedbert-base-embeddings), which is fine-tuned on PubMed data with the BERT architecture to ensure high relevance and contextual accuracy in responses.

### Database and Framework:
Incorporates the LanceDB vector database and Cohere reranker within the LangChain framework to enhance efficient query processing and response generation.
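At its core, this kind of retrieval reduces to ranking document embeddings by similarity to the query embedding, which LanceDB performs at scale. A toy cosine-similarity sketch in plain Python (illustrative only, not LanceDB's implementation):

```python
from math import sqrt

def cosine(a, b):
    # Cosine similarity between two embedding vectors
    dot = sum(x * y for x, y in zip(a, b))
    return dot / (sqrt(sum(x * x for x in a)) * sqrt(sum(y * y for y in b)))

def top_k(query, docs, k=2):
    # Rank document embeddings by similarity to the query, return top-k indices
    ranked = sorted(range(len(docs)), key=lambda i: cosine(query, docs[i]), reverse=True)
    return ranked[:k]

# Toy 2-d embeddings; real PubMedBERT embeddings are hundreds of dimensions
doc_vecs = [[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]]
query_vec = [1.0, 0.05]
print(top_k(query_vec, doc_vecs))
```

The reranker (Cohere here) then reorders these nearest neighbours using a stronger relevance model before they reach the LLM.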

## Installation
Follow these steps to set up the chatbot on your local machine:

Clone the repository and install the required packages:

```pip install -r requirements.txt```

## Start the application:
```
uvicorn main:app --reload
```


After launching the server, open the `index.html`
file in any web browser to start interacting with the chatbot.


## Usage

Use the chatbot via the provided web interface. Enter your medical-related questions into the chat input box, and receive responses generated from the integrated language models and databases.



## Note
Please be advised that while the chatbot provides information based on learned data, it can occasionally deliver incorrect information or miss critical nuances. Always consult with a healthcare professional before making any medical decisions based on advice received from the chatbot.

## Disclaimer
This chatbot is intended for informational purposes only and should not be used as a substitute for professional medical advice, diagnosis, or treatment. Always seek the advice of your physician or other qualified health provider with any questions you may have regarding a medical condition.
Binary file added applications/Healthcare_chatbot/data/cancer.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions applications/Healthcare_chatbot/data/data.txt
@@ -0,0 +1 @@
