diff --git a/applications/GTE_mlx_RAG/CLI_example.ipynb b/applications/GTE_mlx_RAG/CLI_example.ipynb deleted file mode 100644 index 3058ac9..0000000 --- a/applications/GTE_mlx_RAG/CLI_example.ipynb +++ /dev/null @@ -1,90 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CLI example workflow for a simple RAG model\n", - "This cli app uses `lancedb.embeddings.gte` API support for generating the embeddings in MLX format and `mlx_lm` as the LLM for generating the final response.\n", - "#### Overview:\n", - "- ingest_pdf.py - Extracts text from input pdf and stores in vectorDB.\n", - "- query_gte.py - Retrieves context relevent to question from vectorDB and augments prompt to generate RAG response.\n", - "\n", - "\n", - "**Note**: This works only on Apple silicon devices, install necessary packages via : `pip install -r ./GTE_mlx_RAG/requirements.txt`\n", - "\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fetching 4 files: 100%|████████████████████████| 4/4 [00:00<00:00, 88768.34it/s]\n", - "\u001b[0m\u001b[38;5;8m[\u001b[0m2024-01-30T23:55:24Z \u001b[0m\u001b[33mWARN \u001b[0m lance::dataset\u001b[0m\u001b[38;5;8m]\u001b[0m No existing dataset at /tmp/lancedb/test.lance, it will be created\n", - "ingestion done , move to query!\n" - ] - } - ], - "source": [ - "! python ingest_pdf.py --pdf ./data/mossformer.pdf" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Fetching 4 files: 100%|████████████████████████| 4/4 [00:00<00:00, 44858.87it/s]\n", - "Fetching 6 files: 100%|████████████████████████| 6/6 [00:00<00:00, 16090.68it/s]\n", - "('In this step, researchers combine two parts of a speech separation model '\n", - " 'called MossFormer and a Recurrent module. The MossFormer part helps '\n", - " 'understand the whole input sound sequence, while the Recurrent module '\n", - " 'focuses on the recurring patterns in speech, like how words or sounds follow '\n", - " 'each other. By combining these two parts, the model called MossFormer2 '\n", - " 'performs better than the original MossFormer, especially in separating '\n", - " 'speech sounds from a mixture. This simpler explanation highlights the '\n", - " 'integration of a Recurrent module into the MossFormer framework, which helps '\n", - " 'the model better handle recurrent patterns in speech signals without using '\n", - " 'traditional recurrent connections. This leads to better performance in '\n", - " 'speech separation tasks.')\n" - ] - } - ], - "source": [ - "! python query_gte.py --question \"Explain 2.2.- Hybrid MossFormer and Recurrent Modules in simpler language.\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/applications/GTE_mlx_RAG/README.md b/applications/GTE_mlx_RAG/README.md new file mode 100644 index 0000000..9637fa2 --- /dev/null +++ b/applications/GTE_mlx_RAG/README.md @@ -0,0 +1,40 @@ + +## CLI RAG application using GTE MLX (Apple silicon) and Lancedb +This is a Command Line Interface app designed to provide users with quick and accurate responses to their queries based on the input file (pdf) via RAG architecture. + +## Overview +It is built using `lancedb.embeddings.gte` embedding function which uses General Text Embeddings (GTE) model to embed documents, we have added support for Apple silicon devices by adding the MLX format of the model which can be accessed by the 'mlx=True' argument in the function. This app uses `mlx_lm` as the LLM for generating the final response. + +- ingest_pdf.py - Extracts text from input pdf and stores in vectorDB. +- query_gte.py - Retrieves context relevent to question from vectorDB and augments prompt to generate RAG response. + + +## Getting started + +* Install requirements + +Please install lancedb via git instead of PyPI as some latest features might be missing, to get the latest code, run the following in your virtual env : +```bash +pip install -e "git+https://github.com/lancedb/lancedb.git#egg=lancedb&subdirectory=python" + +python3 -m pip install -r requirements.txt +``` + +* Create vectors from a pdf file and store in lancedb + +Store your input data in './data/' folder and run the following to ingest the vectors : +```bash +python3 ingest_pdf.py --pdf ./data/mossformer.pdf +``` + +* Query database and generate response + +Run the following command by adding your query after the '--question' argument to get the response from the llm +```bash +python3 query_gte.py --question "Explain 2.2.- Hybrid MossFormer and Recurrent Modules in simpler language." +``` + + +## Sample response + +![image](../../assets/GTE.png) diff --git a/applications/GTE_mlx_RAG/requirements.txt b/applications/GTE_mlx_RAG/requirements.txt index be2e1ff..5eee571 100644 --- a/applications/GTE_mlx_RAG/requirements.txt +++ b/applications/GTE_mlx_RAG/requirements.txt @@ -1,3 +1,4 @@ unstructured[pdf] mlx -mlx_lm \ No newline at end of file +mlx_lm +ai2-olmo \ No newline at end of file diff --git a/assets/GTE.png b/assets/GTE.png new file mode 100644 index 0000000..b699553 Binary files /dev/null and b/assets/GTE.png differ