forked from lancedb/vectordb-recipes
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c887886
commit bdbca9c
Showing
5 changed files
with
292 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Imagebind demo | ||
|
||
A Gradio app showcasing the multi-modal capabilities of ImageBind, supported via the LanceDB API. | ||
|
||
## Usage | ||
You can run it locally by cloning the project as shown below, or open it in Colab: <a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/imagebind_demo/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> | ||
|
||
```bash | ||
git clone https://github.com/lancedb/vectordb-recipes.git | ||
cd vectordb-recipes/examples/imagebind_demo | ||
``` | ||
## Install dependencies and run the app | ||
```bash | ||
pip install -r requirements.txt | ||
python3 app.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import lancedb | ||
import requests | ||
import lancedb.embeddings.imagebind | ||
from lancedb.embeddings import get_registry | ||
from lancedb.pydantic import LanceModel, Vector | ||
import pandas as pd | ||
import os | ||
import gradio as gr | ||
from downloader import dowload_and_save_audio, dowload_and_save_image | ||
|
||
model = get_registry().get("imagebind").create() | ||
|
||
class TextModel(LanceModel):
    # Row schema of the "img_bind" table: one caption, one image and one
    # audio clip per row, plus the embedding used for search.

    # Caption for the row.
    text: str
    # Declared through model.SourceField(); presumably the column the
    # ImageBind embedding in `vector` is computed from (confirm in LanceDB docs).
    image_uri: str = model.SourceField()
    # Local path of the audio clip paired with this row.
    audio_path: str
    # Embedding column, sized from the model's reported dimensionality.
    vector: Vector(model.ndims()) = model.VectorField()
|
||
# Captions for the demo rows; entry i is paired with the i-th downloaded
# image and the i-th downloaded audio clip.
text_list = ["A bird", "A dragon", "A car"]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# The download helpers return only the files that were fetched successfully,
# so a failed download shortens a list and zip() below would silently pair
# captions with the wrong media (the UI further down also indexes [0]..[2]).
# Stop here with a clear message instead.
if not (len(text_list) == len(image_paths) == len(audio_paths)):
    raise RuntimeError(
        f"Expected {len(text_list)} images and {len(text_list)} audio files, "
        f"got {len(image_paths)} images and {len(audio_paths)} audio files; "
        "see the download messages above."
    )

# Load data
inputs = [
    {"text": text, "audio_path": audio_path, "image_uri": image_uri}
    for text, audio_path, image_uri in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# /tmp/lancedb persists between runs: without mode="overwrite" a second launch
# fails because "img_bind" already exists. Overwriting also avoids inserting
# the same demo rows again on every start.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)
|
||
def process_image(inp_img) -> tuple[str, str]:
    """Search the table with an image and return the best match's text and audio.

    Args:
        inp_img: Image query forwarded to ``table.search``. The Gradio image
            inputs wired to this handler use ``type='filepath'``.

    Returns:
        ``(text, audio_path)`` taken from the first ``TextModel`` row returned
        by the search.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.text, best.audio_path
|
||
def process_text(inp_text) -> tuple[str, str]:
    """Search the table with a text prompt and return the best match's image and audio.

    Args:
        inp_text: Text query forwarded to ``table.search``.

    Returns:
        ``(image_uri, audio_path)`` taken from the first ``TextModel`` row
        returned by the search.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.audio_path
|
||
def process_audio(inp_audio) -> tuple[str, str]:
    """Search the table with an audio clip and return the best match's image and text.

    Args:
        inp_audio: Audio query forwarded to ``table.search``. The Gradio audio
            inputs wired to this handler use ``type='filepath'``.

    Returns:
        ``(image_uri, text)`` taken from the first ``TextModel`` row returned
        by the search.

    Raises:
        IndexError: If the search returns no rows.
    """
    matches = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    best = matches[0]
    return best.image_uri, best.text
|
||
# Custom stylesheet handed to gr.Blocks(css=...).
# Fixes over the previous version: the `img` rule is now closed with `}` and
# the disabled width/height declarations use real CSS comments (`#` is not a
# comment marker in CSS).
# NOTE(review): `output-audio, output-text` are written as element selectors,
# while the components below set them as elem_id values, so this rule does not
# look like it matches them. Left unchanged on purpose: switching to
# `#output-audio` would hide the result widgets. Confirm the intent.
css = """
output-audio, output-text {
    display: None
}
img {
    /* width: 500px; */
    /* height: 450px; */
    margin-left: auto;
    margin-right: auto;
    object-fit: cover;
}
"""
with gr.Blocks(css=css) as app:
    # Tab 1: three fixed sample images, each with its own "Retrieve" button
    # and text/audio result widgets.
    with gr.Tab("Image to Text and Audio"):
        with gr.Row():
            with gr.Column():
                inp1 = gr.Image(
                    value=image_paths[0],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio1 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text1 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img1 = gr.Button("Retrieve")

            with gr.Column():
                inp2 = gr.Image(
                    value=image_paths[1],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio2 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text2 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img2 = gr.Button("Retrieve")

            with gr.Column():
                inp3 = gr.Image(
                    value=image_paths[2],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio3 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text3 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img3 = gr.Button("Retrieve")

    # Tab 2: free-text prompt; results appear when the textbox is submitted.
    with gr.Tab("Text to Image and Audio"):
        with gr.Row():
            with gr.Column():
                input_txt1 = gr.Textbox(label="Enter a prompt:", elem_id="output-text")
                output_audio4 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_img1 = gr.Image(type="filepath", elem_id="img")

    # Tab 3: three fixed sample audio clips, each with its own "Retrieve"
    # button and image/text result widgets.
    with gr.Tab("Audio to Image and Text"):
        with gr.Row():
            with gr.Column():
                inp_audio1 = gr.Audio(
                    value=audio_paths[0],
                    type="filepath",
                    interactive=False,
                )
                output_img7 = gr.Image(type="filepath", elem_id="img")
                output_text7 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio1 = gr.Button("Retrieve")

            with gr.Column():
                inp_audio2 = gr.Audio(
                    value=audio_paths[1],
                    type="filepath",
                    interactive=False,
                )
                output_img8 = gr.Image(type="filepath", elem_id="img")
                output_text8 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio2 = gr.Button("Retrieve")

            with gr.Column():
                inp_audio3 = gr.Audio(
                    value=audio_paths[2],
                    type="filepath",
                    interactive=False,
                )
                output_img9 = gr.Image(type="filepath", elem_id="img")
                output_text9 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio3 = gr.Button("Retrieve")

    # Event wiring. The output lists follow the order of each handler's
    # returned pair: process_image -> (text, audio), process_text ->
    # (image, audio), process_audio -> (image, text).
    btn_img1.click(
        process_image,
        inputs=[inp1],
        outputs=[output_text1, output_audio1],
    )
    btn_img2.click(
        process_image,
        inputs=[inp2],
        outputs=[output_text2, output_audio2],
    )
    btn_img3.click(
        process_image,
        inputs=[inp3],
        outputs=[output_text3, output_audio3],
    )

    input_txt1.submit(
        process_text,
        inputs=[input_txt1],
        outputs=[output_img1, output_audio4],
    )

    btn_audio1.click(
        process_audio,
        inputs=[inp_audio1],
        outputs=[output_img7, output_text7],
    )
    btn_audio2.click(
        process_audio,
        inputs=[inp_audio2],
        outputs=[output_img8, output_text8],
    )
    btn_audio3.click(
        process_audio,
        inputs=[inp_audio3],
        outputs=[output_img9, output_text9],
    )
|
||
if __name__ == "__main__":
    # The downloader stores media in a "test_inputs" folder next to the source
    # files, so anchor the served directory to this file's location rather
    # than to the current working directory ('./test_inputs/' only worked
    # when the app was started from its own folder).
    media_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_inputs")
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    app.launch(share=True, allowed_paths=[media_dir])
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import requests | ||
import os | ||
from pathlib import Path | ||
|
||
# Raw GitHub URLs of the demo audio clips (bird, dragon, car).
audio_file_urls = [
    "https://github.com/raghavdixit99/assets/raw/main/bird_audio.wav",
    "https://github.com/raghavdixit99/assets/raw/main/dragon-growl-37570.wav",
    "https://github.com/raghavdixit99/assets/raw/main/car_audio.wav",
]

# GitHub-hosted demo images. The URLs carry no file extension; presumably
# listed in the same bird/dragon/car order as the audio clips (verify).
image_urls = [
    "https://github.com/raghavdixit99/assets/assets/34462078/abf47cc4-d979-4aaa-83be-53a2115bf318",
    "https://github.com/raghavdixit99/assets/assets/34462078/93be928e-522b-4e37-889d-d4efd54b2112",
    "https://github.com/raghavdixit99/assets/assets/34462078/025deaff-632a-4829-a86c-3de6e326402f",
]

# Absolute directory of this module; downloads are written beneath it.
base_path = os.path.dirname(os.path.abspath(__file__))
# Local path where you want to save the .wav file | ||
def dowload_and_save_audio():
    """Download every URL in ``audio_file_urls`` into ``<base_path>/test_inputs``.

    Each file keeps the last path segment of its URL as its file name.

    Returns:
        list[str]: Local paths of the files that were saved, in URL order.
        A URL that does not answer with HTTP 200 is reported on stdout and
        left out, so the result can be shorter than ``audio_file_urls``.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level, including exceeding the timeout.
    """
    target_dir = Path(base_path) / "test_inputs"
    target_dir.mkdir(parents=True, exist_ok=True)

    audio_pths = []
    for url in audio_file_urls:
        local_file_path = target_dir / url.split("/")[-1]

        # Without a timeout a stalled connection would block start-up forever.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        with open(local_file_path, "wb") as audio_file:
            audio_file.write(response.content)
        audio_pths.append(str(local_file_path))
        print(f"Audio file downloaded successfully and saved as '{local_file_path}'.")
    return audio_pths
|
||
def dowload_and_save_image():
    """Download every URL in ``image_urls`` into ``<base_path>/test_inputs``.

    Each file is named after the last path segment of its URL with a
    ``.jpeg`` suffix appended.

    Returns:
        list[str]: Local paths of the files that were saved, in URL order.
        A URL that does not answer with HTTP 200 is reported on stdout and
        left out, so the result can be shorter than ``image_urls``.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level, including exceeding the timeout.
    """
    target_dir = Path(base_path) / "test_inputs"
    target_dir.mkdir(parents=True, exist_ok=True)

    image_paths = []
    for url in image_urls:
        filename = url.split("/")[-1]
        local_file_path = target_dir / f"{filename}.jpeg"

        # Without a timeout a stalled connection would block start-up forever.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        with open(local_file_path, "wb") as image_file:
            image_file.write(response.content)
        image_paths.append(str(local_file_path))
        print(f"Image file downloaded successfully and saved as '{local_file_path}'.")

    return image_paths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Imagebind demo\n", | ||
"\n", | ||
"This notebook runs the gradio interface for the demo app" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"! pip install -r requirements.txt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Traceback (most recent call last):\n", | ||
" File \"/Users/raghavdixit/Desktop/open_source/imagebind_demo/./app.py\", line 1, in <module>\n", | ||
" import lancedb\n", | ||
"ModuleNotFoundError: No module named 'lancedb'\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"! python3 ./app.py" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
lancedb | ||
gradio | ||
pandas | ||
imagebind@git+https://github.com/raghavdixit99/ImageBind.git |