Skip to content

Commit

Permalink
imagebind_demo
Browse files Browse the repository at this point in the history
  • Loading branch information
raghavdixit99 committed Mar 1, 2024
1 parent c887886 commit bdbca9c
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 0 deletions.
16 changes: 16 additions & 0 deletions examples/imagebind_demo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Imagebind demo

A Gradio app showcasing the multi-modal capabilities of ImageBind, supported via the LanceDB embedding API.

## Usage
You can run it locally by cloning the project as shown below, or open it in Colab: <a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/imagebind_demo/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>

```bash
git clone https://github.com/lancedb/vectordb-recipes.git
cd vectordb-recipes/examples/imagebind_demo
```
## Install dependencies and run the app
```bash
pip install -r requirements.txt
python3 app.py
```
146 changes: 146 additions & 0 deletions examples/imagebind_demo/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import lancedb
import requests
import lancedb.embeddings.imagebind
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
import pandas as pd
import os
import gradio as gr
from downloader import dowload_and_save_audio, dowload_and_save_image

# Embedding function obtained from LanceDB's embedding registry under the
# name "imagebind". It supplies the field factories and the vector size used
# by the schema below.
model = get_registry().get("imagebind").create()


# Row schema of the demo table: one caption, one image and one audio clip per
# row, plus the embedding vector.
class TextModel(LanceModel):
    # Caption for the item.
    text: str
    # Image location, declared through model.SourceField().
    # NOTE(review): presumably this is the column LanceDB embeds on insert --
    # confirm against the LanceDB embedding-function docs.
    image_uri: str = model.SourceField()
    # Local path of the matching audio clip; a plain string column.
    audio_path: str
    # Embedding column, sized by model.ndims() and declared through
    # model.VectorField().
    vector: Vector(model.ndims()) = model.VectorField()

# Captions for the demo items; entry i is paired with image i and audio clip i.
text_list = ["A bird", "A dragon", "A car"]
image_paths = dowload_and_save_image()
audio_paths = dowload_and_save_audio()

# The downloaders skip any file that fails to download, so their result lists
# can be shorter than expected. zip() would then silently pair captions with
# the wrong media, so fail early with an explicit message instead.
if not (len(text_list) == len(image_paths) == len(audio_paths)):
    raise RuntimeError(
        "Expected one image and one audio file per caption; got "
        f"{len(image_paths)} image(s) and {len(audio_paths)} audio file(s) "
        f"for {len(text_list)} caption(s)."
    )

# Load data: one record per (caption, audio clip, image) triple.
inputs = [
    {"text": caption, "audio_path": audio_path, "image_uri": image_path}
    for caption, audio_path, image_path in zip(text_list, audio_paths, image_paths)
]

db = lancedb.connect("/tmp/lancedb")
# The database directory outlives the process, so a plain create_table() fails
# on the second run because "img_bind" already exists. Overwrite it so every
# start begins from a fresh table holding exactly the rows added below.
table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
table.add(inputs)

def process_image(inp_img) -> "tuple[str, str]":
    """Look up the table row that best matches an image query.

    Args:
        inp_img: Query forwarded unchanged to ``table.search``. The UI wires
            this to ``gr.Image(type='filepath')`` components, so it is
            presumably a local image file path.

    Returns:
        A ``(text, audio_path)`` pair taken from the top-ranked row.

    Raises:
        IndexError: If the search yields no rows (for example, an empty table).
    """
    matches = (
        table.search(inp_img, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    # limit(1) keeps only the best hit; unpack it from the result list.
    actual = matches[0]
    return actual.text, actual.audio_path

def process_text(inp_text) -> "tuple[str, str]":
    """Look up the table row that best matches a text prompt.

    Args:
        inp_text: Query forwarded unchanged to ``table.search``. The UI wires
            this to the prompt ``gr.Textbox``, so it is the text the user
            submitted.

    Returns:
        An ``(image_uri, audio_path)`` pair taken from the top-ranked row.

    Raises:
        IndexError: If the search yields no rows (for example, an empty table).
    """
    matches = (
        table.search(inp_text, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    # limit(1) keeps only the best hit; unpack it from the result list.
    actual = matches[0]
    return actual.image_uri, actual.audio_path

def process_audio(inp_audio) -> "tuple[str, str]":
    """Look up the table row that best matches an audio query.

    Args:
        inp_audio: Query forwarded unchanged to ``table.search``. The UI wires
            this to ``gr.Audio(type='filepath')`` components, so it is
            presumably a local audio file path.

    Returns:
        An ``(image_uri, text)`` pair taken from the top-ranked row.

    Raises:
        IndexError: If the search yields no rows (for example, an empty table).
    """
    matches = (
        table.search(inp_audio, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    # limit(1) keeps only the best hit; unpack it from the result list.
    actual = matches[0]
    return actual.image_uri, actual.text

# Custom stylesheet handed to gr.Blocks(css=...).
#
# The `img` rule used `#` to disable two declarations and never closed its
# block. `#` is not a CSS comment marker, so the disabled lines are now real
# /* ... */ comments and the rule is closed explicitly.
#
# NOTE(review): `output-audio` / `output-text` are written as element-type
# selectors (no leading `#`), while the UI uses those strings as elem_id
# values, so this first rule presumably matches nothing. It is left unchanged
# on purpose: turning it into `#output-audio, #output-text` would hide the
# outputs and also the prompt textbox, which reuses the "output-text" id.
css = """
output-audio, output-text {
display: None
}
img {
/* width: 500px; */
/* height: 450px; */
margin-left: auto;
margin-right: auto;
object-fit: cover;
}
"""
with gr.Blocks(css=css) as app:
    # Tab 1: three fixed sample images. Each column has its own "Retrieve"
    # button plus the audio and text outputs that button fills in.
    # NOTE: elem_id values ("img", "output-audio", "output-text") are reused
    # by several components throughout this layout.
    with gr.Tab("Image to Text and Audio"):
        with gr.Row():
            with gr.Column():
                inp1 = gr.Image(
                    value=image_paths[0],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio1 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text1 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img1 = gr.Button("Retrieve")

            with gr.Column():
                inp2 = gr.Image(
                    value=image_paths[1],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio2 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text2 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img2 = gr.Button("Retrieve")

            with gr.Column():
                inp3 = gr.Image(
                    value=image_paths[2],
                    type="filepath",
                    elem_id="img",
                    interactive=False,
                )
                output_audio3 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_text3 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_img3 = gr.Button("Retrieve")

    # Tab 2: free-text prompt. There is no button here; the lookup runs when
    # the textbox is submitted.
    with gr.Tab("Text to Image and Audio"):
        with gr.Row():
            with gr.Column():
                input_txt1 = gr.Textbox(label="Enter a prompt:", elem_id="output-text")
                output_audio4 = gr.Audio(label="Output Audio", elem_id="output-audio")
                output_img1 = gr.Image(type="filepath", elem_id="img")

    # Tab 3: three fixed sample audio clips, each with its own "Retrieve"
    # button plus the image and text outputs that button fills in.
    with gr.Tab("Audio to Image and Text"):
        with gr.Row():
            with gr.Column():
                inp_audio1 = gr.Audio(
                    value=audio_paths[0],
                    type="filepath",
                    interactive=False,
                )
                output_img7 = gr.Image(type="filepath", elem_id="img")
                output_text7 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio1 = gr.Button("Retrieve")

            with gr.Column():
                inp_audio2 = gr.Audio(
                    value=audio_paths[1],
                    type="filepath",
                    interactive=False,
                )
                output_img8 = gr.Image(type="filepath", elem_id="img")
                output_text8 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio2 = gr.Button("Retrieve")

            with gr.Column():
                inp_audio3 = gr.Audio(
                    value=audio_paths[2],
                    type="filepath",
                    interactive=False,
                )
                output_img9 = gr.Image(type="filepath", elem_id="img")
                output_text9 = gr.Textbox(label="Output Text", elem_id="output-text")
                btn_audio3 = gr.Button("Retrieve")

    # Event wiring. Handlers are registered in this order: image buttons,
    # prompt submit, audio buttons.
    image_wiring = (
        (btn_img1, inp1, output_text1, output_audio1),
        (btn_img2, inp2, output_text2, output_audio2),
        (btn_img3, inp3, output_text3, output_audio3),
    )
    for trigger, source, text_out, audio_out in image_wiring:
        trigger.click(process_image, inputs=[source], outputs=[text_out, audio_out])

    input_txt1.submit(
        process_text,
        inputs=[input_txt1],
        outputs=[output_img1, output_audio4],
    )

    audio_wiring = (
        (btn_audio1, inp_audio1, output_img7, output_text7),
        (btn_audio2, inp_audio2, output_img8, output_text8),
        (btn_audio3, inp_audio3, output_img9, output_text9),
    )
    for trigger, source, image_out, text_out in audio_wiring:
        trigger.click(process_audio, inputs=[source], outputs=[image_out, text_out])

if __name__ == "__main__":
    # The downloader saves the sample media in a "test_inputs" folder next to
    # its own source file, which sits beside this script. Resolve the allowed
    # directory from this file's location rather than the current working
    # directory, so media is still served when the app is started from
    # another directory.
    media_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_inputs")
    # share=True asks Gradio for a public share link in addition to the local
    # server.
    app.launch(share=True, allowed_paths=[media_dir])

56 changes: 56 additions & 0 deletions examples/imagebind_demo/downloader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import requests
import os
from pathlib import Path

# Raw GitHub URLs of the sample audio clips (bird, dragon, car).
audio_file_urls = [
    "https://github.com/raghavdixit99/assets/raw/main/bird_audio.wav",
    "https://github.com/raghavdixit99/assets/raw/main/dragon-growl-37570.wav",
    "https://github.com/raghavdixit99/assets/raw/main/car_audio.wav",
]

# GitHub asset URLs of the sample images.
# NOTE(review): presumably listed in the same bird/dragon/car order as the
# audio clips, since callers pair the two lists by position -- confirm.
image_urls = [
    "https://github.com/raghavdixit99/assets/assets/34462078/abf47cc4-d979-4aaa-83be-53a2115bf318",
    "https://github.com/raghavdixit99/assets/assets/34462078/93be928e-522b-4e37-889d-d4efd54b2112",
    "https://github.com/raghavdixit99/assets/assets/34462078/025deaff-632a-4829-a86c-3de6e326402f",
]

# Absolute directory containing this module; downloaded files are written to
# the "test_inputs" folder beneath it.
base_path = os.path.dirname(os.path.abspath(__file__))
def dowload_and_save_audio():
    """Download every URL in ``audio_file_urls`` into ``<base_path>/test_inputs``.

    Each file is stored under the last path segment of its URL. The target
    directory is created if it does not exist yet.

    Returns:
        list[str]: Local paths of the files that were saved, in URL order.
        A URL answered with a non-200 status is reported on stdout and
        skipped, so the list can be shorter than ``audio_file_urls``.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or exceeds the timeout.
    """
    target_dir = Path(base_path) / "test_inputs"
    # The directory is the same for every URL, so create it once up front.
    target_dir.mkdir(parents=True, exist_ok=True)

    audio_pths = []
    for url in audio_file_urls:
        local_file_path = target_dir / url.split("/")[-1]
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would hang application start-up.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        local_file_path.write_bytes(response.content)
        audio_pths.append(str(local_file_path))
        print(f"Audio file downloaded successfully and saved as '{local_file_path}'.")
    return audio_pths

def dowload_and_save_image():
    """Download every URL in ``image_urls`` into ``<base_path>/test_inputs``.

    Each file is stored under the last path segment of its URL with a
    ``.jpeg`` suffix appended. The target directory is created if it does not
    exist yet.

    Returns:
        list[str]: Local paths of the files that were saved, in URL order.
        A URL answered with a non-200 status is reported on stdout and
        skipped, so the list can be shorter than ``image_urls``.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or exceeds the timeout.
    """
    target_dir = Path(base_path) / "test_inputs"
    # The directory is the same for every URL, so create it once up front.
    target_dir.mkdir(parents=True, exist_ok=True)

    image_paths = []
    for url in image_urls:
        filename = url.split("/")[-1]
        local_file_path = target_dir / f"{filename}.jpeg"
        # Without a timeout, requests waits indefinitely on a stalled server,
        # which would hang application start-up.
        response = requests.get(url, timeout=30)

        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        local_file_path.write_bytes(response.content)
        image_paths.append(str(local_file_path))
        print(f"Image file downloaded successfully and saved as '{local_file_path}'.")

    return image_paths
70 changes: 70 additions & 0 deletions examples/imagebind_demo/main.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imagebind demo\n",
"\n",
"This notebook runs the gradio interface for the demo app"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install -r requirements.txt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Traceback (most recent call last):\n",
" File \"/Users/raghavdixit/Desktop/open_source/imagebind_demo/./app.py\", line 1, in <module>\n",
" import lancedb\n",
"ModuleNotFoundError: No module named 'lancedb'\n"
]
}
],
"source": [
"! python3 ./app.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 4 additions & 0 deletions examples/imagebind_demo/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
lancedb
gradio
pandas
imagebind@git+https://github.com/raghavdixit99/ImageBind.git

0 comments on commit bdbca9c

Please sign in to comment.