-
-
Notifications
You must be signed in to change notification settings - Fork 245
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kye
committed
Nov 25, 2023
1 parent
399099e
commit 9c3a292
Showing
5 changed files
with
160 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI

# Vision-capable model wrapper that the workflow will drive.
llm = GPT4VisionAPI()

# The question to ask, and the local image file it refers to.
task = "What is the color of the object?"
img = "images/swarms.jpeg"

# Build the workflow around the model.
# NOTE(review): the meaning of max_loops="auto" and dashboard=True is defined
# by Flow, which is not shown here -- confirm against its documentation.
flow = Flow(llm=llm, max_loops="auto", dashboard=True)

# Run the task against the image.
flow.run(task=task, img=img)
33 changes: 0 additions & 33 deletions
33
playground/demos/multi_modal_autonomous_agents/multi_modal_auto_agent.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
import base64 | ||
import os | ||
import requests | ||
from dotenv import load_dotenv | ||
|
||
# Let python-dotenv populate the process environment first, then capture the
# OpenAI key once at import time (None when the variable is not set).
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
||
class GPT4VisionAPI:
    """Thin wrapper around the OpenAI chat-completions endpoint for GPT-4 Vision.

    Parameters
    ----------
    openai_api_key : str, optional
        The OpenAI API key. When omitted (``None``), the ``OPENAI_API_KEY``
        environment variable is used.
    model_name : str, optional
        Model identifier sent in the request. Defaults to
        ``"gpt-4-vision-preview"``.
    max_tokens : int, optional
        Upper bound on generated tokens sent in the request. Defaults to 300.

    Methods
    -------
    encode_image(img: str)
        Encode a local image file to base64.
    run(task: str, img: str)
        Run the model and return the text of the first choice.
    __call__(task: str, img: str)
        Run the model and return the full decoded JSON response.

    Examples
    --------
    >>> from swarms.models.gpt4_vision_api import GPT4VisionAPI
    >>> llm = GPT4VisionAPI()
    >>> task = "What is the color of the object?"
    >>> img = "https://i.imgur.com/2M2ZGwC.jpeg"
    >>> llm.run(task, img)
    """

    API_URL = "https://api.openai.com/v1/chat/completions"

    def __init__(
        self,
        openai_api_key: str = None,
        model_name: str = "gpt-4-vision-preview",
        max_tokens: int = 300,
    ):
        super().__init__()
        # Only fall back to the environment when no key was passed at all, so
        # an explicitly supplied value is always respected.
        if openai_api_key is None:
            openai_api_key = os.getenv("OPENAI_API_KEY")
        self.openai_api_key = openai_api_key
        self.model_name = model_name
        self.max_tokens = max_tokens

    def encode_image(self, img: str):
        """Read the local file at ``img`` and return its bytes as a base64 string."""
        with open(img, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _image_url(self, img: str) -> str:
        """Return the value for the ``image_url.url`` field of the request."""
        # Remote images are passed through by URL; anything else is treated
        # as a local file path and inlined as a base64 data URL.
        if img.startswith(("http://", "https://")):
            return img
        return f"data:image/jpeg;base64,{self.encode_image(img)}"

    def _request(self, task: str, img: str) -> dict:
        """POST one text + image user message and return the decoded JSON body."""
        headers = {
            "Content-Type": "application/json",
            # Use the key held by this instance, not the module-level global,
            # so a key passed to the constructor actually takes effect.
            "Authorization": f"Bearer {self.openai_api_key}",
        }
        payload = {
            "model": self.model_name,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": task},
                        {
                            "type": "image_url",
                            "image_url": {"url": self._image_url(img)},
                        },
                    ],
                }
            ],
            "max_tokens": self.max_tokens,
        }
        response = requests.post(
            self.API_URL,
            headers=headers,
            json=payload,
        )
        return response.json()

    def run(self, task: str, img: str):
        """Run the model and return the generated text.

        Parameters
        ----------
        task : str
            The text prompt.
        img : str
            Path to a local image file, or an ``http(s)`` image URL.

        Returns
        -------
        str
            The message content of the first choice in the response.

        Raises
        ------
        RuntimeError
            If the response body carries an ``error`` object.
        Exception
            Any other failure (file access, network, unexpected response
            shape) is printed and re-raised unchanged.
        """
        try:
            out = self._request(task, img)
            if "error" in out:
                raise RuntimeError(f"OpenAI API error: {out['error']}")
            # Chat completions put the reply under message.content.
            return out["choices"][0]["message"]["content"]
        except Exception as error:
            print(f"Error with the request: {error}")
            raise

    def __call__(self, task: str, img: str):
        """Run the model and return the full decoded JSON response.

        Takes the same arguments as ``run`` but does not extract the text, so
        callers get the complete response dictionary.
        """
        try:
            return self._request(task, img)
        except Exception as error:
            print(f"Error with the request: {error}")
            raise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters