From 29c956546c7d688e67c00ca173cdccbd010d3c50 Mon Sep 17 00:00:00 2001 From: Travis Dent Date: Fri, 20 Dec 2024 14:18:34 -0800 Subject: [PATCH] Cleanup vision tool package --- agentstack/tools/vision/__init__.py | 67 ++++++++++++++++++++++++++++- agentstack/tools/vision/vision.py | 66 ---------------------------- 2 files changed, 66 insertions(+), 67 deletions(-) delete mode 100644 agentstack/tools/vision/vision.py diff --git a/agentstack/tools/vision/__init__.py b/agentstack/tools/vision/__init__.py index 8c6726de..d6485598 100644 --- a/agentstack/tools/vision/__init__.py +++ b/agentstack/tools/vision/__init__.py @@ -1,5 +1,70 @@ """Vision tool for analyzing images using OpenAI's Vision API.""" -from .vision import analyze_image +import base64 +from typing import Optional +import requests +from openai import OpenAI __all__ = ["analyze_image"] + + +def analyze_image(image_path_url: str) -> str: + """ + Analyze an image using OpenAI's Vision API. + + Args: + image_path_url: Local path or URL to the image + + Returns: + str: Description of the image contents + """ + client = OpenAI() + + if not image_path_url: + return "Image Path or URL is required." + + if "http" in image_path_url: + return _analyze_web_image(client, image_path_url) + return _analyze_local_image(client, image_path_url) + + +def _analyze_web_image(client: OpenAI, image_path_url: str) -> str: + response = client.chat.completions.create( + model="gpt-4-vision-preview", + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": image_path_url}}, + ], + } + ], + max_tokens=300, + ) + return response.choices[0].message.content + + +def _analyze_local_image(client: OpenAI, image_path: str) -> str: + base64_image = _encode_image(image_path) + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {client.api_key}"} + payload = { + "model": "gpt-4-vision-preview", + "messages": [ + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, + ], + } + ], + "max_tokens": 300, + } + response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) + return response.json()["choices"][0]["message"]["content"] + + +def _encode_image(image_path: str) -> str: + with open(image_path, "rb") as image_file: + return base64.b64encode(image_file.read()).decode("utf-8") diff --git a/agentstack/tools/vision/vision.py b/agentstack/tools/vision/vision.py deleted file mode 100644 index 2cd84503..00000000 --- a/agentstack/tools/vision/vision.py +++ /dev/null @@ -1,66 +0,0 @@ -import base64 -from typing import Optional -import requests -from openai import OpenAI - - -def analyze_image(image_path_url: str) -> str: - """ - Analyze an image using OpenAI's Vision API. - - Args: - image_path_url: Local path or URL to the image - - Returns: - str: Description of the image contents - """ - client = OpenAI() - - if not image_path_url: - return "Image Path or URL is required." - - if "http" in image_path_url: - return _analyze_web_image(client, image_path_url) - return _analyze_local_image(client, image_path_url) - - -def _analyze_web_image(client: OpenAI, image_path_url: str) -> str: - response = client.chat.completions.create( - model="gpt-4-vision-preview", - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - {"type": "image_url", "image_url": {"url": image_path_url}}, - ], - } - ], - max_tokens=300, - ) - return response.choices[0].message.content - - -def _analyze_local_image(client: OpenAI, image_path: str) -> str: - base64_image = _encode_image(image_path) - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {client.api_key}"} - payload = { - "model": "gpt-4-vision-preview", - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}, - ], - } - ], - "max_tokens": 300, - } - response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) - return response.json()["choices"][0]["message"]["content"] - - -def _encode_image(image_path: str) -> str: - with open(image_path, "rb") as image_file: - return base64.b64encode(image_file.read()).decode("utf-8")