diff --git a/llms/providers/openai.py b/llms/providers/openai.py
index 74be2d3..3ba3125 100644
--- a/llms/providers/openai.py
+++ b/llms/providers/openai.py
@@ -1,9 +1,7 @@
 from typing import AsyncGenerator, Dict, Generator, List, Optional, Union
-
-import aiohttp
 import tiktoken
-import openai
+from openai import AsyncOpenAI, OpenAI
 import json
 
 from ..results.result import AsyncStreamResult, Result, StreamResult
@@ -13,19 +11,18 @@
 class OpenAIProvider(BaseProvider):
     # cost is per million tokens
     MODEL_INFO = {
-        "gpt-3.5-turbo": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": True},
         "gpt-3.5-turbo-instruct": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": False},
         "gpt-4": {"prompt": 30.0, "completion": 60.0, "token_limit": 8192, "is_chat": True},
         "gpt-4-1106-preview": {"prompt": 10.0, "completion": 20.0, "token_limit": 128000, "is_chat": True},
     }
 
-    def __init__(self, api_key, model=None):
-        openai.api_key = api_key
+    def __init__(self, api_key=None, model=None):
         if model is None:
             model = list(self.MODEL_INFO.keys())[0]
         self.model = model
 
-        self.client = openai.ChatCompletion if self.is_chat_model else openai.Completion
+        self.client = OpenAI(api_key=api_key)
+        self.async_client = AsyncOpenAI(api_key=api_key)
 
     @property
     def is_chat_model(self) -> bool:
@@ -58,7 +55,7 @@ def _prepare_model_inputs(
         self,
         prompt: str,
         history: Optional[List[dict]] = None,
-        system_message: str = None,
+        system_message: Union[str, List[dict], None] = None,
         temperature: float = 0,
         max_tokens: int = 300,
         stream: bool = False,
@@ -70,9 +67,13 @@ def _prepare_model_inputs(
         if history:
             messages = [*history, *messages]
 
-        if system_message:
+        if isinstance(system_message, str):
             messages = [{"role": "system", "content": system_message}, *messages]
 
+        # users can input multiple full system message in dict form
+        elif isinstance(system_message, list):
+            messages = [*system_message, *messages]
+
         model_inputs = {
             "messages": messages,
             "temperature": temperature,
@@ -115,7 +116,7 @@ def complete(
             system_message: system messages in OpenAI format, must have role and content key.
                 It can has name key to include few-shots examples.
         """
-        
+
         model_inputs = self._prepare_model_inputs(
             prompt=prompt,
             history=history,
@@ -124,14 +125,19 @@ def complete(
             max_tokens=max_tokens,
             **kwargs,
         )
-        
+
         with self.track_latency():
-            response = self.client.create(model=self.model, **model_inputs)
-        
+            if self.is_chat_model:
+                response = self.client.chat.completions.create(model=self.model, **model_inputs)
+            else:
+                response = self.client.completions.create(model=self.model, **model_inputs)
+
         is_func_call = response.choices[0].finish_reason == "function_call"
+        function_call = {}
+        completion = ""
         if self.is_chat_model:
             if is_func_call:
-                completion = {
+                function_call = {
                     "name": response.choices[0].message.function_call.name,
                     "arguments": json.loads(response.choices[0].message.function_call.arguments)
                 }
@@ -139,20 +145,20 @@ def complete(
             completion = response.choices[0].message.content.strip()
         else:
             completion = response.choices[0].text.strip()
-        
+
         usage = response.usage
-        
+
         meta = {
-            "tokens_prompt": usage["prompt_tokens"],
-            "tokens_completion": usage["completion_tokens"],
+            "tokens_prompt": usage.prompt_tokens,
+            "tokens_completion": usage.completion_tokens,
             "latency": self.latency,
         }
         return Result(
-            text=completion if not is_func_call else '',
+            text=completion,
             model_inputs=model_inputs,
             provider=self,
             meta=meta,
-            function_call=completion if is_func_call else {}
+            function_call=function_call,
         )
 
     async def acomplete(
@@ -162,7 +168,6 @@ async def acomplete(
         system_message: Optional[List[dict]] = None,
         temperature: float = 0,
         max_tokens: int = 300,
-        aiosession: Optional[aiohttp.ClientSession] = None,
         **kwargs,
     ) -> Result:
         """
@@ -171,9 +176,6 @@ async def acomplete(
             system_message: system messages in OpenAI format, must have role and content key.
                 It can has name key to include few-shots examples.
         """
-        if aiosession is not None:
-            openai.aiosession.set(aiosession)
-
         model_inputs = self._prepare_model_inputs(
             prompt=prompt,
             history=history,
@@ -184,7 +186,10 @@ async def acomplete(
         )
 
         with self.track_latency():
-            response = await self.client.acreate(model=self.model, **model_inputs)
+            if self.is_chat_model:
+                response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
+            else:
+                response = await self.async_client.completions.create(model=self.model, **model_inputs)
 
         if self.is_chat_model:
             completion = response.choices[0].message.content.strip()
@@ -194,8 +199,8 @@ async def acomplete(
         usage = response.usage
 
         meta = {
-            "tokens_prompt": usage["prompt_tokens"],
-            "tokens_completion": usage["completion_tokens"],
+            "tokens_prompt": usage.prompt_tokens,
+            "tokens_completion": usage.completion_tokens,
             "latency": self.latency,
         }
         return Result(
@@ -208,8 +213,8 @@
     def complete_stream(
         self,
         prompt: str,
-        history: Optional[List[tuple]] = None,
-        system_message: str = None,
+        history: Optional[List[dict]] = None,
+        system_message: Union[str, List[dict], None] = None,
         temperature: float = 0,
         max_tokens: int = 300,
         **kwargs,
@@ -229,7 +234,11 @@ def complete_stream(
             stream=True,
             **kwargs,
         )
-        response = self.client.create(model=self.model, **model_inputs)
+
+        if self.is_chat_model:
+            response = self.client.chat.completions.create(model=self.model, **model_inputs)
+        else:
+            response = self.client.completions.create(model=self.model, **model_inputs)
         stream = self._process_stream(response)
         return StreamResult(stream=stream, model_inputs=model_inputs, provider=self)
 
@@ -237,27 +246,27 @@
     def _process_stream(self, response: Generator) -> Generator:
         if self.is_chat_model:
             chunk_generator = (
-                chunk["choices"][0].get("delta", {}).get("content", "")
-                for chunk in response
+                chunk.choices[0].delta.content for chunk in response
             )
         else:
             chunk_generator = (
-                chunk["choices"][0].get("text", "") for chunk in response
+                chunk.choices[0].text for chunk in response
             )
 
         while not (first_text := next(chunk_generator)):
             continue
         yield first_text.lstrip()
-        yield from chunk_generator
+        for chunk in chunk_generator:
+            if chunk is not None:
+                yield chunk
 
     async def acomplete_stream(
         self,
         prompt: str,
-        history: Optional[List[tuple]] = None,
-        system_message: str = None,
+        history: Optional[List[dict]] = None,
+        system_message: Union[str, List[dict], None] = None,
        temperature: float = 0,
         max_tokens: int = 300,
-        aiosession: Optional[aiohttp.ClientSession] = None,
         **kwargs,
     ) -> AsyncStreamResult:
         """
@@ -266,9 +275,6 @@ async def acomplete_stream(
             system_message: system messages in OpenAI format, must have role and content key.
                 It can has name key to include few-shots examples.
         """
-        if aiosession is not None:
-            openai.aiosession.set(aiosession)
-
         model_inputs = self._prepare_model_inputs(
             prompt=prompt,
             history=history,
@@ -279,7 +285,11 @@ async def acomplete_stream(
             stream=True,
             **kwargs,
         )
-        response = await self.client.acreate(model=self.model, **model_inputs)
+        with self.track_latency():
+            if self.is_chat_model:
+                response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
+            else:
+                response = await self.async_client.completions.create(model=self.model, **model_inputs)
         stream = self._aprocess_stream(response)
         return AsyncStreamResult(
             stream=stream, model_inputs=model_inputs, provider=self
@@ -288,21 +298,23 @@
     async def _aprocess_stream(self, response: AsyncGenerator) -> AsyncGenerator:
         if self.is_chat_model:
             while True:
-                first_completion = (await response.__anext__())["choices"][0].get("delta", {}).get("content", "")
+                first_completion = (await response.__anext__()).choices[0].delta.content
                 if first_completion:
                     yield first_completion.lstrip()
                     break
-            
+
             async for chunk in response:
-                completion = chunk["choices"][0].get("delta", {}).get("content", "")
-                yield completion
+                completion = chunk.choices[0].delta.content
+                if completion is not None:
+                    yield completion
         else:
             while True:
-                first_completion = (await response.__anext__())["choices"][0].get("text", "")
+                first_completion = (await response.__anext__()).choices[0].text
                 if first_completion:
                     yield first_completion.lstrip()
                     break
-            
+
             async for chunk in response:
-                completion = chunk["choices"][0].get("text", "")
-                yield completion
+                completion = chunk.choices[0].text
+                if completion is not None:
+                    yield completion
diff --git a/requirements.txt b/requirements.txt
index 47d5d8f..4a5d210 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 openai
 tiktoken
-anthropic==0.3
+anthropic>=0.3
 anthropic_bedrock
 ai21
 cohere
diff --git a/setup.py b/setup.py
index 6da5f7f..cda30a5 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 
 setup(
     name="pyllms",
-    version="0.3.8.1",
+    version="0.3.9",
     description="Minimal Python library to connect to LLMs (OpenAI, Anthropic, Google Palm2/Vertex, AI21, Cohere, Aleph-Alpha, HuggingfaceHub), with a built-in model performance benchmark.",
     long_description=long_description,
     long_description_content_type="text/markdown",
@@ -17,7 +17,7 @@
     author_email="vlad@kagi.com",
     packages=find_packages(),
     install_requires=[
-        "openai",
+        "openai>=1",
         "tiktoken",
         "anthropic>=0.3",
         "anthropic_bedrock",
@@ -28,7 +28,8 @@
         "google-cloud-aiplatform",
         "prettytable",
         "protobuf~=3.20.3",
-        "grpcio~=1.54.2"
+        "grpcio~=1.54.2",
+        "google-generativeai",
     ],
     extras_require={
         "local": ["einops", "accelerate"]